-rw-r--r--Documentation/RCU/Design/Data-Structures/Data-Structures.html118
-rw-r--r--Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.html22
-rw-r--r--Documentation/RCU/Design/Memory-Ordering/TreeRCU-gp-cleanup.svg123
-rw-r--r--Documentation/RCU/Design/Memory-Ordering/TreeRCU-gp-init-1.svg16
-rw-r--r--Documentation/RCU/Design/Memory-Ordering/TreeRCU-gp-init-3.svg56
-rw-r--r--Documentation/RCU/Design/Memory-Ordering/TreeRCU-gp.svg237
-rw-r--r--Documentation/RCU/Design/Memory-Ordering/TreeRCU-qs.svg12
-rw-r--r--Documentation/RCU/stallwarn.txt24
-rw-r--r--Documentation/RCU/whatisRCU.txt18
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt6
-rw-r--r--Documentation/core-api/atomic_ops.rst2
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/ingenic,intc.txt1
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.txt2
-rw-r--r--Documentation/devicetree/bindings/timer/mediatek,mtk-timer.txt34
-rw-r--r--Documentation/memory-barriers.txt43
-rw-r--r--Documentation/networking/dpaa2/overview.rst1
-rw-r--r--Documentation/translations/ko_KR/memory-barriers.txt22
-rw-r--r--Documentation/x86/intel_rdt_ui.txt380
-rw-r--r--Documentation/x86/x86_64/boot-options.txt8
-rw-r--r--MAINTAINERS18
-rw-r--r--Makefile2
-rw-r--r--arch/alpha/include/asm/atomic.h64
-rw-r--r--arch/arc/Kconfig3
-rw-r--r--arch/arc/include/asm/atomic.h86
-rw-r--r--arch/arc/include/asm/cache.h4
-rw-r--r--arch/arc/include/asm/delay.h3
-rw-r--r--arch/arc/mm/cache.c17
-rw-r--r--arch/arc/mm/dma.c49
-rw-r--r--arch/arc/plat-eznps/include/plat/ctop.h10
-rw-r--r--arch/arc/plat-eznps/mtm.c6
-rw-r--r--arch/arm/Kconfig19
-rw-r--r--arch/arm/include/asm/atomic.h55
-rw-r--r--arch/arm/include/asm/efi.h3
-rw-r--r--arch/arm/include/asm/irq.h5
-rw-r--r--arch/arm/include/asm/mach/arch.h2
-rw-r--r--arch/arm/include/asm/mach/time.h3
-rw-r--r--arch/arm/include/asm/tlb.h8
-rw-r--r--arch/arm/kernel/entry-armv.S10
-rw-r--r--arch/arm/kernel/entry-common.S4
-rw-r--r--arch/arm/kernel/irq.c10
-rw-r--r--arch/arm/kernel/setup.c2
-rw-r--r--arch/arm/kernel/time.c15
-rw-r--r--arch/arm/mach-rpc/ecard.c5
-rw-r--r--arch/arm/plat-omap/counter_32k.c2
-rw-r--r--arch/arm64/Kconfig4
-rw-r--r--arch/arm64/crypto/aes-ce-ccm-core.S150
-rw-r--r--arch/arm64/crypto/ghash-ce-core.S76
-rw-r--r--arch/arm64/crypto/ghash-ce-glue.c8
-rw-r--r--arch/arm64/include/asm/atomic.h47
-rw-r--r--arch/arm64/include/asm/bitops.h21
-rw-r--r--arch/arm64/include/asm/efi.h3
-rw-r--r--arch/arm64/include/asm/irq.h2
-rw-r--r--arch/arm64/include/asm/tlb.h4
-rw-r--r--arch/arm64/kernel/irq.c10
-rw-r--r--arch/arm64/lib/Makefile2
-rw-r--r--arch/arm64/lib/bitops.S76
-rw-r--r--arch/arm64/mm/hugetlbpage.c10
-rw-r--r--arch/arm64/mm/mmu.c4
-rw-r--r--arch/h8300/include/asm/atomic.h19
-rw-r--r--arch/hexagon/include/asm/atomic.h18
-rw-r--r--arch/ia64/include/asm/atomic.h81
-rw-r--r--arch/ia64/include/asm/tlb.h7
-rw-r--r--arch/ia64/mm/init.c2
-rw-r--r--arch/m68k/Kconfig5
-rw-r--r--arch/m68k/apollo/config.c8
-rw-r--r--arch/m68k/atari/config.c5
-rw-r--r--arch/m68k/atari/time.c63
-rw-r--r--arch/m68k/bvme6000/config.c45
-rw-r--r--arch/m68k/configs/amiga_defconfig32
-rw-r--r--arch/m68k/configs/apollo_defconfig30
-rw-r--r--arch/m68k/configs/atari_defconfig29
-rw-r--r--arch/m68k/configs/bvme6000_defconfig30
-rw-r--r--arch/m68k/configs/hp300_defconfig30
-rw-r--r--arch/m68k/configs/mac_defconfig30
-rw-r--r--arch/m68k/configs/multi_defconfig32
-rw-r--r--arch/m68k/configs/mvme147_defconfig30
-rw-r--r--arch/m68k/configs/mvme16x_defconfig30
-rw-r--r--arch/m68k/configs/q40_defconfig30
-rw-r--r--arch/m68k/configs/sun3_defconfig28
-rw-r--r--arch/m68k/configs/sun3x_defconfig30
-rw-r--r--arch/m68k/include/asm/Kbuild1
-rw-r--r--arch/m68k/include/asm/atomic.h24
-rw-r--r--arch/m68k/include/asm/bitops.h14
-rw-r--r--arch/m68k/include/asm/dma-mapping.h12
-rw-r--r--arch/m68k/include/asm/io.h7
-rw-r--r--arch/m68k/include/asm/io_mm.h42
-rw-r--r--arch/m68k/include/asm/io_no.h12
-rw-r--r--arch/m68k/include/asm/kmap.h9
-rw-r--r--arch/m68k/include/asm/machdep.h1
-rw-r--r--arch/m68k/include/asm/macintosh.h1
-rw-r--r--arch/m68k/include/asm/page_no.h2
-rw-r--r--arch/m68k/kernel/dma.c68
-rw-r--r--arch/m68k/kernel/setup_mm.c15
-rw-r--r--arch/m68k/kernel/setup_no.c21
-rw-r--r--arch/m68k/mac/config.c21
-rw-r--r--arch/m68k/mac/misc.c80
-rw-r--r--arch/m68k/mm/init.c1
-rw-r--r--arch/m68k/mm/mcfmmu.c13
-rw-r--r--arch/m68k/mm/motorola.c35
-rw-r--r--arch/m68k/mvme147/config.c7
-rw-r--r--arch/m68k/mvme16x/config.c8
-rw-r--r--arch/m68k/q40/config.c30
-rw-r--r--arch/m68k/sun3/config.c4
-rw-r--r--arch/mips/include/asm/atomic.h172
-rw-r--r--arch/mips/kvm/mips.c4
-rw-r--r--arch/openrisc/Kconfig5
-rw-r--r--arch/openrisc/include/asm/atomic.h4
-rw-r--r--arch/openrisc/include/asm/cmpxchg.h3
-rw-r--r--arch/openrisc/include/asm/irq.h2
-rw-r--r--arch/openrisc/kernel/irq.c7
-rw-r--r--arch/parisc/Kconfig3
-rw-r--r--arch/parisc/include/asm/atomic.h107
-rw-r--r--arch/parisc/include/asm/barrier.h32
-rw-r--r--arch/parisc/kernel/entry.S2
-rw-r--r--arch/parisc/kernel/pacache.S1
-rw-r--r--arch/parisc/kernel/syscall.S4
-rw-r--r--arch/powerpc/include/asm/atomic.h69
-rw-r--r--arch/powerpc/include/asm/mmu_context.h33
-rw-r--r--arch/powerpc/kernel/pci-common.c4
-rw-r--r--arch/powerpc/kvm/book3s_hv.c6
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c3
-rw-r--r--arch/powerpc/platforms/pseries/setup.c3
-rw-r--r--arch/riscv/include/asm/atomic.h166
-rw-r--r--arch/s390/Kconfig1
-rw-r--r--arch/s390/include/asm/atomic.h65
-rw-r--r--arch/s390/kernel/time.c15
-rw-r--r--arch/s390/kvm/interrupt.c2
-rw-r--r--arch/sh/include/asm/atomic.h35
-rw-r--r--arch/sh/include/asm/cmpxchg-xchg.h3
-rw-r--r--arch/sparc/include/asm/Kbuild1
-rw-r--r--arch/sparc/include/asm/atomic_32.h24
-rw-r--r--arch/sparc/include/asm/atomic_64.h65
-rw-r--r--arch/sparc/include/asm/msi.h32
-rw-r--r--arch/sparc/kernel/time_64.c2
-rw-r--r--arch/sparc/lib/atomic32.c4
-rw-r--r--arch/sparc/mm/srmmu.c20
-rw-r--r--arch/x86/Kconfig2
-rw-r--r--arch/x86/Makefile5
-rw-r--r--arch/x86/boot/bitops.h3
-rw-r--r--arch/x86/boot/compressed/eboot.c545
-rw-r--r--arch/x86/boot/compressed/eboot.h12
-rw-r--r--arch/x86/boot/compressed/kaslr.c98
-rw-r--r--arch/x86/boot/compressed/pgtable_64.c73
-rw-r--r--arch/x86/boot/string.c5
-rw-r--r--arch/x86/crypto/aegis128-aesni-asm.S2
-rw-r--r--arch/x86/crypto/aegis128-aesni-glue.c12
-rw-r--r--arch/x86/crypto/aegis128l-aesni-asm.S2
-rw-r--r--arch/x86/crypto/aegis128l-aesni-glue.c12
-rw-r--r--arch/x86/crypto/aegis256-aesni-asm.S2
-rw-r--r--arch/x86/crypto/aegis256-aesni-glue.c12
-rw-r--r--arch/x86/crypto/aesni-intel_asm.S8
-rw-r--r--arch/x86/crypto/aesni-intel_avx-x86_64.S4
-rw-r--r--arch/x86/crypto/morus1280-avx2-asm.S2
-rw-r--r--arch/x86/crypto/morus1280-avx2-glue.c10
-rw-r--r--arch/x86/crypto/morus1280-sse2-asm.S2
-rw-r--r--arch/x86/crypto/morus1280-sse2-glue.c10
-rw-r--r--arch/x86/crypto/morus640-sse2-asm.S2
-rw-r--r--arch/x86/crypto/morus640-sse2-glue.c10
-rw-r--r--arch/x86/crypto/sha1_ssse3_asm.S2
-rw-r--r--arch/x86/entry/entry_32.S632
-rw-r--r--arch/x86/entry/entry_64.S5
-rw-r--r--arch/x86/entry/vdso/Makefile26
-rw-r--r--arch/x86/hyperv/hv_apic.c59
-rw-r--r--arch/x86/hyperv/mmu.c80
-rw-r--r--arch/x86/include/asm/atomic.h32
-rw-r--r--arch/x86/include/asm/atomic64_32.h61
-rw-r--r--arch/x86/include/asm/atomic64_64.h50
-rw-r--r--arch/x86/include/asm/cmpxchg.h2
-rw-r--r--arch/x86/include/asm/cmpxchg_64.h4
-rw-r--r--arch/x86/include/asm/cpufeatures.h3
-rw-r--r--arch/x86/include/asm/intel-family.h13
-rw-r--r--arch/x86/include/asm/intel-mid.h43
-rw-r--r--arch/x86/include/asm/irqflags.h2
-rw-r--r--arch/x86/include/asm/kvm_guest.h7
-rw-r--r--arch/x86/include/asm/kvm_para.h1
-rw-r--r--arch/x86/include/asm/mmu_context.h5
-rw-r--r--arch/x86/include/asm/mshyperv.h34
-rw-r--r--arch/x86/include/asm/nospec-branch.h2
-rw-r--r--arch/x86/include/asm/orc_types.h2
-rw-r--r--arch/x86/include/asm/percpu.h7
-rw-r--r--arch/x86/include/asm/pgtable-2level.h9
-rw-r--r--arch/x86/include/asm/pgtable-2level_types.h3
-rw-r--r--arch/x86/include/asm/pgtable-3level.h7
-rw-r--r--arch/x86/include/asm/pgtable-3level_types.h6
-rw-r--r--arch/x86/include/asm/pgtable.h94
-rw-r--r--arch/x86/include/asm/pgtable_32.h2
-rw-r--r--arch/x86/include/asm/pgtable_32_types.h9
-rw-r--r--arch/x86/include/asm/pgtable_64.h89
-rw-r--r--arch/x86/include/asm/pgtable_64_types.h3
-rw-r--r--arch/x86/include/asm/pgtable_types.h28
-rw-r--r--arch/x86/include/asm/processor-flags.h8
-rw-r--r--arch/x86/include/asm/processor.h1
-rw-r--r--arch/x86/include/asm/pti.h3
-rw-r--r--arch/x86/include/asm/refcount.h1
-rw-r--r--arch/x86/include/asm/sections.h1
-rw-r--r--arch/x86/include/asm/set_memory.h1
-rw-r--r--arch/x86/include/asm/switch_to.h16
-rw-r--r--arch/x86/include/asm/text-patching.h1
-rw-r--r--arch/x86/include/asm/tlbflush.h21
-rw-r--r--arch/x86/include/asm/trace/hyperv.h15
-rw-r--r--arch/x86/include/asm/tsc.h4
-rw-r--r--arch/x86/include/asm/unwind_hints.h16
-rw-r--r--arch/x86/kernel/alternative.c7
-rw-r--r--arch/x86/kernel/apic/apic.c2
-rw-r--r--arch/x86/kernel/apic/vector.c19
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c4
-rw-r--r--arch/x86/kernel/asm-offsets.c5
-rw-r--r--arch/x86/kernel/asm-offsets_32.c10
-rw-r--r--arch/x86/kernel/asm-offsets_64.c2
-rw-r--r--arch/x86/kernel/cpu/Makefile4
-rw-r--r--arch/x86/kernel/cpu/amd.c13
-rw-r--r--arch/x86/kernel/cpu/bugs.c58
-rw-r--r--arch/x86/kernel/cpu/common.c48
-rw-r--r--arch/x86/kernel/cpu/intel.c10
-rw-r--r--arch/x86/kernel/cpu/intel_rdt.c11
-rw-r--r--arch/x86/kernel/cpu/intel_rdt.h143
-rw-r--r--arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c129
-rw-r--r--arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c1522
-rw-r--r--arch/x86/kernel/cpu/intel_rdt_pseudo_lock_event.h43
-rw-r--r--arch/x86/kernel/cpu/intel_rdt_rdtgroup.c808
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c202
-rw-r--r--arch/x86/kernel/dumpstack.c28
-rw-r--r--arch/x86/kernel/head_32.S20
-rw-r--r--arch/x86/kernel/head_64.S2
-rw-r--r--arch/x86/kernel/jump_label.c11
-rw-r--r--arch/x86/kernel/kvm.c18
-rw-r--r--arch/x86/kernel/kvmclock.c258
-rw-r--r--arch/x86/kernel/ldt.c137
-rw-r--r--arch/x86/kernel/machine_kexec_32.c5
-rw-r--r--arch/x86/kernel/paravirt.c14
-rw-r--r--arch/x86/kernel/paravirt_patch_64.c2
-rw-r--r--arch/x86/kernel/pci-iommu_table.c2
-rw-r--r--arch/x86/kernel/pcspeaker.c2
-rw-r--r--arch/x86/kernel/process.c2
-rw-r--r--arch/x86/kernel/process_32.c2
-rw-r--r--arch/x86/kernel/process_64.c2
-rw-r--r--arch/x86/kernel/setup.c10
-rw-r--r--arch/x86/kernel/stacktrace.c42
-rw-r--r--arch/x86/kernel/tsc.c259
-rw-r--r--arch/x86/kernel/tsc_msr.c96
-rw-r--r--arch/x86/kernel/unwind_orc.c52
-rw-r--r--arch/x86/kernel/vm86_32.c4
-rw-r--r--arch/x86/kernel/vmlinux.lds.S17
-rw-r--r--arch/x86/kernel/x86_init.c2
-rw-r--r--arch/x86/kvm/lapic.c2
-rw-r--r--arch/x86/kvm/vmx.c22
-rw-r--r--arch/x86/lib/memcpy_64.S2
-rw-r--r--arch/x86/mm/dump_pagetables.c27
-rw-r--r--arch/x86/mm/fault.c2
-rw-r--r--arch/x86/mm/init.c37
-rw-r--r--arch/x86/mm/init_64.c14
-rw-r--r--arch/x86/mm/numa_emulation.c107
-rw-r--r--arch/x86/mm/pageattr.c19
-rw-r--r--arch/x86/mm/pgtable.c169
-rw-r--r--arch/x86/mm/pti.c261
-rw-r--r--arch/x86/mm/tlb.c224
-rw-r--r--arch/x86/net/bpf_jit_comp32.c8
-rw-r--r--arch/x86/platform/efi/efi_64.c101
-rw-r--r--arch/x86/platform/efi/quirks.c14
-rw-r--r--arch/x86/platform/intel-mid/Makefile2
-rw-r--r--arch/x86/platform/intel-mid/intel-mid.c23
-rw-r--r--arch/x86/platform/intel-mid/intel_mid_weak_decls.h18
-rw-r--r--arch/x86/platform/intel-mid/mfld.c70
-rw-r--r--arch/x86/platform/intel-mid/mrfld.c105
-rw-r--r--arch/x86/platform/olpc/olpc.c4
-rw-r--r--arch/x86/platform/uv/tlb_uv.c2
-rw-r--r--arch/x86/power/hibernate_asm_64.S2
-rw-r--r--arch/x86/tools/relocs.c1
-rw-r--r--arch/x86/um/vdso/.gitignore1
-rw-r--r--arch/x86/um/vdso/Makefile16
-rw-r--r--arch/x86/xen/enlighten_pv.c51
-rw-r--r--arch/x86/xen/mmu_pv.c6
-rw-r--r--arch/x86/xen/suspend_pv.c5
-rw-r--r--arch/x86/xen/time.c18
-rw-r--r--arch/x86/xen/xen-ops.h6
-rw-r--r--arch/xtensa/include/asm/atomic.h98
-rw-r--r--block/blk-core.c5
-rw-r--r--block/blk-mq-tag.c2
-rw-r--r--drivers/acpi/acpi_lpss.c26
-rw-r--r--drivers/acpi/acpica/psloop.c19
-rw-r--r--drivers/block/rbd.c2
-rw-r--r--drivers/block/zram/zram_drv.c15
-rw-r--r--drivers/clocksource/Makefile2
-rw-r--r--drivers/clocksource/mtk_timer.c268
-rw-r--r--drivers/clocksource/tegra20_timer.c4
-rw-r--r--drivers/clocksource/timer-atcpit100.c2
-rw-r--r--drivers/clocksource/timer-keystone.c2
-rw-r--r--drivers/clocksource/timer-mediatek.c328
-rw-r--r--drivers/clocksource/timer-sprd.c50
-rw-r--r--drivers/clocksource/timer-ti-32k.c3
-rw-r--r--drivers/clocksource/zevio-timer.c2
-rw-r--r--drivers/cpufreq/intel_pstate.c17
-rw-r--r--drivers/crypto/padlock-aes.c8
-rw-r--r--drivers/firmware/efi/Kconfig12
-rw-r--r--drivers/firmware/efi/cper.c19
-rw-r--r--drivers/firmware/efi/efi.c23
-rw-r--r--drivers/firmware/efi/esrt.c8
-rw-r--r--drivers/firmware/efi/libstub/arm-stub.c32
-rw-r--r--drivers/firmware/efi/libstub/efi-stub-helper.c31
-rw-r--r--drivers/firmware/efi/libstub/efistub.h3
-rw-r--r--drivers/firmware/efi/runtime-wrappers.c202
-rw-r--r--drivers/gpio/gpiolib-acpi.c56
-rw-r--r--drivers/gpu/drm/bridge/adv7511/adv7511_drv.c12
-rw-r--r--drivers/gpu/drm/drm_atomic_helper.c8
-rw-r--r--drivers/gpu/drm/drm_context.c2
-rw-r--r--drivers/gpu/drm/vc4/vc4_plane.c3
-rw-r--r--drivers/i2c/busses/i2c-xlp9xx.c41
-rw-r--r--drivers/infiniband/core/rdma_core.c2
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c59
-rw-r--r--drivers/input/keyboard/hilkbd.c4
-rw-r--r--drivers/irqchip/Kconfig16
-rw-r--r--drivers/irqchip/irq-gic-v3-its-fsl-mc-msi.c3
-rw-r--r--drivers/irqchip/irq-gic-v3-its-pci-msi.c16
-rw-r--r--drivers/irqchip/irq-gic-v3-its-platform-msi.c2
-rw-r--r--drivers/irqchip/irq-gic-v3-its.c243
-rw-r--r--drivers/irqchip/irq-gic-v3.c4
-rw-r--r--drivers/irqchip/irq-ingenic.c1
-rw-r--r--drivers/irqchip/irq-stm32-exti.c1
-rw-r--r--drivers/media/platform/vsp1/vsp1_drm.c4
-rw-r--r--drivers/media/rc/bpf-lirc.c1
-rw-r--r--drivers/media/rc/rc-ir-raw.c8
-rw-r--r--drivers/media/rc/rc-main.c12
-rw-r--r--drivers/mmc/host/mxcmmc.c3
-rw-r--r--drivers/net/bonding/bond_main.c14
-rw-r--r--drivers/net/can/usb/ems_usb.c1
-rw-r--r--drivers/net/dsa/mv88e6xxx/chip.c4
-rw-r--r--drivers/net/ethernet/8390/mac8390.c20
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_com.c1
-rw-r--r--drivers/net/ethernet/amd/xgbe/xgbe-mdio.c4
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c2
-rw-r--r--drivers/net/ethernet/cavium/thunder/thunder_bgx.c2
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c6
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c2
-rw-r--r--drivers/net/ethernet/cisco/enic/enic_main.c80
-rw-r--r--drivers/net/ethernet/huawei/hinic/hinic_main.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c32
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c51
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/main.c4
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_main.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c40
-rw-r--r--drivers/net/ethernet/ti/cpsw.c25
-rw-r--r--drivers/net/ethernet/ti/cpsw_ale.c2
-rw-r--r--drivers/net/netdevsim/devlink.c1
-rw-r--r--drivers/net/phy/mdio-mux-bcm-iproc.c2
-rw-r--r--drivers/net/usb/lan78xx.c2
-rw-r--r--drivers/net/virtio_net.c41
-rw-r--r--drivers/net/wan/lmc/lmc_main.c2
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c3
-rw-r--r--drivers/net/wireless/intel/iwlwifi/cfg/9000.c69
-rw-r--r--drivers/net/wireless/intel/iwlwifi/iwl-config.h5
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/drv.c22
-rw-r--r--drivers/net/xen-netfront.c14
-rw-r--r--drivers/nubus/bus.c3
-rw-r--r--drivers/pci/bus.c6
-rw-r--r--drivers/pci/controller/pcie-mobiveil.c2
-rw-r--r--drivers/pci/hotplug/acpiphp_glue.c2
-rw-r--r--drivers/pci/pci.h11
-rw-r--r--drivers/pci/probe.c4
-rw-r--r--drivers/pci/remove.c5
-rw-r--r--drivers/scsi/fcoe/fcoe_ctlr.c6
-rw-r--r--drivers/scsi/libfc/fc_rport.c1
-rw-r--r--drivers/scsi/libiscsi.c12
-rw-r--r--drivers/scsi/mpt3sas/mpt3sas_base.c16
-rw-r--r--drivers/scsi/qedi/qedi_main.c2
-rw-r--r--drivers/scsi/qla2xxx/qla_attr.c1
-rw-r--r--drivers/scsi/qla2xxx/qla_gbl.h1
-rw-r--r--drivers/scsi/qla2xxx/qla_gs.c4
-rw-r--r--drivers/scsi/qla2xxx/qla_init.c7
-rw-r--r--drivers/scsi/qla2xxx/qla_inline.h2
-rw-r--r--drivers/scsi/qla2xxx/qla_iocb.c53
-rw-r--r--drivers/scsi/qla2xxx/qla_isr.c3
-rw-r--r--drivers/scsi/qla2xxx/qla_mbx.c6
-rw-r--r--drivers/scsi/qla2xxx/qla_mid.c11
-rw-r--r--drivers/scsi/qla2xxx/qla_os.c51
-rw-r--r--drivers/scsi/qla2xxx/qla_sup.c3
-rw-r--r--drivers/scsi/sg.c15
-rw-r--r--drivers/scsi/sr.c29
-rw-r--r--drivers/scsi/vmw_pvscsi.c11
-rw-r--r--drivers/staging/android/ashmem.c2
-rw-r--r--drivers/target/iscsi/cxgbit/cxgbit_target.c16
-rw-r--r--drivers/vhost/vhost.c9
-rw-r--r--drivers/video/fbdev/efifb.c51
-rw-r--r--drivers/virtio/virtio_balloon.c2
-rw-r--r--fs/afs/rxrpc.c2
-rw-r--r--fs/dcache.c13
-rw-r--r--fs/efivarfs/inode.c4
-rw-r--r--fs/iomap.c2
-rw-r--r--fs/jfs/jfs_dinode.h7
-rw-r--r--fs/jfs/jfs_incore.h1
-rw-r--r--fs/jfs/super.c3
-rw-r--r--fs/namespace.c28
-rw-r--r--fs/nfs/nfs4proc.c26
-rw-r--r--fs/squashfs/block.c2
-rw-r--r--fs/squashfs/file.c50
-rw-r--r--fs/squashfs/file_cache.c4
-rw-r--r--fs/squashfs/file_direct.c24
-rw-r--r--fs/squashfs/fragment.c13
-rw-r--r--fs/squashfs/squashfs.h3
-rw-r--r--fs/squashfs/squashfs_fs_sb.h1
-rw-r--r--fs/squashfs/super.c5
-rw-r--r--fs/timerfd.c8
-rw-r--r--fs/userfaultfd.c4
-rw-r--r--include/asm-generic/atomic-instrumented.h197
-rw-r--r--include/asm-generic/atomic.h33
-rw-r--r--include/asm-generic/atomic64.h15
-rw-r--r--include/asm-generic/bitops/atomic.h188
-rw-r--r--include/asm-generic/bitops/lock.h68
-rw-r--r--include/asm-generic/pgtable.h8
-rw-r--r--include/asm-generic/tlb.h10
-rw-r--r--include/linux/atomic.h453
-rw-r--r--include/linux/bitops.h22
-rw-r--r--include/linux/bits.h26
-rw-r--r--include/linux/clocksource.h3
-rw-r--r--include/linux/compat.h9
-rw-r--r--include/linux/compat_time.h9
-rw-r--r--include/linux/cpu.h2
-rw-r--r--include/linux/cpuhotplug.h1
-rw-r--r--include/linux/efi.h15
-rw-r--r--include/linux/irqchip/arm-gic-v3.h3
-rw-r--r--include/linux/ktime.h7
-rw-r--r--include/linux/mm.h3
-rw-r--r--include/linux/mm_types.h241
-rw-r--r--include/linux/nmi.h10
-rw-r--r--include/linux/pci.h1
-rw-r--r--include/linux/posix-timers.h4
-rw-r--r--include/linux/pti.h1
-rw-r--r--include/linux/rculist.h19
-rw-r--r--include/linux/rcupdate.h20
-rw-r--r--include/linux/rcutiny.h2
-rw-r--r--include/linux/refcount.h34
-rw-r--r--include/linux/sched.h5
-rw-r--r--include/linux/sched/sysctl.h1
-rw-r--r--include/linux/sched_clock.h5
-rw-r--r--include/linux/smpboot.h15
-rw-r--r--include/linux/spinlock.h53
-rw-r--r--include/linux/srcu.h17
-rw-r--r--include/linux/swait.h36
-rw-r--r--include/linux/syscalls.h10
-rw-r--r--include/linux/time.h4
-rw-r--r--include/linux/time64.h1
-rw-r--r--include/linux/timekeeping.h5
-rw-r--r--include/linux/torture.h4
-rw-r--r--include/net/af_vsock.h4
-rw-r--r--include/net/llc.h5
-rw-r--r--include/trace/events/rcu.h112
-rw-r--r--include/uapi/linux/time.h7
-rw-r--r--init/main.c13
-rw-r--r--ipc/shm.c12
-rw-r--r--kernel/auditsc.c13
-rw-r--r--kernel/bpf/arraymap.c2
-rw-r--r--kernel/bpf/btf.c14
-rw-r--r--kernel/bpf/cpumap.c15
-rw-r--r--kernel/bpf/devmap.c14
-rw-r--r--kernel/bpf/sockmap.c9
-rw-r--r--kernel/bpf/syscall.c4
-rw-r--r--kernel/compat.c29
-rw-r--r--kernel/cpu.c9
-rw-r--r--kernel/fork.c15
-rw-r--r--kernel/irq/Kconfig1
-rw-r--r--kernel/irq/irqdesc.c13
-rw-r--r--kernel/irq/manage.c56
-rw-r--r--kernel/irq/proc.c22
-rw-r--r--kernel/kthread.c6
-rw-r--r--kernel/locking/locktorture.c5
-rw-r--r--kernel/power/suspend.c4
-rw-r--r--kernel/rcu/rcu.h104
-rw-r--r--kernel/rcu/rcuperf.c57
-rw-r--r--kernel/rcu/rcutorture.c462
-rw-r--r--kernel/rcu/srcutiny.c4
-rw-r--r--kernel/rcu/srcutree.c39
-rw-r--r--kernel/rcu/tiny.c4
-rw-r--r--kernel/rcu/tree.c1027
-rw-r--r--kernel/rcu/tree.h71
-rw-r--r--kernel/rcu/tree_exp.h18
-rw-r--r--kernel/rcu/tree_plugin.h188
-rw-r--r--kernel/rcu/update.c45
-rw-r--r--kernel/sched/Makefile2
-rw-r--r--kernel/sched/clock.c57
-rw-r--r--kernel/sched/completion.c8
-rw-r--r--kernel/sched/core.c144
-rw-r--r--kernel/sched/cpufreq_schedutil.c103
-rw-r--r--kernel/sched/deadline.c8
-rw-r--r--kernel/sched/debug.c37
-rw-r--r--kernel/sched/fair.c663
-rw-r--r--kernel/sched/pelt.c399
-rw-r--r--kernel/sched/pelt.h72
-rw-r--r--kernel/sched/rt.c15
-rw-r--r--kernel/sched/sched.h87
-rw-r--r--kernel/sched/swait.c32
-rw-r--r--kernel/sched/wait.c55
-rw-r--r--kernel/smpboot.c54
-rw-r--r--kernel/softirq.c2
-rw-r--r--kernel/stop_machine.c43
-rw-r--r--kernel/sys.c4
-rw-r--r--kernel/sysctl.c8
-rw-r--r--kernel/time/alarmtimer.c7
-rw-r--r--kernel/time/clockevents.c6
-rw-r--r--kernel/time/clocksource.c149
-rw-r--r--kernel/time/hrtimer.c7
-rw-r--r--kernel/time/ntp.c23
-rw-r--r--kernel/time/ntp_internal.h4
-rw-r--r--kernel/time/posix-cpu-timers.c2
-rw-r--r--kernel/time/posix-stubs.c2
-rw-r--r--kernel/time/posix-timers.c55
-rw-r--r--kernel/time/posix-timers.h2
-rw-r--r--kernel/time/sched_clock.c2
-rw-r--r--kernel/time/tick-broadcast-hrtimer.c2
-rw-r--r--kernel/time/tick-sched.c2
-rw-r--r--kernel/time/time.c31
-rw-r--r--kernel/time/timekeeping.c183
-rw-r--r--kernel/time/timekeeping_debug.c2
-rw-r--r--kernel/time/timekeeping_internal.h2
-rw-r--r--kernel/time/timer.c31
-rw-r--r--kernel/torture.c15
-rw-r--r--kernel/watchdog.c147
-rw-r--r--kernel/watchdog_hld.c4
-rw-r--r--lib/Kconfig.ubsan11
-rw-r--r--lib/atomic64.c14
-rw-r--r--lib/debugobjects.c10
-rw-r--r--lib/ioremap.c4
-rw-r--r--lib/refcount.c55
-rw-r--r--mm/hugetlb.c7
-rw-r--r--mm/init-mm.c11
-rw-r--r--mm/memcontrol.c15
-rw-r--r--mm/memory.c31
-rw-r--r--mm/page_alloc.c16
-rw-r--r--net/atm/pppoatm.c2
-rw-r--r--net/core/dev.c17
-rw-r--r--net/core/filter.c12
-rw-r--r--net/core/lwt_bpf.c2
-rw-r--r--net/core/xdp.c3
-rw-r--r--net/dccp/ccids/ccid2.c6
-rw-r--r--net/dsa/slave.c10
-rw-r--r--net/ipv4/fib_frontend.c4
-rw-r--r--net/ipv4/igmp.c3
-rw-r--r--net/ipv4/inet_fragment.c6
-rw-r--r--net/ipv4/ip_fragment.c5
-rw-r--r--net/ipv4/tcp_bbr.c4
-rw-r--r--net/ipv4/tcp_input.c9
-rw-r--r--net/ipv6/esp6.c4
-rw-r--r--net/ipv6/ip6_tunnel.c8
-rw-r--r--net/ipv6/ip6_vti.c11
-rw-r--r--net/ipv6/route.c4
-rw-r--r--net/l2tp/l2tp_ppp.c13
-rw-r--r--net/llc/llc_core.c4
-rw-r--r--net/netlink/af_netlink.c7
-rw-r--r--net/openvswitch/meter.c10
-rw-r--r--net/packet/af_packet.c10
-rw-r--r--net/rds/ib_frmr.c5
-rw-r--r--net/rds/ib_mr.h3
-rw-r--r--net/rds/ib_rdma.c21
-rw-r--r--net/rds/rdma.c13
-rw-r--r--net/rds/rds.h5
-rw-r--r--net/rds/send.c12
-rw-r--r--net/rxrpc/ar-internal.h8
-rw-r--r--net/rxrpc/call_accept.c4
-rw-r--r--net/rxrpc/call_object.c2
-rw-r--r--net/rxrpc/conn_event.c4
-rw-r--r--net/rxrpc/conn_object.c4
-rw-r--r--net/rxrpc/local_object.c2
-rw-r--r--net/rxrpc/net_ns.c6
-rw-r--r--net/rxrpc/output.c12
-rw-r--r--net/rxrpc/peer_event.c156
-rw-r--r--net/rxrpc/peer_object.c10
-rw-r--r--net/rxrpc/rxkad.c4
-rw-r--r--net/smc/af_smc.c15
-rw-r--r--net/smc/smc_cdc.c3
-rw-r--r--net/socket.c5
-rw-r--r--net/tipc/net.c4
-rw-r--r--net/vmw_vsock/af_vsock.c15
-rw-r--r--net/vmw_vsock/vmci_transport.c3
-rw-r--r--net/xdp/xsk.c4
-rw-r--r--net/xdp/xsk_queue.h2
-rw-r--r--net/xfrm/xfrm_policy.c3
-rw-r--r--net/xfrm/xfrm_user.c18
-rw-r--r--samples/bpf/xdp_redirect_cpu_kern.c2
-rw-r--r--samples/bpf/xdp_redirect_cpu_user.c4
-rw-r--r--scripts/Makefile.ubsan4
-rw-r--r--security/Kconfig2
-rw-r--r--tools/bpf/bpftool/map.c14
-rw-r--r--tools/include/uapi/linux/btf.h2
-rw-r--r--tools/lib/bpf/btf.c41
-rw-r--r--tools/lib/bpf/btf.h12
-rw-r--r--tools/lib/bpf/libbpf.c87
-rw-r--r--tools/lib/bpf/libbpf.h4
-rw-r--r--tools/memory-model/Documentation/explanation.txt2
-rw-r--r--tools/memory-model/Documentation/recipes.txt12
-rw-r--r--tools/memory-model/README20
-rw-r--r--tools/memory-model/linux-kernel.bell2
-rw-r--r--tools/memory-model/litmus-tests/IRIW+fencembonceonces+OnceOnce.litmus (renamed from tools/memory-model/litmus-tests/IRIW+mbonceonces+OnceOnce.litmus)2
-rw-r--r--tools/memory-model/litmus-tests/ISA2+pooncelock+pooncelock+pombonce.litmus2
-rw-r--r--tools/memory-model/litmus-tests/LB+fencembonceonce+ctrlonceonce.litmus (renamed from tools/memory-model/litmus-tests/LB+ctrlonceonce+mbonceonce.litmus)2
-rw-r--r--tools/memory-model/litmus-tests/MP+fencewmbonceonce+fencermbonceonce.litmus (renamed from tools/memory-model/litmus-tests/MP+wmbonceonce+rmbonceonce.litmus)2
-rw-r--r--tools/memory-model/litmus-tests/R+fencembonceonces.litmus (renamed from tools/memory-model/litmus-tests/R+mbonceonces.litmus)2
-rw-r--r--tools/memory-model/litmus-tests/README25
-rw-r--r--tools/memory-model/litmus-tests/S+fencewmbonceonce+poacquireonce.litmus (renamed from tools/memory-model/litmus-tests/S+wmbonceonce+poacquireonce.litmus)2
-rw-r--r--tools/memory-model/litmus-tests/SB+fencembonceonces.litmus (renamed from tools/memory-model/litmus-tests/SB+mbonceonces.litmus)2
-rw-r--r--tools/memory-model/litmus-tests/SB+rfionceonce-poonceonces.litmus32
-rw-r--r--tools/memory-model/litmus-tests/WRC+pooncerelease+fencermbonceonce+Once.litmus (renamed from tools/memory-model/litmus-tests/WRC+pooncerelease+rmbonceonce+Once.litmus)2
-rw-r--r--tools/memory-model/litmus-tests/Z6.0+pooncerelease+poacquirerelease+fencembonceonce.litmus (renamed from tools/memory-model/litmus-tests/Z6.0+pooncerelease+poacquirerelease+mbonceonce.litmus)2
-rwxr-xr-x[-rw-r--r--]tools/memory-model/scripts/checkalllitmus.sh2
-rwxr-xr-x[-rw-r--r--]tools/memory-model/scripts/checklitmus.sh2
-rw-r--r--tools/objtool/arch/x86/include/asm/orc_types.h2
-rw-r--r--tools/objtool/check.c1
-rw-r--r--tools/objtool/check.h2
-rw-r--r--tools/objtool/orc_dump.c3
-rw-r--r--tools/objtool/orc_gen.c2
-rw-r--r--tools/power/x86/turbostat/turbostat.84
-rw-r--r--tools/power/x86/turbostat/turbostat.c120
-rw-r--r--tools/testing/selftests/bpf/bpf_helpers.h9
-rw-r--r--tools/testing/selftests/bpf/test_btf.c114
-rw-r--r--tools/testing/selftests/bpf/test_btf_haskv.c7
-rwxr-xr-xtools/testing/selftests/bpf/test_lwt_seg6local.sh6
-rw-r--r--tools/testing/selftests/bpf/test_sockmap.c2
-rw-r--r--tools/testing/selftests/net/tcp_mmap.c2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/configinit.sh26
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-build.sh11
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh1
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck.sh1
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh5
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm.sh2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/parse-console.sh7
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot4
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE08-T.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh2
-rw-r--r--tools/testing/selftests/timers/raw_skew.c5
-rw-r--r--tools/virtio/asm/barrier.h4
-rw-r--r--tools/virtio/linux/kernel.h5
-rw-r--r--virt/kvm/arm/arm.c4
-rw-r--r--virt/kvm/arm/psci.c2
-rw-r--r--virt/kvm/async_pf.c2
-rw-r--r--virt/kvm/kvm_main.c4
637 files changed, 13267 insertions, 8688 deletions
diff --git a/Documentation/RCU/Design/Data-Structures/Data-Structures.html b/Documentation/RCU/Design/Data-Structures/Data-Structures.html
index 6c06e10bd04b..f5120a00f511 100644
--- a/Documentation/RCU/Design/Data-Structures/Data-Structures.html
+++ b/Documentation/RCU/Design/Data-Structures/Data-Structures.html
@@ -380,31 +380,26 @@ and therefore need no protection.
380as follows: 380as follows:
381 381
382<pre> 382<pre>
383 1 unsigned long gpnum; 383 1 unsigned long gp_seq;
384 2 unsigned long completed;
385</pre> 384</pre>
386 385
387<p>RCU grace periods are numbered, and 386<p>RCU grace periods are numbered, and
388the <tt>-&gt;gpnum</tt> field contains the number of the grace 387the <tt>-&gt;gp_seq</tt> field contains the current grace-period
389period that started most recently. 388sequence number.
390The <tt>-&gt;completed</tt> field contains the number of the 389The bottom two bits are the state of the current grace period,
391grace period that completed most recently. 390which can be zero for not yet started or one for in progress.
392If the two fields are equal, the RCU grace period that most recently 391In other words, if the bottom two bits of <tt>-&gt;gp_seq</tt> are
393started has already completed, and therefore the corresponding 392zero, the corresponding flavor of RCU is idle.
394flavor of RCU is idle. 393Any other value in the bottom two bits indicates that something is broken.
395If <tt>-&gt;gpnum</tt> is one greater than <tt>-&gt;completed</tt>, 394This field is protected by the root <tt>rcu_node</tt> structure's
396then <tt>-&gt;gpnum</tt> gives the number of the current RCU
397grace period, which has not yet completed.
398Any other combination of values indicates that something is broken.
399These two fields are protected by the root <tt>rcu_node</tt>'s
400<tt>-&gt;lock</tt> field. 395<tt>-&gt;lock</tt> field.
401 396
402</p><p>There are <tt>-&gt;gpnum</tt> and <tt>-&gt;completed</tt> fields 397</p><p>There are <tt>-&gt;gp_seq</tt> fields
403in the <tt>rcu_node</tt> and <tt>rcu_data</tt> structures 398in the <tt>rcu_node</tt> and <tt>rcu_data</tt> structures
404as well. 399as well.
405The fields in the <tt>rcu_state</tt> structure represent the 400The fields in the <tt>rcu_state</tt> structure represent the
406most current values, and those of the other structures are compared 401most current value, and those of the other structures are compared
407in order to detect the start of a new grace period in a distributed 402in order to detect the beginnings and ends of grace periods in a distributed
408fashion. 403fashion.
409The values flow from <tt>rcu_state</tt> to <tt>rcu_node</tt> 404The values flow from <tt>rcu_state</tt> to <tt>rcu_node</tt>
410(down the tree from the root to the leaves) to <tt>rcu_data</tt>. 405(down the tree from the root to the leaves) to <tt>rcu_data</tt>.
@@ -512,27 +507,47 @@ than to be heisenbugged out of existence.
512as follows: 507as follows:
513 508
514<pre> 509<pre>
515 1 unsigned long gpnum; 510 1 unsigned long gp_seq;
516 2 unsigned long completed; 511 2 unsigned long gp_seq_needed;
517</pre> 512</pre>
518 513
519<p>These fields are the counterparts of the fields of the same name in 514<p>The <tt>rcu_node</tt> structures' <tt>-&gt;gp_seq</tt> fields are
520the <tt>rcu_state</tt> structure. 515the counterparts of the field of the same name in the <tt>rcu_state</tt>
521They each may lag up to one behind their <tt>rcu_state</tt> 516structure.
522counterparts. 517They each may lag up to one step behind their <tt>rcu_state</tt>
523If a given <tt>rcu_node</tt> structure's <tt>-&gt;gpnum</tt> and 518counterpart.
524<tt>-&gt;complete</tt> fields are equal, then this <tt>rcu_node</tt> 519If the bottom two bits of a given <tt>rcu_node</tt> structure's
520<tt>-&gt;gp_seq</tt> field is zero, then this <tt>rcu_node</tt>
525structure believes that RCU is idle. 521structure believes that RCU is idle.
526Otherwise, as with the <tt>rcu_state</tt> structure, 522</p><p>The <tt>&gt;gp_seq</tt> field of each <tt>rcu_node</tt>
527the <tt>-&gt;gpnum</tt> field will be one greater than the 523structure is updated at the beginning and the end
528<tt>-&gt;complete</tt> fields, with <tt>-&gt;gpnum</tt> 524of each grace period.
529indicating which grace period this <tt>rcu_node</tt> believes 525
530is still being waited for. 526<p>The <tt>-&gt;gp_seq_needed</tt> fields record the
527furthest-in-the-future grace period request seen by the corresponding
528<tt>rcu_node</tt> structure. The request is considered fulfilled when
529the value of the <tt>-&gt;gp_seq</tt> field equals or exceeds that of
530the <tt>-&gt;gp_seq_needed</tt> field.
531 531
532</p><p>The <tt>&gt;gpnum</tt> field of each <tt>rcu_node</tt> 532<table>
533structure is updated at the beginning 533<tr><th>&nbsp;</th></tr>
534of each grace period, and the <tt>-&gt;completed</tt> fields are 534<tr><th align="left">Quick Quiz:</th></tr>
535updated at the end of each grace period. 535<tr><td>
536 Suppose that this <tt>rcu_node</tt> structure doesn't see
537 a request for a very long time.
538 Won't wrapping of the <tt>-&gt;gp_seq</tt> field cause
539 problems?
540</td></tr>
541<tr><th align="left">Answer:</th></tr>
542<tr><td bgcolor="#ffffff"><font color="ffffff">
543 No, because if the <tt>-&gt;gp_seq_needed</tt> field lags behind the
544 <tt>-&gt;gp_seq</tt> field, the <tt>-&gt;gp_seq_needed</tt> field
545 will be updated at the end of the grace period.
546 Modulo-arithmetic comparisons therefore will always get the
547 correct answer, even with wrapping.
548</font></td></tr>
549<tr><td>&nbsp;</td></tr>
550</table>
536 551
537<h5>Quiescent-State Tracking</h5> 552<h5>Quiescent-State Tracking</h5>
538 553
@@ -626,9 +641,8 @@ normal and expedited grace periods, respectively.
626 </ol> 641 </ol>
627 642
628 <p><font color="ffffff">So the locking is absolutely required in 643 <p><font color="ffffff">So the locking is absolutely required in
629 order to coordinate 644 order to coordinate clearing of the bits with updating of the
630 clearing of the bits with the grace-period numbers in 645 grace-period sequence number in <tt>-&gt;gp_seq</tt>.
631 <tt>-&gt;gpnum</tt> and <tt>-&gt;completed</tt>.
632</font></td></tr> 646</font></td></tr>
633<tr><td>&nbsp;</td></tr> 647<tr><td>&nbsp;</td></tr>
634</table> 648</table>
@@ -1038,15 +1052,15 @@ out any <tt>rcu_data</tt> structure for which this flag is not set.
1038as follows: 1052as follows:
1039 1053
1040<pre> 1054<pre>
1041 1 unsigned long completed; 1055 1 unsigned long gp_seq;
1042 2 unsigned long gpnum; 1056 2 unsigned long gp_seq_needed;
1043 3 bool cpu_no_qs; 1057 3 bool cpu_no_qs;
1044 4 bool core_needs_qs; 1058 4 bool core_needs_qs;
1045 5 bool gpwrap; 1059 5 bool gpwrap;
1046 6 unsigned long rcu_qs_ctr_snap; 1060 6 unsigned long rcu_qs_ctr_snap;
1047</pre> 1061</pre>
1048 1062
1049<p>The <tt>completed</tt> and <tt>gpnum</tt> 1063<p>The <tt>-&gt;gp_seq</tt> and <tt>-&gt;gp_seq_needed</tt>
1050fields are the counterparts of the fields of the same name 1064fields are the counterparts of the fields of the same name
1051in the <tt>rcu_state</tt> and <tt>rcu_node</tt> structures. 1065in the <tt>rcu_state</tt> and <tt>rcu_node</tt> structures.
1052They may each lag up to one behind their <tt>rcu_node</tt> 1066They may each lag up to one behind their <tt>rcu_node</tt>
@@ -1054,15 +1068,9 @@ counterparts, but in <tt>CONFIG_NO_HZ_IDLE</tt> and
1054<tt>CONFIG_NO_HZ_FULL</tt> kernels can lag 1068<tt>CONFIG_NO_HZ_FULL</tt> kernels can lag
1055arbitrarily far behind for CPUs in dyntick-idle mode (but these counters 1069arbitrarily far behind for CPUs in dyntick-idle mode (but these counters
1056will catch up upon exit from dyntick-idle mode). 1070will catch up upon exit from dyntick-idle mode).
1057If a given <tt>rcu_data</tt> structure's <tt>-&gt;gpnum</tt> and 1071If the lower two bits of a given <tt>rcu_data</tt> structure's
1058<tt>-&gt;complete</tt> fields are equal, then this <tt>rcu_data</tt> 1072<tt>-&gt;gp_seq</tt> are zero, then this <tt>rcu_data</tt>
1059structure believes that RCU is idle. 1073structure believes that RCU is idle.
1060Otherwise, as with the <tt>rcu_state</tt> and <tt>rcu_node</tt>
1061structure,
1062the <tt>-&gt;gpnum</tt> field will be one greater than the
1063<tt>-&gt;complete</tt> fields, with <tt>-&gt;gpnum</tt>
1064indicating which grace period this <tt>rcu_data</tt> believes
1065is still being waited for.
1066 1074
1067<table> 1075<table>
1068<tr><th>&nbsp;</th></tr> 1076<tr><th>&nbsp;</th></tr>
@@ -1070,13 +1078,13 @@ is still being waited for.
1070<tr><td> 1078<tr><td>
1071 All this replication of the grace period numbers can only cause 1079 All this replication of the grace period numbers can only cause
1072 massive confusion. 1080 massive confusion.
1073 Why not just keep a global pair of counters and be done with it??? 1081 Why not just keep a global sequence number and be done with it???
1074</td></tr> 1082</td></tr>
1075<tr><th align="left">Answer:</th></tr> 1083<tr><th align="left">Answer:</th></tr>
1076<tr><td bgcolor="#ffffff"><font color="ffffff"> 1084<tr><td bgcolor="#ffffff"><font color="ffffff">
1077 Because if there was only a single global pair of grace-period 1085 Because if there was only a single global sequence
1078 numbers, there would need to be a single global lock to allow 1086 numbers, there would need to be a single global lock to allow
1079 safely accessing and updating them. 1087 safely accessing and updating it.
1080 And if we are not going to have a single global lock, we need 1088 And if we are not going to have a single global lock, we need
1081 to carefully manage the numbers on a per-node basis. 1089 to carefully manage the numbers on a per-node basis.
1082 Recall from the answer to a previous Quick Quiz that the consequences 1090 Recall from the answer to a previous Quick Quiz that the consequences
@@ -1091,8 +1099,8 @@ CPU has not yet passed through a quiescent state,
1091while the <tt>-&gt;core_needs_qs</tt> flag indicates that the 1099while the <tt>-&gt;core_needs_qs</tt> flag indicates that the
1092RCU core needs a quiescent state from the corresponding CPU. 1100RCU core needs a quiescent state from the corresponding CPU.
1093The <tt>-&gt;gpwrap</tt> field indicates that the corresponding 1101The <tt>-&gt;gpwrap</tt> field indicates that the corresponding
1094CPU has remained idle for so long that the <tt>completed</tt> 1102CPU has remained idle for so long that the
1095and <tt>gpnum</tt> counters are in danger of overflow, which 1103<tt>gp_seq</tt> counter is in danger of overflow, which
1096will cause the CPU to disregard the values of its counters on 1104will cause the CPU to disregard the values of its counters on
1097its next exit from idle. 1105its next exit from idle.
1098Finally, the <tt>rcu_qs_ctr_snap</tt> field is used to detect 1106Finally, the <tt>rcu_qs_ctr_snap</tt> field is used to detect
@@ -1130,10 +1138,10 @@ The CPU advances the callbacks in its <tt>rcu_data</tt> structure
1130whenever it notices that another RCU grace period has completed. 1138whenever it notices that another RCU grace period has completed.
1131The CPU detects the completion of an RCU grace period by noticing 1139The CPU detects the completion of an RCU grace period by noticing
1132that the value of its <tt>rcu_data</tt> structure's 1140that the value of its <tt>rcu_data</tt> structure's
1133<tt>-&gt;completed</tt> field differs from that of its leaf 1141<tt>-&gt;gp_seq</tt> field differs from that of its leaf
1134<tt>rcu_node</tt> structure. 1142<tt>rcu_node</tt> structure.
1135Recall that each <tt>rcu_node</tt> structure's 1143Recall that each <tt>rcu_node</tt> structure's
1136<tt>-&gt;completed</tt> field is updated at the end of each 1144<tt>-&gt;gp_seq</tt> field is updated at the beginnings and ends of each
1137grace period. 1145grace period.
1138 1146
1139<p> 1147<p>
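
The Data-Structures.html changes above describe the new ->gp_seq encoding: the low two bits hold the grace-period state (zero meaning idle), the remaining bits count grace periods, and requests recorded in ->gp_seq_needed are compared against ->gp_seq using wrap-tolerant modular arithmetic. Below is a minimal standalone C sketch of that scheme; the macro and function names are illustrative only, and the kernel's real helpers (in kernel/rcu/rcu.h) add sanity checks and memory barriers omitted here.

	#include <stdio.h>
	#include <limits.h>

	#define GP_SEQ_STATE_MASK 0x3UL	/* low two bits: 0 = idle, 1 = in progress */

	/* Nonzero when no grace period is in progress. */
	static int gp_seq_idle(unsigned long gp_seq)
	{
		return (gp_seq & GP_SEQ_STATE_MASK) == 0;
	}

	/* Wrap-tolerant test: has gp_seq reached the requested gp_seq_needed? */
	static int gp_seq_reached(unsigned long gp_seq, unsigned long gp_seq_needed)
	{
		return (long)(gp_seq - gp_seq_needed) >= 0;
	}

	int main(void)
	{
		unsigned long gp_seq = ULONG_MAX & ~GP_SEQ_STATE_MASK;	/* idle, about to wrap */
		unsigned long gp_seq_needed = gp_seq + 4;		/* end of the next grace period */

		printf("idle=%d fulfilled=%d\n", gp_seq_idle(gp_seq),
		       gp_seq_reached(gp_seq, gp_seq_needed));		/* idle=1 fulfilled=0 */
		gp_seq += 4;	/* one full start/end cycle; the counter wraps to zero */
		printf("idle=%d fulfilled=%d\n", gp_seq_idle(gp_seq),
		       gp_seq_reached(gp_seq, gp_seq_needed));		/* idle=1 fulfilled=1 */
		return 0;
	}

As the quick quiz above notes, the signed view of the unsigned difference keeps such comparisons correct even when the counter wraps.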
diff --git a/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.html b/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.html
index 8651b0b4fd79..a346ce0116eb 100644
--- a/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.html
+++ b/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.html
@@ -357,7 +357,7 @@ parts, starting in this section with the various phases of
357grace-period initialization. 357grace-period initialization.
358 358
359<p>The first ordering-related grace-period initialization action is to 359<p>The first ordering-related grace-period initialization action is to
360increment the <tt>rcu_state</tt> structure's <tt>-&gt;gpnum</tt> 360advance the <tt>rcu_state</tt> structure's <tt>-&gt;gp_seq</tt>
361grace-period-number counter, as shown below: 361grace-period-number counter, as shown below:
362 362
363</p><p><img src="TreeRCU-gp-init-1.svg" alt="TreeRCU-gp-init-1.svg" width="75%"> 363</p><p><img src="TreeRCU-gp-init-1.svg" alt="TreeRCU-gp-init-1.svg" width="75%">
@@ -388,7 +388,7 @@ its last CPU and if the next <tt>rcu_node</tt> structure has no online CPUs).
388 388
389<p>The final <tt>rcu_gp_init()</tt> pass through the <tt>rcu_node</tt> 389<p>The final <tt>rcu_gp_init()</tt> pass through the <tt>rcu_node</tt>
390tree traverses breadth-first, setting each <tt>rcu_node</tt> structure's 390tree traverses breadth-first, setting each <tt>rcu_node</tt> structure's
391<tt>-&gt;gpnum</tt> field to the newly incremented value from the 391<tt>-&gt;gp_seq</tt> field to the newly advanced value from the
392<tt>rcu_state</tt> structure, as shown in the following diagram. 392<tt>rcu_state</tt> structure, as shown in the following diagram.
393 393
394</p><p><img src="TreeRCU-gp-init-3.svg" alt="TreeRCU-gp-init-1.svg" width="75%"> 394</p><p><img src="TreeRCU-gp-init-3.svg" alt="TreeRCU-gp-init-1.svg" width="75%">
@@ -398,9 +398,9 @@ tree traverses breadth-first, setting each <tt>rcu_node</tt> structure's
398to notice that a new grace period has started, as described in the next 398to notice that a new grace period has started, as described in the next
399section. 399section.
400But because the grace-period kthread started the grace period at the 400But because the grace-period kthread started the grace period at the
401root (with the increment of the <tt>rcu_state</tt> structure's 401root (with the advancing of the <tt>rcu_state</tt> structure's
402<tt>-&gt;gpnum</tt> field) before setting each leaf <tt>rcu_node</tt> 402<tt>-&gt;gp_seq</tt> field) before setting each leaf <tt>rcu_node</tt>
403structure's <tt>-&gt;gpnum</tt> field, each CPU's observation of 403structure's <tt>-&gt;gp_seq</tt> field, each CPU's observation of
404the start of the grace period will happen after the actual start 404the start of the grace period will happen after the actual start
405of the grace period. 405of the grace period.
406 406
@@ -466,7 +466,7 @@ section that the grace period must wait on.
466<tr><td> 466<tr><td>
467 But a RCU read-side critical section might have started 467 But a RCU read-side critical section might have started
468 after the beginning of the grace period 468 after the beginning of the grace period
469 (the <tt>-&gt;gpnum++</tt> from earlier), so why should 469 (the advancing of <tt>-&gt;gp_seq</tt> from earlier), so why should
470 the grace period wait on such a critical section? 470 the grace period wait on such a critical section?
471</td></tr> 471</td></tr>
472<tr><th align="left">Answer:</th></tr> 472<tr><th align="left">Answer:</th></tr>
@@ -609,10 +609,8 @@ states outstanding from other CPUs.
609<h4><a name="Grace-Period Cleanup">Grace-Period Cleanup</a></h4> 609<h4><a name="Grace-Period Cleanup">Grace-Period Cleanup</a></h4>
610 610
611<p>Grace-period cleanup first scans the <tt>rcu_node</tt> tree 611<p>Grace-period cleanup first scans the <tt>rcu_node</tt> tree
612breadth-first setting all the <tt>-&gt;completed</tt> fields equal 612breadth-first advancing all the <tt>-&gt;gp_seq</tt> fields, then it
613to the number of the newly completed grace period, then it sets 613advances the <tt>rcu_state</tt> structure's <tt>-&gt;gp_seq</tt> field.
614the <tt>rcu_state</tt> structure's <tt>-&gt;completed</tt> field,
615again to the number of the newly completed grace period.
616The ordering effects are shown below: 614The ordering effects are shown below:
617 615
618</p><p><img src="TreeRCU-gp-cleanup.svg" alt="TreeRCU-gp-cleanup.svg" width="75%"> 616</p><p><img src="TreeRCU-gp-cleanup.svg" alt="TreeRCU-gp-cleanup.svg" width="75%">
@@ -634,7 +632,7 @@ grace-period cleanup is complete, the next grace period can begin.
634 CPU has reported its quiescent state, but it may be some 632 CPU has reported its quiescent state, but it may be some
635 milliseconds before RCU becomes aware of this. 633 milliseconds before RCU becomes aware of this.
636 The latest reasonable candidate is once the <tt>rcu_state</tt> 634 The latest reasonable candidate is once the <tt>rcu_state</tt>
637 structure's <tt>-&gt;completed</tt> field has been updated, 635 structure's <tt>-&gt;gp_seq</tt> field has been updated,
638 but it is quite possible that some CPUs have already completed 636 but it is quite possible that some CPUs have already completed
639 phase two of their updates by that time. 637 phase two of their updates by that time.
640 In short, if you are going to work with RCU, you need to 638 In short, if you are going to work with RCU, you need to
@@ -647,7 +645,7 @@ grace-period cleanup is complete, the next grace period can begin.
647<h4><a name="Callback Invocation">Callback Invocation</a></h4> 645<h4><a name="Callback Invocation">Callback Invocation</a></h4>
648 646
649<p>Once a given CPU's leaf <tt>rcu_node</tt> structure's 647<p>Once a given CPU's leaf <tt>rcu_node</tt> structure's
650<tt>-&gt;completed</tt> field has been updated, that CPU can begin 648<tt>-&gt;gp_seq</tt> field has been updated, that CPU can begin
651invoking its RCU callbacks that were waiting for this grace period 649invoking its RCU callbacks that were waiting for this grace period
652to end. 650to end.
653These callbacks are identified by <tt>rcu_advance_cbs()</tt>, 651These callbacks are identified by <tt>rcu_advance_cbs()</tt>,
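
The Tree-RCU-Memory-Ordering.html changes above replace "->gpnum++" and "->completed = ->gpnum" with rcu_seq_start() and rcu_seq_end() on the ->gp_seq fields, advancing the rcu_state value first at grace-period initialization and last at cleanup. A simplified sketch of that advancement follows, again with illustrative names and without the memory barriers and sanity checks of the real kernel/rcu/rcu.h helpers.

	#include <stdio.h>

	#define GP_SEQ_STATE_MASK 0x3UL

	/* Mark a grace period as started: the state bits go from 0 to 1. */
	static void gp_seq_start(unsigned long *sp)
	{
		*sp += 1;
	}

	/* Mark it as ended: round up so the counter advances and state returns to 0. */
	static void gp_seq_end(unsigned long *sp)
	{
		*sp = (*sp | GP_SEQ_STATE_MASK) + 1;
	}

	int main(void)
	{
		unsigned long rsp_gp_seq = 0;	/* stand-in for the rcu_state field */
		unsigned long rnp_gp_seq = 0;	/* stand-in for one rcu_node field */

		/* Initialization: advance the root value, then copy it down the tree. */
		gp_seq_start(&rsp_gp_seq);
		rnp_gp_seq = rsp_gp_seq;

		/* Cleanup: advance the rcu_node values breadth-first, then rcu_state. */
		gp_seq_end(&rnp_gp_seq);
		gp_seq_end(&rsp_gp_seq);

		printf("rsp_gp_seq=%lu rnp_gp_seq=%lu\n", rsp_gp_seq, rnp_gp_seq);	/* 4 4 */
		return 0;
	}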
diff --git a/Documentation/RCU/Design/Memory-Ordering/TreeRCU-gp-cleanup.svg b/Documentation/RCU/Design/Memory-Ordering/TreeRCU-gp-cleanup.svg
index 754f426b297a..bf84fbab27ee 100644
--- a/Documentation/RCU/Design/Memory-Ordering/TreeRCU-gp-cleanup.svg
+++ b/Documentation/RCU/Design/Memory-Ordering/TreeRCU-gp-cleanup.svg
@@ -384,11 +384,11 @@
384 inkscape:window-height="1144" 384 inkscape:window-height="1144"
385 id="namedview208" 385 id="namedview208"
386 showgrid="true" 386 showgrid="true"
387 inkscape:zoom="0.70710678" 387 inkscape:zoom="0.78716603"
388 inkscape:cx="617.89017" 388 inkscape:cx="513.06403"
389 inkscape:cy="542.52419" 389 inkscape:cy="623.1214"
390 inkscape:window-x="86" 390 inkscape:window-x="102"
391 inkscape:window-y="28" 391 inkscape:window-y="38"
392 inkscape:window-maximized="0" 392 inkscape:window-maximized="0"
393 inkscape:current-layer="g3188-3" 393 inkscape:current-layer="g3188-3"
394 fit-margin-top="5" 394 fit-margin-top="5"
@@ -417,13 +417,15 @@
417 id="g3188"> 417 id="g3188">
418 <text 418 <text
419 xml:space="preserve" 419 xml:space="preserve"
420 x="3199.1516" 420 x="3145.9592"
421 y="13255.592" 421 y="13255.592"
422 font-style="normal" 422 font-style="normal"
423 font-weight="bold" 423 font-weight="bold"
424 font-size="192" 424 font-size="192"
425 id="text202" 425 id="text202"
426 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;font-family:Courier">-&gt;completed = -&gt;gpnum</text> 426 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;font-family:Courier"><tspan
427 style="font-size:172.87567139px"
428 id="tspan3143">rcu_seq_end(&amp;rnp-&gt;gp_seq)</tspan></text>
427 <g 429 <g
428 id="g3107" 430 id="g3107"
429 transform="translate(947.90548,11584.029)"> 431 transform="translate(947.90548,11584.029)">
@@ -502,13 +504,15 @@
502 </g> 504 </g>
503 <text 505 <text
504 xml:space="preserve" 506 xml:space="preserve"
505 x="5324.5371" 507 x="5264.4731"
506 y="15414.598" 508 y="15428.84"
507 font-style="normal" 509 font-style="normal"
508 font-weight="bold" 510 font-weight="bold"
509 font-size="192" 511 font-size="192"
510 id="text202-753" 512 id="text202-36-7"
511 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">-&gt;completed = -&gt;gpnum</text> 513 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier"><tspan
514 style="font-size:172.87567139px"
515 id="tspan3166-5">rcu_seq_end(&amp;rnp-&gt;gp_seq)</tspan></text>
512 </g> 516 </g>
513 <g 517 <g
514 style="fill:none;stroke-width:0.025in" 518 style="fill:none;stroke-width:0.025in"
@@ -547,15 +551,6 @@
547 sodipodi:linespacing="125%"><tspan 551 sodipodi:linespacing="125%"><tspan
548 style="font-size:159.57754517px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;font-family:Liberation Sans;-inkscape-font-specification:Liberation Sans" 552 style="font-size:159.57754517px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;font-family:Liberation Sans;-inkscape-font-specification:Liberation Sans"
549 id="tspan3104-6-5-6-0">Leaf</tspan></text> 553 id="tspan3104-6-5-6-0">Leaf</tspan></text>
550 <text
551 xml:space="preserve"
552 x="7479.5796"
553 y="17699.943"
554 font-style="normal"
555 font-weight="bold"
556 font-size="192"
557 id="text202-9"
558 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">-&gt;completed = -&gt;gpnum</text>
559 <path 554 <path
560 sodipodi:nodetypes="cc" 555 sodipodi:nodetypes="cc"
561 inkscape:connector-curvature="0" 556 inkscape:connector-curvature="0"
@@ -566,15 +561,6 @@
566 style="fill:none;stroke-width:0.025in" 561 style="fill:none;stroke-width:0.025in"
567 transform="translate(-737.93887,7732.6672)" 562 transform="translate(-737.93887,7732.6672)"
568 id="g3188-3"> 563 id="g3188-3">
569 <text
570 xml:space="preserve"
571 x="3225.7478"
572 y="13175.802"
573 font-style="normal"
574 font-weight="bold"
575 font-size="192"
576 id="text202-60"
577 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;font-family:Courier">rsp-&gt;completed =</text>
578 <g 564 <g
579 id="g3107-62" 565 id="g3107-62"
580 transform="translate(947.90548,11584.029)"> 566 transform="translate(947.90548,11584.029)">
@@ -607,15 +593,6 @@
607 sodipodi:linespacing="125%"><tspan 593 sodipodi:linespacing="125%"><tspan
608 style="font-size:159.57754517px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;font-family:Liberation Sans;-inkscape-font-specification:Liberation Sans" 594 style="font-size:159.57754517px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;font-family:Liberation Sans;-inkscape-font-specification:Liberation Sans"
609 id="tspan3104-6-5-7">Root</tspan></text> 595 id="tspan3104-6-5-7">Root</tspan></text>
610 <text
611 xml:space="preserve"
612 x="3225.7478"
613 y="13390.038"
614 font-style="normal"
615 font-weight="bold"
616 font-size="192"
617 id="text202-60-3"
618 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier"> rnp-&gt;completed</text>
619 <flowRoot 596 <flowRoot
620 xml:space="preserve" 597 xml:space="preserve"
621 id="flowRoot3356" 598 id="flowRoot3356"
@@ -627,7 +604,18 @@
627 height="63.63961" 604 height="63.63961"
628 x="332.34018" 605 x="332.34018"
629 y="681.87292" /></flowRegion><flowPara 606 y="681.87292" /></flowRegion><flowPara
630 id="flowPara3362" /></flowRoot> </g> 607 id="flowPara3362" /></flowRoot> <text
608 xml:space="preserve"
609 x="3156.6121"
610 y="13317.754"
611 font-style="normal"
612 font-weight="bold"
613 font-size="192"
614 id="text202-36-6"
615 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier"><tspan
616 style="font-size:172.87567139px"
617 id="tspan3166-0">rcu_seq_end(&amp;rsp-&gt;gp_seq)</tspan></text>
618 </g>
631 <g 619 <g
632 style="fill:none;stroke-width:0.025in" 620 style="fill:none;stroke-width:0.025in"
633 transform="translate(-858.40227,7769.0342)" 621 transform="translate(-858.40227,7769.0342)"
@@ -859,6 +847,17 @@
859 id="path3414-8-3-6-6" 847 id="path3414-8-3-6-6"
860 inkscape:connector-curvature="0" 848 inkscape:connector-curvature="0"
861 sodipodi:nodetypes="cc" /> 849 sodipodi:nodetypes="cc" />
850 <text
851 xml:space="preserve"
852 x="7418.769"
853 y="17646.104"
854 font-style="normal"
855 font-weight="bold"
856 font-size="192"
857 id="text202-36-70"
858 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier"><tspan
859 style="font-size:172.87567139px"
860 id="tspan3166-93">rcu_seq_end(&amp;rnp-&gt;gp_seq)</tspan></text>
862 </g> 861 </g>
863 <g 862 <g
864 transform="translate(-1642.5377,-11611.245)" 863 transform="translate(-1642.5377,-11611.245)"
@@ -887,13 +886,15 @@
887 </g> 886 </g>
888 <text 887 <text
889 xml:space="preserve" 888 xml:space="preserve"
890 x="5327.3057" 889 x="5274.1133"
891 y="15428.84" 890 y="15428.84"
892 font-style="normal" 891 font-style="normal"
893 font-weight="bold" 892 font-weight="bold"
894 font-size="192" 893 font-size="192"
895 id="text202-36" 894 id="text202-36"
896 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">-&gt;completed = -&gt;gpnum</text> 895 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier"><tspan
896 style="font-size:172.87567139px"
897 id="tspan3166">rcu_seq_end(&amp;rnp-&gt;gp_seq)</tspan></text>
897 </g> 898 </g>
898 <g 899 <g
899 transform="translate(-151.71746,-11647.612)" 900 transform="translate(-151.71746,-11647.612)"
@@ -972,13 +973,15 @@
972 id="tspan3104-6-5-6-0-92">Leaf</tspan></text> 973 id="tspan3104-6-5-6-0-92">Leaf</tspan></text>
973 <text 974 <text
974 xml:space="preserve" 975 xml:space="preserve"
975 x="7486.4907" 976 x="7408.5918"
976 y="17670.119" 977 y="17619.504"
977 font-style="normal" 978 font-style="normal"
978 font-weight="bold" 979 font-weight="bold"
979 font-size="192" 980 font-size="192"
980 id="text202-6" 981 id="text202-36-2"
981 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">-&gt;completed = -&gt;gpnum</text> 982 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier"><tspan
983 style="font-size:172.87567139px"
984 id="tspan3166-9">rcu_seq_end(&amp;rnp-&gt;gp_seq)</tspan></text>
982 </g> 985 </g>
983 <g 986 <g
984 transform="translate(-6817.1997,-11647.612)" 987 transform="translate(-6817.1997,-11647.612)"
@@ -1019,13 +1022,15 @@
1019 id="tspan3104-6-5-6-0-1">Leaf</tspan></text> 1022 id="tspan3104-6-5-6-0-1">Leaf</tspan></text>
1020 <text 1023 <text
1021 xml:space="preserve" 1024 xml:space="preserve"
1022 x="7474.1382" 1025 x="7416.8003"
1023 y="17688.926" 1026 y="17619.504"
1024 font-style="normal" 1027 font-style="normal"
1025 font-weight="bold" 1028 font-weight="bold"
1026 font-size="192" 1029 font-size="192"
1027 id="text202-5" 1030 id="text202-36-3"
1028 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">-&gt;completed = -&gt;gpnum</text> 1031 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier"><tspan
1032 style="font-size:172.87567139px"
1033 id="tspan3166-56">rcu_seq_end(&amp;rnp-&gt;gp_seq)</tspan></text>
1029 </g> 1034 </g>
1030 <path 1035 <path
1031 style="fill:none;stroke:#000000;stroke-width:13.29812908px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#Arrow2Lend)" 1036 style="fill:none;stroke:#000000;stroke-width:13.29812908px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#Arrow2Lend)"
@@ -1059,15 +1064,6 @@
1059 id="path3414-8-3-6" 1064 id="path3414-8-3-6"
1060 inkscape:connector-curvature="0" 1065 inkscape:connector-curvature="0"
1061 sodipodi:nodetypes="cc" /> 1066 sodipodi:nodetypes="cc" />
1062 <text
1063 xml:space="preserve"
1064 x="7318.9653"
1065 y="6031.6353"
1066 font-style="normal"
1067 font-weight="bold"
1068 font-size="192"
1069 id="text202-2"
1070 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">-&gt;completed = -&gt;gpnum</text>
1071 <g 1067 <g
1072 style="fill:none;stroke-width:0.025in" 1068 style="fill:none;stroke-width:0.025in"
1073 id="g4504-3-9" 1069 id="g4504-3-9"
@@ -1123,4 +1119,15 @@
1123 id="path3134-9-0-3-5" 1119 id="path3134-9-0-3-5"
1124 d="m 6875.6003,15833.906 1595.7755,0" 1120 d="m 6875.6003,15833.906 1595.7755,0"
1125 style="fill:none;stroke:#969696;stroke-width:53.19251633;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;marker-end:url(#Arrow1Send-36)" /> 1121 style="fill:none;stroke:#969696;stroke-width:53.19251633;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;marker-end:url(#Arrow1Send-36)" />
1122 <text
1123 xml:space="preserve"
1124 x="7275.2612"
1125 y="5971.8916"
1126 font-style="normal"
1127 font-weight="bold"
1128 font-size="192"
1129 id="text202-36-1"
1130 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier"><tspan
1131 style="font-size:172.87567139px"
1132 id="tspan3166-2">rcu_seq_end(&amp;rnp-&gt;gp_seq)</tspan></text>
1126</svg> 1133</svg>
diff --git a/Documentation/RCU/Design/Memory-Ordering/TreeRCU-gp-init-1.svg b/Documentation/RCU/Design/Memory-Ordering/TreeRCU-gp-init-1.svg
index 0161262904ec..8c207550818f 100644
--- a/Documentation/RCU/Design/Memory-Ordering/TreeRCU-gp-init-1.svg
+++ b/Documentation/RCU/Design/Memory-Ordering/TreeRCU-gp-init-1.svg
@@ -272,13 +272,13 @@
272 inkscape:window-height="1144" 272 inkscape:window-height="1144"
273 id="namedview208" 273 id="namedview208"
274 showgrid="true" 274 showgrid="true"
275 inkscape:zoom="0.70710678" 275 inkscape:zoom="2.6330492"
276 inkscape:cx="617.89019" 276 inkscape:cx="524.82797"
277 inkscape:cy="636.57143" 277 inkscape:cy="519.31194"
278 inkscape:window-x="697" 278 inkscape:window-x="79"
279 inkscape:window-y="28" 279 inkscape:window-y="28"
280 inkscape:window-maximized="0" 280 inkscape:window-maximized="0"
281 inkscape:current-layer="svg2" 281 inkscape:current-layer="g3188"
282 fit-margin-top="5" 282 fit-margin-top="5"
283 fit-margin-right="5" 283 fit-margin-right="5"
284 fit-margin-left="5" 284 fit-margin-left="5"
@@ -305,13 +305,15 @@
305 id="g3188"> 305 id="g3188">
306 <text 306 <text
307 xml:space="preserve" 307 xml:space="preserve"
308 x="3305.5364" 308 x="3119.363"
309 y="13255.592" 309 y="13255.592"
310 font-style="normal" 310 font-style="normal"
311 font-weight="bold" 311 font-weight="bold"
312 font-size="192" 312 font-size="192"
313 id="text202" 313 id="text202"
314 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;font-family:Courier">rsp-&gt;gpnum++</text> 314 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;font-family:Courier"><tspan
315 style="font-size:172.87567139px"
316 id="tspan3071">rcu_seq_start(rsp-&gt;gp_seq)</tspan></text>
315 <g 317 <g
316 id="g3107" 318 id="g3107"
317 transform="translate(947.90548,11584.029)"> 319 transform="translate(947.90548,11584.029)">
diff --git a/Documentation/RCU/Design/Memory-Ordering/TreeRCU-gp-init-3.svg b/Documentation/RCU/Design/Memory-Ordering/TreeRCU-gp-init-3.svg
index de6ecc51b00e..d24d7d555dbc 100644
--- a/Documentation/RCU/Design/Memory-Ordering/TreeRCU-gp-init-3.svg
+++ b/Documentation/RCU/Design/Memory-Ordering/TreeRCU-gp-init-3.svg
@@ -19,7 +19,7 @@
19 id="svg2" 19 id="svg2"
20 version="1.1" 20 version="1.1"
21 inkscape:version="0.48.4 r9939" 21 inkscape:version="0.48.4 r9939"
22 sodipodi:docname="TreeRCU-gp-init-2.svg"> 22 sodipodi:docname="TreeRCU-gp-init-3.svg">
23 <metadata 23 <metadata
24 id="metadata212"> 24 id="metadata212">
25 <rdf:RDF> 25 <rdf:RDF>
@@ -257,18 +257,22 @@
257 inkscape:window-width="1087" 257 inkscape:window-width="1087"
258 inkscape:window-height="1144" 258 inkscape:window-height="1144"
259 id="namedview208" 259 id="namedview208"
260 showgrid="false" 260 showgrid="true"
261 inkscape:zoom="0.70710678" 261 inkscape:zoom="0.68224756"
262 inkscape:cx="617.89019" 262 inkscape:cx="617.89019"
263 inkscape:cy="625.84293" 263 inkscape:cy="625.84293"
264 inkscape:window-x="697" 264 inkscape:window-x="54"
265 inkscape:window-y="28" 265 inkscape:window-y="28"
266 inkscape:window-maximized="0" 266 inkscape:window-maximized="0"
267 inkscape:current-layer="svg2" 267 inkscape:current-layer="g3153"
268 fit-margin-top="5" 268 fit-margin-top="5"
269 fit-margin-right="5" 269 fit-margin-right="5"
270 fit-margin-left="5" 270 fit-margin-left="5"
271 fit-margin-bottom="5" /> 271 fit-margin-bottom="5">
272 <inkscape:grid
273 type="xygrid"
274 id="grid3090" />
275 </sodipodi:namedview>
272 <path 276 <path
273 sodipodi:nodetypes="cccccccccccccccccccccccc" 277 sodipodi:nodetypes="cccccccccccccccccccccccc"
274 inkscape:connector-curvature="0" 278 inkscape:connector-curvature="0"
@@ -281,13 +285,13 @@
281 id="g3188"> 285 id="g3188">
282 <text 286 <text
283 xml:space="preserve" 287 xml:space="preserve"
284 x="3305.5364" 288 x="3145.9592"
285 y="13255.592" 289 y="13255.592"
286 font-style="normal" 290 font-style="normal"
287 font-weight="bold" 291 font-weight="bold"
288 font-size="192" 292 font-size="192"
289 id="text202" 293 id="text202"
290 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;font-family:Courier">-&gt;gpnum = rsp-&gt;gpnum</text> 294 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;font-family:Courier">-&gt;gp_seq = rsp-&gt;gp_seq</text>
291 <g 295 <g
292 id="g3107" 296 id="g3107"
293 transform="translate(947.90548,11584.029)"> 297 transform="translate(947.90548,11584.029)">
@@ -366,13 +370,13 @@
366 </g> 370 </g>
367 <text 371 <text
368 xml:space="preserve" 372 xml:space="preserve"
369 x="5392.3345" 373 x="5253.6904"
370 y="15407.104" 374 y="15407.032"
371 font-style="normal" 375 font-style="normal"
372 font-weight="bold" 376 font-weight="bold"
373 font-size="192" 377 font-size="192"
374 id="text202-6" 378 id="text202-6"
375 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">-&gt;gpnum = rsp-&gt;gpnum</text> 379 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">-&gt;gp_seq = rsp-&gt;gp_seq</text>
376 </g> 380 </g>
377 <g 381 <g
378 style="fill:none;stroke-width:0.025in" 382 style="fill:none;stroke-width:0.025in"
@@ -413,13 +417,13 @@
413 id="tspan3104-6-5-6-0">Leaf</tspan></text> 417 id="tspan3104-6-5-6-0">Leaf</tspan></text>
414 <text 418 <text
415 xml:space="preserve" 419 xml:space="preserve"
416 x="7536.4883" 420 x="7415.4365"
417 y="17640.934" 421 y="17670.572"
418 font-style="normal" 422 font-style="normal"
419 font-weight="bold" 423 font-weight="bold"
420 font-size="192" 424 font-size="192"
421 id="text202-9" 425 id="text202-9"
422 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">-&gt;gpnum = rsp-&gt;gpnum</text> 426 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">-&gt;gp_seq = rsp-&gt;gp_seq</text>
423 </g> 427 </g>
424 <g 428 <g
425 transform="translate(-1642.5375,-11610.962)" 429 transform="translate(-1642.5375,-11610.962)"
@@ -448,13 +452,13 @@
448 </g> 452 </g>
449 <text 453 <text
450 xml:space="preserve" 454 xml:space="preserve"
451 x="5378.4146" 455 x="5258.0688"
452 y="15436.927" 456 y="15412.313"
453 font-style="normal" 457 font-style="normal"
454 font-weight="bold" 458 font-weight="bold"
455 font-size="192" 459 font-size="192"
456 id="text202-3" 460 id="text202-3"
457 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">-&gt;gpnum = rsp-&gt;gpnum</text> 461 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">-&gt;gp_seq = rsp-&gt;gp_seq</text>
458 </g> 462 </g>
459 <g 463 <g
460 transform="translate(-151.71726,-11647.329)" 464 transform="translate(-151.71726,-11647.329)"
@@ -533,13 +537,13 @@
533 id="tspan3104-6-5-6-0-92">Leaf</tspan></text> 537 id="tspan3104-6-5-6-0-92">Leaf</tspan></text>
534 <text 538 <text
535 xml:space="preserve" 539 xml:space="preserve"
536 x="7520.1294" 540 x="7405.2607"
537 y="17673.639" 541 y="17670.572"
538 font-style="normal" 542 font-style="normal"
539 font-weight="bold" 543 font-weight="bold"
540 font-size="192" 544 font-size="192"
541 id="text202-35" 545 id="text202-35"
542 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">-&gt;gpnum = rsp-&gt;gpnum</text> 546 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">-&gt;gp_seq = rsp-&gt;gp_seq</text>
543 </g> 547 </g>
544 <g 548 <g
545 transform="translate(-6817.1998,-11647.329)" 549 transform="translate(-6817.1998,-11647.329)"
@@ -580,13 +584,13 @@
580 id="tspan3104-6-5-6-0-1">Leaf</tspan></text> 584 id="tspan3104-6-5-6-0-1">Leaf</tspan></text>
581 <text 585 <text
582 xml:space="preserve" 586 xml:space="preserve"
583 x="7521.4663" 587 x="7413.4688"
584 y="17666.062" 588 y="17670.566"
585 font-style="normal" 589 font-style="normal"
586 font-weight="bold" 590 font-weight="bold"
587 font-size="192" 591 font-size="192"
588 id="text202-75" 592 id="text202-75"
589 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">-&gt;gpnum = rsp-&gt;gpnum</text> 593 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">-&gt;gp_seq = rsp-&gt;gp_seq</text>
590 </g> 594 </g>
591 <path 595 <path
592 style="fill:none;stroke:#000000;stroke-width:13.29812908px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#Arrow2Lend)" 596 style="fill:none;stroke:#000000;stroke-width:13.29812908px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#Arrow2Lend)"
@@ -622,11 +626,11 @@
622 sodipodi:nodetypes="cc" /> 626 sodipodi:nodetypes="cc" />
623 <text 627 <text
624 xml:space="preserve" 628 xml:space="preserve"
625 x="7370.856" 629 x="7271.9297"
626 y="5997.5972" 630 y="6023.2412"
627 font-style="normal" 631 font-style="normal"
628 font-weight="bold" 632 font-weight="bold"
629 font-size="192" 633 font-size="192"
630 id="text202-62" 634 id="text202-62"
631 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">-&gt;gpnum = rsp-&gt;gpnum</text> 635 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">-&gt;gp_seq = rsp-&gt;gp_seq</text>
632</svg> 636</svg>
diff --git a/Documentation/RCU/Design/Memory-Ordering/TreeRCU-gp.svg b/Documentation/RCU/Design/Memory-Ordering/TreeRCU-gp.svg
index b13b7b01bb3a..acd73c7ad0f4 100644
--- a/Documentation/RCU/Design/Memory-Ordering/TreeRCU-gp.svg
+++ b/Documentation/RCU/Design/Memory-Ordering/TreeRCU-gp.svg
@@ -1070,13 +1070,13 @@
1070 inkscape:window-height="1144" 1070 inkscape:window-height="1144"
1071 id="namedview208" 1071 id="namedview208"
1072 showgrid="true" 1072 showgrid="true"
1073 inkscape:zoom="0.6004608" 1073 inkscape:zoom="0.81932583"
1074 inkscape:cx="826.65969" 1074 inkscape:cx="840.45848"
1075 inkscape:cy="483.3047" 1075 inkscape:cy="5052.4242"
1076 inkscape:window-x="66" 1076 inkscape:window-x="787"
1077 inkscape:window-y="28" 1077 inkscape:window-y="24"
1078 inkscape:window-maximized="0" 1078 inkscape:window-maximized="0"
1079 inkscape:current-layer="svg2" 1079 inkscape:current-layer="g4"
1080 fit-margin-top="5" 1080 fit-margin-top="5"
1081 fit-margin-right="5" 1081 fit-margin-right="5"
1082 fit-margin-left="5" 1082 fit-margin-left="5"
@@ -1543,15 +1543,6 @@
1543 style="fill:none;stroke-width:0.025in" 1543 style="fill:none;stroke-width:0.025in"
1544 transform="translate(1749.0282,658.72243)" 1544 transform="translate(1749.0282,658.72243)"
1545 id="g3188"> 1545 id="g3188">
1546 <text
1547 xml:space="preserve"
1548 x="3305.5364"
1549 y="13255.592"
1550 font-style="normal"
1551 font-weight="bold"
1552 font-size="192"
1553 id="text202-5"
1554 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;font-family:Courier">rsp-&gt;gpnum++</text>
1555 <g 1546 <g
1556 id="g3107-62" 1547 id="g3107-62"
1557 transform="translate(947.90548,11584.029)"> 1548 transform="translate(947.90548,11584.029)">
@@ -1584,6 +1575,17 @@
1584 sodipodi:linespacing="125%"><tspan 1575 sodipodi:linespacing="125%"><tspan
1585 style="font-size:159.57754517px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;font-family:Liberation Sans;-inkscape-font-specification:Liberation Sans" 1576 style="font-size:159.57754517px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;font-family:Liberation Sans;-inkscape-font-specification:Liberation Sans"
1586 id="tspan3104-6-5-7">Root</tspan></text> 1577 id="tspan3104-6-5-7">Root</tspan></text>
1578 <text
1579 xml:space="preserve"
1580 x="3137.9988"
1581 y="13271.316"
1582 font-style="normal"
1583 font-weight="bold"
1584 font-size="192"
1585 id="text202-626"
1586 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier"><tspan
1587 style="font-size:172.87567139px"
1588 id="tspan3071">rcu_seq_start(rsp-&gt;gp_seq)</tspan></text>
1587 </g> 1589 </g>
1588 <rect 1590 <rect
1589 ry="0" 1591 ry="0"
@@ -2318,15 +2320,6 @@
2318 style="fill:none;stroke-width:0.025in" 2320 style="fill:none;stroke-width:0.025in"
2319 transform="translate(1739.0986,17188.625)" 2321 transform="translate(1739.0986,17188.625)"
2320 id="g3188-6"> 2322 id="g3188-6">
2321 <text
2322 xml:space="preserve"
2323 x="3305.5364"
2324 y="13255.592"
2325 font-style="normal"
2326 font-weight="bold"
2327 font-size="192"
2328 id="text202-1"
2329 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;font-family:Courier">-&gt;gpnum = rsp-&gt;gpnum</text>
2330 <g 2323 <g
2331 id="g3107-5" 2324 id="g3107-5"
2332 transform="translate(947.90548,11584.029)"> 2325 transform="translate(947.90548,11584.029)">
@@ -2359,6 +2352,15 @@
2359 sodipodi:linespacing="125%"><tspan 2352 sodipodi:linespacing="125%"><tspan
2360 style="font-size:159.57754517px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;font-family:Liberation Sans;-inkscape-font-specification:Liberation Sans" 2353 style="font-size:159.57754517px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;font-family:Liberation Sans;-inkscape-font-specification:Liberation Sans"
2361 id="tspan3104-6-5-1">Root</tspan></text> 2354 id="tspan3104-6-5-1">Root</tspan></text>
2355 <text
2356 xml:space="preserve"
2357 x="3147.9268"
2358 y="13240.524"
2359 font-style="normal"
2360 font-weight="bold"
2361 font-size="192"
2362 id="text202-1"
2363 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">-&gt;gp_seq = rsp-&gt;gp_seq</text>
2362 </g> 2364 </g>
2363 <g 2365 <g
2364 style="fill:none;stroke-width:0.025in" 2366 style="fill:none;stroke-width:0.025in"
@@ -2387,13 +2389,13 @@
2387 </g> 2389 </g>
2388 <text 2390 <text
2389 xml:space="preserve" 2391 xml:space="preserve"
2390 x="5392.3345" 2392 x="5263.1094"
2391 y="15407.104" 2393 y="15411.646"
2392 font-style="normal" 2394 font-style="normal"
2393 font-weight="bold" 2395 font-weight="bold"
2394 font-size="192" 2396 font-size="192"
2395 id="text202-6-7" 2397 id="text202-92"
2396 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">-&gt;gpnum = rsp-&gt;gpnum</text> 2398 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">-&gt;gp_seq = rsp-&gt;gp_seq</text>
2397 </g> 2399 </g>
2398 <g 2400 <g
2399 style="fill:none;stroke-width:0.025in" 2401 style="fill:none;stroke-width:0.025in"
@@ -2434,13 +2436,13 @@
2434 id="tspan3104-6-5-6-0-94">Leaf</tspan></text> 2436 id="tspan3104-6-5-6-0-94">Leaf</tspan></text>
2435 <text 2437 <text
2436 xml:space="preserve" 2438 xml:space="preserve"
2437 x="7536.4883" 2439 x="7417.4053"
2438 y="17640.934" 2440 y="17655.502"
2439 font-style="normal" 2441 font-style="normal"
2440 font-weight="bold" 2442 font-weight="bold"
2441 font-size="192" 2443 font-size="192"
2442 id="text202-9" 2444 id="text202-759"
2443 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">-&gt;gpnum = rsp-&gt;gpnum</text> 2445 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">-&gt;gp_seq = rsp-&gt;gp_seq</text>
2444 </g> 2446 </g>
2445 <g 2447 <g
2446 transform="translate(-2353.8462,17224.992)" 2448 transform="translate(-2353.8462,17224.992)"
@@ -2469,13 +2471,13 @@
2469 </g> 2471 </g>
2470 <text 2472 <text
2471 xml:space="preserve" 2473 xml:space="preserve"
2472 x="5378.4146" 2474 x="5246.1548"
2473 y="15436.927" 2475 y="15411.648"
2474 font-style="normal" 2476 font-style="normal"
2475 font-weight="bold" 2477 font-weight="bold"
2476 font-size="192" 2478 font-size="192"
2477 id="text202-3" 2479 id="text202-87"
2478 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">-&gt;gpnum = rsp-&gt;gpnum</text> 2480 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">-&gt;gp_seq = rsp-&gt;gp_seq</text>
2479 </g> 2481 </g>
2480 <g 2482 <g
2481 transform="translate(-863.02613,17188.625)" 2483 transform="translate(-863.02613,17188.625)"
@@ -2554,13 +2556,13 @@
2554 id="tspan3104-6-5-6-0-92-6">Leaf</tspan></text> 2556 id="tspan3104-6-5-6-0-92-6">Leaf</tspan></text>
2555 <text 2557 <text
2556 xml:space="preserve" 2558 xml:space="preserve"
2557 x="7520.1294" 2559 x="7433.8257"
2558 y="17673.639" 2560 y="17682.098"
2559 font-style="normal" 2561 font-style="normal"
2560 font-weight="bold" 2562 font-weight="bold"
2561 font-size="192" 2563 font-size="192"
2562 id="text202-35" 2564 id="text202-2"
2563 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">-&gt;gpnum = rsp-&gt;gpnum</text> 2565 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">-&gt;gp_seq = rsp-&gt;gp_seq</text>
2564 </g> 2566 </g>
2565 <g 2567 <g
2566 transform="translate(-7528.5085,17188.625)" 2568 transform="translate(-7528.5085,17188.625)"
@@ -2601,13 +2603,13 @@
2601 id="tspan3104-6-5-6-0-1-8">Leaf</tspan></text> 2603 id="tspan3104-6-5-6-0-1-8">Leaf</tspan></text>
2602 <text 2604 <text
2603 xml:space="preserve" 2605 xml:space="preserve"
2604 x="7521.4663" 2606 x="7415.4404"
2605 y="17666.062" 2607 y="17682.098"
2606 font-style="normal" 2608 font-style="normal"
2607 font-weight="bold" 2609 font-weight="bold"
2608 font-size="192" 2610 font-size="192"
2609 id="text202-75-1" 2611 id="text202-0"
2610 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">-&gt;gpnum = rsp-&gt;gpnum</text> 2612 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">-&gt;gp_seq = rsp-&gt;gp_seq</text>
2611 </g> 2613 </g>
2612 <path 2614 <path
2613 style="fill:none;stroke:#000000;stroke-width:13.29812813px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#Arrow2Lend)" 2615 style="fill:none;stroke:#000000;stroke-width:13.29812813px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#Arrow2Lend)"
@@ -2641,15 +2643,6 @@
2641 id="path3414-8-3-6-4" 2643 id="path3414-8-3-6-4"
2642 inkscape:connector-curvature="0" 2644 inkscape:connector-curvature="0"
2643 sodipodi:nodetypes="cc" /> 2645 sodipodi:nodetypes="cc" />
2644 <text
2645 xml:space="preserve"
2646 x="6659.5469"
2647 y="34833.551"
2648 font-style="normal"
2649 font-weight="bold"
2650 font-size="192"
2651 id="text202-62"
2652 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">-&gt;gpnum = rsp-&gt;gpnum</text>
2653 <path 2646 <path
2654 sodipodi:nodetypes="ccc" 2647 sodipodi:nodetypes="ccc"
2655 inkscape:connector-curvature="0" 2648 inkscape:connector-curvature="0"
@@ -3844,7 +3837,7 @@
3844 font-weight="bold" 3837 font-weight="bold"
3845 font-size="192" 3838 font-size="192"
3846 id="text202-6-6-5" 3839 id="text202-6-6-5"
3847 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rdp-&gt;gpnum</text> 3840 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rdp-&gt;gp_seq</text>
3848 <text 3841 <text
3849 xml:space="preserve" 3842 xml:space="preserve"
3850 x="5035.4155" 3843 x="5035.4155"
@@ -4284,15 +4277,6 @@
4284 style="fill:none;stroke-width:0.025in" 4277 style="fill:none;stroke-width:0.025in"
4285 transform="translate(1874.038,53203.538)" 4278 transform="translate(1874.038,53203.538)"
4286 id="g3188-7"> 4279 id="g3188-7">
4287 <text
4288 xml:space="preserve"
4289 x="3199.1516"
4290 y="13255.592"
4291 font-style="normal"
4292 font-weight="bold"
4293 font-size="192"
4294 id="text202-82"
4295 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;font-family:Courier">-&gt;completed = -&gt;gpnum</text>
4296 <g 4280 <g
4297 id="g3107-53" 4281 id="g3107-53"
4298 transform="translate(947.90548,11584.029)"> 4282 transform="translate(947.90548,11584.029)">
@@ -4325,6 +4309,17 @@
4325 sodipodi:linespacing="125%"><tspan 4309 sodipodi:linespacing="125%"><tspan
4326 style="font-size:159.57754517px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;font-family:Liberation Sans;-inkscape-font-specification:Liberation Sans" 4310 style="font-size:159.57754517px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;font-family:Liberation Sans;-inkscape-font-specification:Liberation Sans"
4327 id="tspan3104-6-5-19">Root</tspan></text> 4311 id="tspan3104-6-5-19">Root</tspan></text>
4312 <text
4313 xml:space="preserve"
4314 x="3175.896"
4315 y="13240.11"
4316 font-style="normal"
4317 font-weight="bold"
4318 font-size="192"
4319 id="text202-36-3"
4320 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier"><tspan
4321 style="font-size:172.87567139px"
4322 id="tspan3166">rcu_seq_end(&amp;rnp-&gt;gp_seq)</tspan></text>
4328 </g> 4323 </g>
4329 <rect 4324 <rect
4330 ry="0" 4325 ry="0"
@@ -4371,13 +4366,15 @@
4371 </g> 4366 </g>
4372 <text 4367 <text
4373 xml:space="preserve" 4368 xml:space="preserve"
4374 x="5324.5371" 4369 x="5264.4829"
4375 y="15414.598" 4370 y="15411.231"
4376 font-style="normal" 4371 font-style="normal"
4377 font-weight="bold" 4372 font-weight="bold"
4378 font-size="192" 4373 font-size="192"
4379 id="text202-753" 4374 id="text202-36-7"
4380 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">-&gt;completed = -&gt;gpnum</text> 4375 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier"><tspan
4376 style="font-size:172.87567139px"
4377 id="tspan3166-5">rcu_seq_end(&amp;rnp-&gt;gp_seq)</tspan></text>
4381 </g> 4378 </g>
4382 <g 4379 <g
4383 style="fill:none;stroke-width:0.025in" 4380 style="fill:none;stroke-width:0.025in"
@@ -4412,30 +4409,12 @@
4412 sodipodi:linespacing="125%"><tspan 4409 sodipodi:linespacing="125%"><tspan
4413 style="font-size:159.57754517px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;font-family:Liberation Sans;-inkscape-font-specification:Liberation Sans" 4410 style="font-size:159.57754517px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;font-family:Liberation Sans;-inkscape-font-specification:Liberation Sans"
4414 id="tspan3104-6-5-6-0-4">Leaf</tspan></text> 4411 id="tspan3104-6-5-6-0-4">Leaf</tspan></text>
4415 <text
4416 xml:space="preserve"
4417 x="10084.225"
4418 y="70903.312"
4419 font-style="normal"
4420 font-weight="bold"
4421 font-size="192"
4422 id="text202-9-0"
4423 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">-&gt;completed = -&gt;gpnum</text>
4424 <path 4412 <path
4425 sodipodi:nodetypes="ccc" 4413 sodipodi:nodetypes="ccc"
4426 inkscape:connector-curvature="0" 4414 inkscape:connector-curvature="0"
4427 id="path3134-9-0-3-9" 4415 id="path3134-9-0-3-9"
4428 d="m 6315.6122,72629.054 -20.9533,8108.684 1648.968,0" 4416 d="m 6315.6122,72629.054 -20.9533,8108.684 1648.968,0"
4429 style="fill:none;stroke:#969696;stroke-width:53.19251251;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;marker-end:url(#Arrow1Send)" /> 4417 style="fill:none;stroke:#969696;stroke-width:53.19251251;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;marker-end:url(#Arrow1Send)" />
4430 <text
4431 xml:space="preserve"
4432 x="5092.4683"
4433 y="74111.672"
4434 font-style="normal"
4435 font-weight="bold"
4436 font-size="192"
4437 id="text202-60"
4438 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rsp-&gt;completed =</text>
4439 <g 4418 <g
4440 style="fill:none;stroke-width:0.025in" 4419 style="fill:none;stroke-width:0.025in"
4441 id="g3107-62-6" 4420 id="g3107-62-6"
@@ -4469,15 +4448,6 @@
4469 sodipodi:linespacing="125%"><tspan 4448 sodipodi:linespacing="125%"><tspan
4470 style="font-size:159.57754517px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;font-family:Liberation Sans;-inkscape-font-specification:Liberation Sans" 4449 style="font-size:159.57754517px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;font-family:Liberation Sans;-inkscape-font-specification:Liberation Sans"
4471 id="tspan3104-6-5-7-7">Root</tspan></text> 4450 id="tspan3104-6-5-7-7">Root</tspan></text>
4472 <text
4473 xml:space="preserve"
4474 x="5092.4683"
4475 y="74325.906"
4476 font-style="normal"
4477 font-weight="bold"
4478 font-size="192"
4479 id="text202-60-3"
4480 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier"> rnp-&gt;completed</text>
4481 <g 4451 <g
4482 style="fill:none;stroke-width:0.025in" 4452 style="fill:none;stroke-width:0.025in"
4483 transform="translate(1746.2528,60972.572)" 4453 transform="translate(1746.2528,60972.572)"
@@ -4736,13 +4706,15 @@
4736 </g> 4706 </g>
4737 <text 4707 <text
4738 xml:space="preserve" 4708 xml:space="preserve"
4739 x="5327.3057" 4709 x="5274.1216"
4740 y="15428.84" 4710 y="15411.231"
4741 font-style="normal" 4711 font-style="normal"
4742 font-weight="bold" 4712 font-weight="bold"
4743 font-size="192" 4713 font-size="192"
4744 id="text202-36" 4714 id="text202-36"
4745 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">-&gt;completed = -&gt;gpnum</text> 4715 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier"><tspan
4716 style="font-size:172.87567139px"
4717 id="tspan3166-6">rcu_seq_end(&amp;rnp-&gt;gp_seq)</tspan></text>
4746 </g> 4718 </g>
4747 <g 4719 <g
4748 transform="translate(-728.08545,53203.538)" 4720 transform="translate(-728.08545,53203.538)"
@@ -4821,13 +4793,15 @@
4821 id="tspan3104-6-5-6-0-92-5">Leaf</tspan></text> 4793 id="tspan3104-6-5-6-0-92-5">Leaf</tspan></text>
4822 <text 4794 <text
4823 xml:space="preserve" 4795 xml:space="preserve"
4824 x="7486.4907" 4796 x="7435.1987"
4825 y="17670.119" 4797 y="17708.281"
4826 font-style="normal" 4798 font-style="normal"
4827 font-weight="bold" 4799 font-weight="bold"
4828 font-size="192" 4800 font-size="192"
4829 id="text202-6-2" 4801 id="text202-36-9"
4830 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">-&gt;completed = -&gt;gpnum</text> 4802 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier"><tspan
4803 style="font-size:172.87567139px"
4804 id="tspan3166-1">rcu_seq_end(&amp;rnp-&gt;gp_seq)</tspan></text>
4831 </g> 4805 </g>
4832 <g 4806 <g
4833 transform="translate(-7393.5687,53203.538)" 4807 transform="translate(-7393.5687,53203.538)"
@@ -4868,13 +4842,15 @@
4868 id="tspan3104-6-5-6-0-1-5">Leaf</tspan></text> 4842 id="tspan3104-6-5-6-0-1-5">Leaf</tspan></text>
4869 <text 4843 <text
4870 xml:space="preserve" 4844 xml:space="preserve"
4871 x="7474.1382" 4845 x="7416.8125"
4872 y="17688.926" 4846 y="17708.281"
4873 font-style="normal" 4847 font-style="normal"
4874 font-weight="bold" 4848 font-weight="bold"
4875 font-size="192" 4849 font-size="192"
4876 id="text202-5-1" 4850 id="text202-36-35"
4877 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">-&gt;completed = -&gt;gpnum</text> 4851 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier"><tspan
4852 style="font-size:172.87567139px"
4853 id="tspan3166-62">rcu_seq_end(&amp;rnp-&gt;gp_seq)</tspan></text>
4878 </g> 4854 </g>
4879 <path 4855 <path
4880 style="fill:none;stroke:#000000;stroke-width:13.29812813px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#Arrow2Lend)" 4856 style="fill:none;stroke:#000000;stroke-width:13.29812813px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#Arrow2Lend)"
@@ -4908,15 +4884,6 @@
4908 id="path3414-8-3-6-67" 4884 id="path3414-8-3-6-67"
4909 inkscape:connector-curvature="0" 4885 inkscape:connector-curvature="0"
4910 sodipodi:nodetypes="cc" /> 4886 sodipodi:nodetypes="cc" />
4911 <text
4912 xml:space="preserve"
4913 x="6742.6001"
4914 y="70882.617"
4915 font-style="normal"
4916 font-weight="bold"
4917 font-size="192"
4918 id="text202-2"
4919 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">-&gt;completed = -&gt;gpnum</text>
4920 <g 4887 <g
4921 style="fill:none;stroke-width:0.025in" 4888 style="fill:none;stroke-width:0.025in"
4922 id="g4504-3-9-6" 4889 id="g4504-3-9-6"
@@ -5131,5 +5098,47 @@
5131 font-size="192" 5098 font-size="192"
5132 id="text202-7-9-6-6-7" 5099 id="text202-7-9-6-6-7"
5133 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_do_batch()</text> 5100 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_do_batch()</text>
5101 <text
5102 xml:space="preserve"
5103 x="6698.9019"
5104 y="70885.211"
5105 font-style="normal"
5106 font-weight="bold"
5107 font-size="192"
5108 id="text202-36-2"
5109 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier"><tspan
5110 style="font-size:172.87567139px"
5111 id="tspan3166-7">rcu_seq_end(&amp;rnp-&gt;gp_seq)</tspan></text>
5112 <text
5113 xml:space="preserve"
5114 x="10023.457"
5115 y="70885.234"
5116 font-style="normal"
5117 font-weight="bold"
5118 font-size="192"
5119 id="text202-36-0"
5120 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier"><tspan
5121 style="font-size:172.87567139px"
5122 id="tspan3166-9">rcu_seq_end(&amp;rnp-&gt;gp_seq)</tspan></text>
5123 <text
5124 xml:space="preserve"
5125 x="5023.3389"
5126 y="74209.773"
5127 font-style="normal"
5128 font-weight="bold"
5129 font-size="192"
5130 id="text202-36-36"
5131 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier"><tspan
5132 style="font-size:172.87567139px"
5133 id="tspan3166-0">rcu_seq_end(&amp;rsp-&gt;gp_seq)</tspan></text>
5134 <text
5135 xml:space="preserve"
5136 x="6562.5884"
5137 y="34870.727"
5138 font-style="normal"
5139 font-weight="bold"
5140 font-size="192"
5141 id="text202-3"
5142 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">-&gt;gp_seq = rsp-&gt;gp_seq</text>
5134 </g> 5143 </g>
5135</svg> 5144</svg>
diff --git a/Documentation/RCU/Design/Memory-Ordering/TreeRCU-qs.svg b/Documentation/RCU/Design/Memory-Ordering/TreeRCU-qs.svg
index de3992f4cbe1..149bec2a4493 100644
--- a/Documentation/RCU/Design/Memory-Ordering/TreeRCU-qs.svg
+++ b/Documentation/RCU/Design/Memory-Ordering/TreeRCU-qs.svg
@@ -300,13 +300,13 @@
300 inkscape:window-height="1144" 300 inkscape:window-height="1144"
301 id="namedview208" 301 id="namedview208"
302 showgrid="true" 302 showgrid="true"
303 inkscape:zoom="0.70710678" 303 inkscape:zoom="0.96484375"
304 inkscape:cx="616.47598" 304 inkscape:cx="507.0191"
305 inkscape:cy="595.41964" 305 inkscape:cy="885.62207"
306 inkscape:window-x="813" 306 inkscape:window-x="47"
307 inkscape:window-y="28" 307 inkscape:window-y="28"
308 inkscape:window-maximized="0" 308 inkscape:window-maximized="0"
309 inkscape:current-layer="g4405" 309 inkscape:current-layer="g3115"
310 fit-margin-top="5" 310 fit-margin-top="5"
311 fit-margin-right="5" 311 fit-margin-right="5"
312 fit-margin-left="5" 312 fit-margin-left="5"
@@ -710,7 +710,7 @@
710 font-weight="bold" 710 font-weight="bold"
711 font-size="192" 711 font-size="192"
712 id="text202-6-6" 712 id="text202-6-6"
713 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rdp-&gt;gpnum</text> 713 style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rdp-&gt;gp_seq</text>
714 <text 714 <text
715 xml:space="preserve" 715 xml:space="preserve"
716 x="5035.4155" 716 x="5035.4155"
diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt
index 4259f95c3261..f99cf11b314b 100644
--- a/Documentation/RCU/stallwarn.txt
+++ b/Documentation/RCU/stallwarn.txt
@@ -172,7 +172,7 @@ it will print a message similar to the following:
172 INFO: rcu_sched detected stalls on CPUs/tasks: 172 INFO: rcu_sched detected stalls on CPUs/tasks:
173 2-...: (3 GPs behind) idle=06c/0/0 softirq=1453/1455 fqs=0 173 2-...: (3 GPs behind) idle=06c/0/0 softirq=1453/1455 fqs=0
174 16-...: (0 ticks this GP) idle=81c/0/0 softirq=764/764 fqs=0 174 16-...: (0 ticks this GP) idle=81c/0/0 softirq=764/764 fqs=0
175 (detected by 32, t=2603 jiffies, g=7073, c=7072, q=625) 175 (detected by 32, t=2603 jiffies, g=7075, q=625)
176 176
177This message indicates that CPU 32 detected that CPUs 2 and 16 were both 177This message indicates that CPU 32 detected that CPUs 2 and 16 were both
178causing stalls, and that the stall was affecting RCU-sched. This message 178causing stalls, and that the stall was affecting RCU-sched. This message
@@ -215,11 +215,10 @@ CPU since the last time that this CPU noted the beginning of a grace
215period. 215period.
216 216
217The "detected by" line indicates which CPU detected the stall (in this 217The "detected by" line indicates which CPU detected the stall (in this
218case, CPU 32), how many jiffies have elapsed since the start of the 218case, CPU 32), how many jiffies have elapsed since the start of the grace
219grace period (in this case 2603), the number of the last grace period 219period (in this case 2603), the grace-period sequence number (7075), and
220to start and to complete (7073 and 7072, respectively), and an estimate 220an estimate of the total number of RCU callbacks queued across all CPUs
221of the total number of RCU callbacks queued across all CPUs (625 in 221(625 in this case).
222this case).
223 222
224In kernels with CONFIG_RCU_FAST_NO_HZ, more information is printed 223In kernels with CONFIG_RCU_FAST_NO_HZ, more information is printed
225for each CPU: 224for each CPU:
@@ -266,15 +265,16 @@ If the relevant grace-period kthread has been unable to run prior to
266the stall warning, as was the case in the "All QSes seen" line above, 265the stall warning, as was the case in the "All QSes seen" line above,
267the following additional line is printed: 266the following additional line is printed:
268 267
269 kthread starved for 23807 jiffies! g7073 c7072 f0x0 RCU_GP_WAIT_FQS(3) ->state=0x1 268 kthread starved for 23807 jiffies! g7075 f0x0 RCU_GP_WAIT_FQS(3) ->state=0x1 ->cpu=5
270 269
271Starving the grace-period kthreads of CPU time can of course result 270Starving the grace-period kthreads of CPU time can of course result
272in RCU CPU stall warnings even when all CPUs and tasks have passed 271in RCU CPU stall warnings even when all CPUs and tasks have passed
273through the required quiescent states. The "g" and "c" numbers flag the 272through the required quiescent states. The "g" number shows the current
274number of the last grace period started and completed, respectively, 273grace-period sequence number, the "f" precedes the ->gp_flags command
275the "f" precedes the ->gp_flags command to the grace-period kthread, 274to the grace-period kthread, the "RCU_GP_WAIT_FQS" indicates that the
276the "RCU_GP_WAIT_FQS" indicates that the kthread is waiting for a short 275kthread is waiting for a short timeout, the "state" precedes value of the
277timeout, and the "state" precedes value of the task_struct ->state field. 276task_struct ->state field, and the "cpu" indicates that the grace-period
277kthread last ran on CPU 5.
278 278
279 279
280Multiple Warnings From One Stall 280Multiple Warnings From One Stall
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index 65eb856526b7..c2a7facf7ff9 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -588,6 +588,7 @@ It is extremely simple:
588 void synchronize_rcu(void) 588 void synchronize_rcu(void)
589 { 589 {
590 write_lock(&rcu_gp_mutex); 590 write_lock(&rcu_gp_mutex);
591 smp_mb__after_spinlock();
591 write_unlock(&rcu_gp_mutex); 592 write_unlock(&rcu_gp_mutex);
592 } 593 }
593 594
@@ -609,12 +610,15 @@ don't forget about them when submitting patches making use of RCU!]
609 610
610The rcu_read_lock() and rcu_read_unlock() primitive read-acquire 611The rcu_read_lock() and rcu_read_unlock() primitive read-acquire
611and release a global reader-writer lock. The synchronize_rcu() 612and release a global reader-writer lock. The synchronize_rcu()
612primitive write-acquires this same lock, then immediately releases 613primitive write-acquires this same lock, then releases it. This means
613it. This means that once synchronize_rcu() exits, all RCU read-side 614that once synchronize_rcu() exits, all RCU read-side critical sections
614critical sections that were in progress before synchronize_rcu() was 615that were in progress before synchronize_rcu() was called are guaranteed
615called are guaranteed to have completed -- there is no way that 616to have completed -- there is no way that synchronize_rcu() would have
616synchronize_rcu() would have been able to write-acquire the lock 617been able to write-acquire the lock otherwise. The smp_mb__after_spinlock()
617otherwise. 618promotes synchronize_rcu() to a full memory barrier in compliance with
619the "Memory-Barrier Guarantees" listed in:
620
621 Documentation/RCU/Design/Requirements/Requirements.html.
618 622
619It is possible to nest rcu_read_lock(), since reader-writer locks may 623It is possible to nest rcu_read_lock(), since reader-writer locks may
620be recursively acquired. Note also that rcu_read_lock() is immune 624be recursively acquired. Note also that rcu_read_lock() is immune
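For reference, the same toy implementation expresses the read-side primitives
directly in terms of that reader-writer lock.  One minimal sketch, reusing the
rcu_gp_mutex shown in the snippet above (illustrative only):

	void rcu_read_lock(void)
	{
		read_lock(&rcu_gp_mutex);
	}

	void rcu_read_unlock(void)
	{
		read_unlock(&rcu_gp_mutex);
	}

Because reader-writer locks may be recursively read-acquired, these calls nest,
as the surrounding text notes.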
@@ -816,11 +820,13 @@ RCU list traversal:
816 list_next_rcu 820 list_next_rcu
817 list_for_each_entry_rcu 821 list_for_each_entry_rcu
818 list_for_each_entry_continue_rcu 822 list_for_each_entry_continue_rcu
823 list_for_each_entry_from_rcu
819 hlist_first_rcu 824 hlist_first_rcu
820 hlist_next_rcu 825 hlist_next_rcu
821 hlist_pprev_rcu 826 hlist_pprev_rcu
822 hlist_for_each_entry_rcu 827 hlist_for_each_entry_rcu
823 hlist_for_each_entry_rcu_bh 828 hlist_for_each_entry_rcu_bh
829 hlist_for_each_entry_from_rcu
824 hlist_for_each_entry_continue_rcu 830 hlist_for_each_entry_continue_rcu
825 hlist_for_each_entry_continue_rcu_bh 831 hlist_for_each_entry_continue_rcu_bh
826 hlist_nulls_first_rcu 832 hlist_nulls_first_rcu
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 533ff5c68970..5cde1ff32ff3 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2835,8 +2835,6 @@
2835 2835
2836 nosync [HW,M68K] Disables sync negotiation for all devices. 2836 nosync [HW,M68K] Disables sync negotiation for all devices.
2837 2837
2838 notsc [BUGS=X86-32] Disable Time Stamp Counter
2839
2840 nowatchdog [KNL] Disable both lockup detectors, i.e. 2838 nowatchdog [KNL] Disable both lockup detectors, i.e.
2841 soft-lockup and NMI watchdog (hard-lockup). 2839 soft-lockup and NMI watchdog (hard-lockup).
2842 2840
@@ -3632,8 +3630,8 @@
3632 Set time (s) after boot for CPU-hotplug testing. 3630 Set time (s) after boot for CPU-hotplug testing.
3633 3631
3634 rcutorture.onoff_interval= [KNL] 3632 rcutorture.onoff_interval= [KNL]
3635 Set time (s) between CPU-hotplug operations, or 3633 Set time (jiffies) between CPU-hotplug operations,
3636 zero to disable CPU-hotplug testing. 3634 or zero to disable CPU-hotplug testing.
3637 3635
3638 rcutorture.shuffle_interval= [KNL] 3636 rcutorture.shuffle_interval= [KNL]
3639 Set task-shuffle interval (s). Shuffling tasks 3637 Set task-shuffle interval (s). Shuffling tasks
diff --git a/Documentation/core-api/atomic_ops.rst b/Documentation/core-api/atomic_ops.rst
index 2e7165f86f55..724583453e1f 100644
--- a/Documentation/core-api/atomic_ops.rst
+++ b/Documentation/core-api/atomic_ops.rst
@@ -29,7 +29,7 @@ updated by one CPU, local_t is probably more appropriate. Please see
29local_t. 29local_t.
30 30
31The first operations to implement for atomic_t's are the initializers and 31The first operations to implement for atomic_t's are the initializers and
32plain reads. :: 32plain writes. ::
33 33
34 #define ATOMIC_INIT(i) { (i) } 34 #define ATOMIC_INIT(i) { (i) }
35 #define atomic_set(v, i) ((v)->counter = (i)) 35 #define atomic_set(v, i) ((v)->counter = (i))
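As a rough usage sketch of these two primitives (the variable and function names
below are illustrative, not taken from the document)::

	static atomic_t refcount = ATOMIC_INIT(1);	/* compile-time initialization */

	void refcount_reset(void)
	{
		atomic_set(&refcount, 1);	/* plain store, not a read-modify-write */
	}

ATOMIC_INIT() covers static initialization, while atomic_set() performs the
run-time plain write that this section describes.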
diff --git a/Documentation/devicetree/bindings/interrupt-controller/ingenic,intc.txt b/Documentation/devicetree/bindings/interrupt-controller/ingenic,intc.txt
index 5f89fb635a1b..f97fd8ab5e45 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/ingenic,intc.txt
+++ b/Documentation/devicetree/bindings/interrupt-controller/ingenic,intc.txt
@@ -4,6 +4,7 @@ Required properties:
4 4
5- compatible : should be "ingenic,<socname>-intc". Valid strings are: 5- compatible : should be "ingenic,<socname>-intc". Valid strings are:
6 ingenic,jz4740-intc 6 ingenic,jz4740-intc
7 ingenic,jz4725b-intc
7 ingenic,jz4770-intc 8 ingenic,jz4770-intc
8 ingenic,jz4775-intc 9 ingenic,jz4775-intc
9 ingenic,jz4780-intc 10 ingenic,jz4780-intc
diff --git a/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.txt b/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.txt
index 20f121daa910..697ca2f26d1b 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.txt
+++ b/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.txt
@@ -7,6 +7,7 @@ Required properties:
7 - "renesas,irqc-r8a73a4" (R-Mobile APE6) 7 - "renesas,irqc-r8a73a4" (R-Mobile APE6)
8 - "renesas,irqc-r8a7743" (RZ/G1M) 8 - "renesas,irqc-r8a7743" (RZ/G1M)
9 - "renesas,irqc-r8a7745" (RZ/G1E) 9 - "renesas,irqc-r8a7745" (RZ/G1E)
10 - "renesas,irqc-r8a77470" (RZ/G1C)
10 - "renesas,irqc-r8a7790" (R-Car H2) 11 - "renesas,irqc-r8a7790" (R-Car H2)
11 - "renesas,irqc-r8a7791" (R-Car M2-W) 12 - "renesas,irqc-r8a7791" (R-Car M2-W)
12 - "renesas,irqc-r8a7792" (R-Car V2H) 13 - "renesas,irqc-r8a7792" (R-Car V2H)
@@ -16,6 +17,7 @@ Required properties:
16 - "renesas,intc-ex-r8a7796" (R-Car M3-W) 17 - "renesas,intc-ex-r8a7796" (R-Car M3-W)
17 - "renesas,intc-ex-r8a77965" (R-Car M3-N) 18 - "renesas,intc-ex-r8a77965" (R-Car M3-N)
18 - "renesas,intc-ex-r8a77970" (R-Car V3M) 19 - "renesas,intc-ex-r8a77970" (R-Car V3M)
20 - "renesas,intc-ex-r8a77980" (R-Car V3H)
19 - "renesas,intc-ex-r8a77995" (R-Car D3) 21 - "renesas,intc-ex-r8a77995" (R-Car D3)
20- #interrupt-cells: has to be <2>: an interrupt index and flags, as defined in 22- #interrupt-cells: has to be <2>: an interrupt index and flags, as defined in
21 interrupts.txt in this directory 23 interrupts.txt in this directory
diff --git a/Documentation/devicetree/bindings/timer/mediatek,mtk-timer.txt b/Documentation/devicetree/bindings/timer/mediatek,mtk-timer.txt
index b1fe7e9de1b4..18d4d0166c76 100644
--- a/Documentation/devicetree/bindings/timer/mediatek,mtk-timer.txt
+++ b/Documentation/devicetree/bindings/timer/mediatek,mtk-timer.txt
@@ -1,19 +1,25 @@
1Mediatek MT6577, MT6572 and MT6589 Timers 1Mediatek Timers
2--------------------------------------- 2---------------
3
4Mediatek SoCs have two different timers on different platforms,
5- GPT (General Purpose Timer)
6- SYST (System Timer)
7
 8The proper timer will be selected automatically by the driver.
3 9
4Required properties: 10Required properties:
5- compatible should contain: 11- compatible should contain:
6 * "mediatek,mt2701-timer" for MT2701 compatible timers 12 * "mediatek,mt2701-timer" for MT2701 compatible timers (GPT)
7 * "mediatek,mt6580-timer" for MT6580 compatible timers 13 * "mediatek,mt6580-timer" for MT6580 compatible timers (GPT)
8 * "mediatek,mt6589-timer" for MT6589 compatible timers 14 * "mediatek,mt6589-timer" for MT6589 compatible timers (GPT)
9 * "mediatek,mt7623-timer" for MT7623 compatible timers 15 * "mediatek,mt7623-timer" for MT7623 compatible timers (GPT)
10 * "mediatek,mt8127-timer" for MT8127 compatible timers 16 * "mediatek,mt8127-timer" for MT8127 compatible timers (GPT)
11 * "mediatek,mt8135-timer" for MT8135 compatible timers 17 * "mediatek,mt8135-timer" for MT8135 compatible timers (GPT)
12 * "mediatek,mt8173-timer" for MT8173 compatible timers 18 * "mediatek,mt8173-timer" for MT8173 compatible timers (GPT)
13 * "mediatek,mt6577-timer" for MT6577 and all above compatible timers 19 * "mediatek,mt6577-timer" for MT6577 and all above compatible timers (GPT)
14- reg: Should contain location and length for timers register. 20 * "mediatek,mt6765-timer" for MT6765 compatible timers (SYST)
15- clocks: Clocks driving the timer hardware. This list should include two 21- reg: Should contain location and length for timer register.
16 clocks. The order is system clock and as second clock the RTC clock. 22- clocks: Should contain system clock.
17 23
18Examples: 24Examples:
19 25
@@ -21,5 +27,5 @@ Examples:
21 compatible = "mediatek,mt6577-timer"; 27 compatible = "mediatek,mt6577-timer";
22 reg = <0x10008000 0x80>; 28 reg = <0x10008000 0x80>;
23 interrupts = <GIC_SPI 113 IRQ_TYPE_LEVEL_LOW>; 29 interrupts = <GIC_SPI 113 IRQ_TYPE_LEVEL_LOW>;
24 clocks = <&system_clk>, <&rtc_clk>; 30 clocks = <&system_clk>;
25 }; 31 };
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index a02d6bbfc9d0..0d8d7ef131e9 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -2179,32 +2179,41 @@ or:
2179 event_indicated = 1; 2179 event_indicated = 1;
2180 wake_up_process(event_daemon); 2180 wake_up_process(event_daemon);
2181 2181
2182A write memory barrier is implied by wake_up() and co. if and only if they 2182A general memory barrier is executed by wake_up() if it wakes something up.
2183wake something up. The barrier occurs before the task state is cleared, and so 2183If it doesn't wake anything up then a memory barrier may or may not be
2184sits between the STORE to indicate the event and the STORE to set TASK_RUNNING: 2184executed; you must not rely on it. The barrier occurs before the task state
2185is accessed, in particular, it sits between the STORE to indicate the event
2186and the STORE to set TASK_RUNNING:
2185 2187
2186 CPU 1 CPU 2 2188 CPU 1 (Sleeper) CPU 2 (Waker)
2187 =============================== =============================== 2189 =============================== ===============================
2188 set_current_state(); STORE event_indicated 2190 set_current_state(); STORE event_indicated
2189 smp_store_mb(); wake_up(); 2191 smp_store_mb(); wake_up();
2190 STORE current->state <write barrier> 2192 STORE current->state ...
2191 <general barrier> STORE current->state 2193 <general barrier> <general barrier>
2192 LOAD event_indicated 2194 LOAD event_indicated if ((LOAD task->state) & TASK_NORMAL)
2195 STORE task->state
2193 2196
2194To repeat, this write memory barrier is present if and only if something 2197where "task" is the thread being woken up and it equals CPU 1's "current".
2195is actually awakened. To see this, consider the following sequence of 2198
2196events, where X and Y are both initially zero: 2199To repeat, a general memory barrier is guaranteed to be executed by wake_up()
2200if something is actually awakened, but otherwise there is no such guarantee.
2201To see this, consider the following sequence of events, where X and Y are both
2202initially zero:
2197 2203
2198 CPU 1 CPU 2 2204 CPU 1 CPU 2
2199 =============================== =============================== 2205 =============================== ===============================
2200 X = 1; STORE event_indicated 2206 X = 1; Y = 1;
2201 smp_mb(); wake_up(); 2207 smp_mb(); wake_up();
2202 Y = 1; wait_event(wq, Y == 1); 2208 LOAD Y LOAD X
2203 wake_up(); load from Y sees 1, no memory barrier 2209
2204 load from X might see 0 2210If a wakeup does occur, one (at least) of the two loads must see 1. If, on
2211the other hand, a wakeup does not occur, both loads might see 0.
2205 2212
2206In contrast, if a wakeup does occur, CPU 2's load from X would be guaranteed 2213wake_up_process() always executes a general memory barrier. The barrier again
2207to see 1. 2214occurs before the task state is accessed. In particular, if the wake_up() in
2215the previous snippet were replaced by a call to wake_up_process() then one of
2216the two loads would be guaranteed to see 1.
2208 2217
2209The available waker functions include: 2218The available waker functions include:
2210 2219
@@ -2224,6 +2233,8 @@ The available waker functions include:
2224 wake_up_poll(); 2233 wake_up_poll();
2225 wake_up_process(); 2234 wake_up_process();
2226 2235
 2236In terms of memory ordering, these functions all provide the same guarantees as
2237a wake_up() (or stronger).
2227 2238
2228[!] Note that the memory barriers implied by the sleeper and the waker do _not_ 2239[!] Note that the memory barriers implied by the sleeper and the waker do _not_
2229order multiple stores before the wake-up with respect to loads of those stored 2240order multiple stores before the wake-up with respect to loads of those stored
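For illustration, the two sides shown in the tables above are usually coded
roughly as follows (a sketch only, combining the fragments already given; error
handling omitted):

	/* sleeper */
	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (event_indicated)
			break;
		schedule();
	}
	__set_current_state(TASK_RUNNING);

	/* waker */
	event_indicated = 1;
	wake_up_process(event_daemon);

Because wake_up_process() always executes a general memory barrier before the
task state is accessed, a sleeper that is actually awakened by this waker is
guaranteed to observe event_indicated as 1 when it re-checks the condition.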
diff --git a/Documentation/networking/dpaa2/overview.rst b/Documentation/networking/dpaa2/overview.rst
index 79fede4447d6..d638b5a8aadd 100644
--- a/Documentation/networking/dpaa2/overview.rst
+++ b/Documentation/networking/dpaa2/overview.rst
@@ -1,5 +1,6 @@
1.. include:: <isonum.txt> 1.. include:: <isonum.txt>
2 2
3=========================================================
3DPAA2 (Data Path Acceleration Architecture Gen2) Overview 4DPAA2 (Data Path Acceleration Architecture Gen2) Overview
4========================================================= 5=========================================================
5 6
diff --git a/Documentation/translations/ko_KR/memory-barriers.txt b/Documentation/translations/ko_KR/memory-barriers.txt
index 921739d00f69..7f01fb1c1084 100644
--- a/Documentation/translations/ko_KR/memory-barriers.txt
+++ b/Documentation/translations/ko_KR/memory-barriers.txt
@@ -1891,22 +1891,22 @@ Mandatory barriers impose overhead on SMP and UP systems alike
 1891 	/* modify ownership */ 1891 	/* modify ownership */
1892 desc->status = DEVICE_OWN; 1892 desc->status = DEVICE_OWN;
1893 1893
 1894 	/* sync memory before notifying the device via MMIO */
1895 wmb();
1896
 1897 	/* notify the device of the updated descriptor */ 1894 	/* notify the device of the updated descriptor */
1898 writel(DESC_NOTIFY, doorbell); 1895 writel(DESC_NOTIFY, doorbell);
1899 } 1896 }
1900 1897
 1901 (old lines 1901-1909) dma_rmb() ensures that the device has relinquished
      ownership before we read the data from the descriptor, and dma_wmb() ensures
      that the data has been written to the descriptor before the device sees that
      it again has ownership.  The wmb() is needed to guarantee that the cache
      coherent memory writes have completed before attempting a write to the cache
      incoherent MMIO region.

      For details on consistent memory, see Documentation/DMA-API.txt.
 1898 (new lines 1898-1909) dma_rmb() ensures that the device has relinquished
      ownership before we read the data from the descriptor, and dma_wmb() ensures
      that the data has been written to the descriptor before the device sees that
      it again has ownership.  Note that using writel() guarantees that the cache
      coherent memory writes have completed before the write to the MMIO region, so
      there is no need to execute wmb() in front of writel().  The cheaper
      writel_relaxed() does not provide this guarantee and must not be used here.

      For details on relaxed I/O accessors such as writel_relaxed(), see the
      "Kernel I/O barrier effects" section; for details on consistent memory, see
      Documentation/DMA-API.txt.
1910 1910
1911 1911
 1912MMIO write barrier 1912MMIO write barrier
diff --git a/Documentation/x86/intel_rdt_ui.txt b/Documentation/x86/intel_rdt_ui.txt
index a16aa2113840..f662d3c530e5 100644
--- a/Documentation/x86/intel_rdt_ui.txt
+++ b/Documentation/x86/intel_rdt_ui.txt
@@ -29,7 +29,11 @@ mount options are:
 29L2 and L3 CDP are controlled separately. 29L2 and L3 CDP are controlled separately.
30 30
31RDT features are orthogonal. A particular system may support only 31RDT features are orthogonal. A particular system may support only
32monitoring, only control, or both monitoring and control. 32monitoring, only control, or both monitoring and control. Cache
33pseudo-locking is a unique way of using cache control to "pin" or
34"lock" data in the cache. Details can be found in
35"Cache Pseudo-Locking".
36
33 37
34The mount succeeds if either of allocation or monitoring is present, but 38The mount succeeds if either of allocation or monitoring is present, but
35only those files and directories supported by the system will be created. 39only those files and directories supported by the system will be created.
@@ -65,6 +69,29 @@ related to allocation:
65 some platforms support devices that have their 69 some platforms support devices that have their
66 own settings for cache use which can over-ride 70 own settings for cache use which can over-ride
67 these bits. 71 these bits.
72"bit_usage": Annotated capacity bitmasks showing how all
73 instances of the resource are used. The legend is:
74 "0" - Corresponding region is unused. When the system's
75 resources have been allocated and a "0" is found
76 in "bit_usage" it is a sign that resources are
77 wasted.
78 "H" - Corresponding region is used by hardware only
79 but available for software use. If a resource
80 has bits set in "shareable_bits" but not all
81 of these bits appear in the resource groups'
82 schematas then the bits appearing in
83 "shareable_bits" but no resource group will
84 be marked as "H".
85 "X" - Corresponding region is available for sharing and
86 used by hardware and software. These are the
87 bits that appear in "shareable_bits" as
88 well as a resource group's allocation.
89 "S" - Corresponding region is used by software
90 and available for sharing.
91 "E" - Corresponding region is used exclusively by
92 one resource group. No sharing allowed.
93 "P" - Corresponding region is pseudo-locked. No
94 sharing allowed.
68 95
69Memory bandwidth (MB) subdirectory contains the following files 96Memory bandwidth (MB) subdirectory contains the following files
70with respect to allocation: 97with respect to allocation:
@@ -151,6 +178,9 @@ All groups contain the following files:
151 CPUs to/from this group. As with the tasks file a hierarchy is 178 CPUs to/from this group. As with the tasks file a hierarchy is
152 maintained where MON groups may only include CPUs owned by the 179 maintained where MON groups may only include CPUs owned by the
153 parent CTRL_MON group. 180 parent CTRL_MON group.
 181 When the resource group is in pseudo-locked mode this file will
182 only be readable, reflecting the CPUs associated with the
183 pseudo-locked region.
154 184
155 185
156"cpus_list": 186"cpus_list":
@@ -163,6 +193,21 @@ When control is enabled all CTRL_MON groups will also contain:
163 A list of all the resources available to this group. 193 A list of all the resources available to this group.
164 Each resource has its own line and format - see below for details. 194 Each resource has its own line and format - see below for details.
165 195
196"size":
197 Mirrors the display of the "schemata" file to display the size in
198 bytes of each allocation instead of the bits representing the
199 allocation.
200
201"mode":
202 The "mode" of the resource group dictates the sharing of its
203 allocations. A "shareable" resource group allows sharing of its
204 allocations while an "exclusive" resource group does not. A
205 cache pseudo-locked region is created by first writing
206 "pseudo-locksetup" to the "mode" file before writing the cache
207 pseudo-locked region's schemata to the resource group's "schemata"
208 file. On successful pseudo-locked region creation the mode will
209 automatically change to "pseudo-locked".
210
166When monitoring is enabled all MON groups will also contain: 211When monitoring is enabled all MON groups will also contain:
167 212
168"mon_data": 213"mon_data":
@@ -379,6 +424,170 @@ L3CODE:0=fffff;1=fffff;2=fffff;3=fffff
379L3DATA:0=fffff;1=fffff;2=3c0;3=fffff 424L3DATA:0=fffff;1=fffff;2=3c0;3=fffff
380L3CODE:0=fffff;1=fffff;2=fffff;3=fffff 425L3CODE:0=fffff;1=fffff;2=fffff;3=fffff
381 426
427Cache Pseudo-Locking
428--------------------
429CAT enables a user to specify the amount of cache space that an
430application can fill. Cache pseudo-locking builds on the fact that a
431CPU can still read and write data pre-allocated outside its current
432allocated area on a cache hit. With cache pseudo-locking, data can be
433preloaded into a reserved portion of cache that no application can
434fill, and from that point on will only serve cache hits. The cache
435pseudo-locked memory is made accessible to user space where an
436application can map it into its virtual address space and thus have
437a region of memory with reduced average read latency.
438
439The creation of a cache pseudo-locked region is triggered by a request
 440from the user, accompanied by a schemata of the region
441to be pseudo-locked. The cache pseudo-locked region is created as follows:
442- Create a CAT allocation CLOSNEW with a CBM matching the schemata
443 from the user of the cache region that will contain the pseudo-locked
444 memory. This region must not overlap with any current CAT allocation/CLOS
445 on the system and no future overlap with this cache region is allowed
446 while the pseudo-locked region exists.
447- Create a contiguous region of memory of the same size as the cache
448 region.
449- Flush the cache, disable hardware prefetchers, disable preemption.
450- Make CLOSNEW the active CLOS and touch the allocated memory to load
451 it into the cache.
452- Set the previous CLOS as active.
453- At this point the closid CLOSNEW can be released - the cache
454 pseudo-locked region is protected as long as its CBM does not appear in
455 any CAT allocation. Even though the cache pseudo-locked region will from
456 this point on not appear in any CBM of any CLOS an application running with
457 any CLOS will be able to access the memory in the pseudo-locked region since
458 the region continues to serve cache hits.
459- The contiguous region of memory loaded into the cache is exposed to
460 user-space as a character device.
461
462Cache pseudo-locking increases the probability that data will remain
463in the cache via carefully configuring the CAT feature and controlling
464application behavior. There is no guarantee that data is placed in
465cache. Instructions like INVD, WBINVD, CLFLUSH, etc. can still evict
 466"locked" data from cache. Power management C-states may shrink or
467power off cache. Deeper C-states will automatically be restricted on
468pseudo-locked region creation.
469
470It is required that an application using a pseudo-locked region runs
471with affinity to the cores (or a subset of the cores) associated
472with the cache on which the pseudo-locked region resides. A sanity check
473within the code will not allow an application to map pseudo-locked memory
474unless it runs with affinity to cores associated with the cache on which the
475pseudo-locked region resides. The sanity check is only done during the
 476 initial mmap() handling; there is no enforcement afterwards, and the
 477 application itself needs to ensure it remains affine to the correct cores.
478
479Pseudo-locking is accomplished in two stages:
4801) During the first stage the system administrator allocates a portion
481 of cache that should be dedicated to pseudo-locking. At this time an
482 equivalent portion of memory is allocated, loaded into allocated
483 cache portion, and exposed as a character device.
4842) During the second stage a user-space application maps (mmap()) the
485 pseudo-locked memory into its address space.
486
487Cache Pseudo-Locking Interface
488------------------------------
489A pseudo-locked region is created using the resctrl interface as follows:
490
4911) Create a new resource group by creating a new directory in /sys/fs/resctrl.
4922) Change the new resource group's mode to "pseudo-locksetup" by writing
493 "pseudo-locksetup" to the "mode" file.
4943) Write the schemata of the pseudo-locked region to the "schemata" file. All
495 bits within the schemata should be "unused" according to the "bit_usage"
496 file.
497
498On successful pseudo-locked region creation the "mode" file will contain
499"pseudo-locked" and a new character device with the same name as the resource
500group will exist in /dev/pseudo_lock. This character device can be mmap()'ed
501by user space in order to obtain access to the pseudo-locked memory region.
502
503An example of cache pseudo-locked region creation and usage can be found below.
504
505Cache Pseudo-Locking Debugging Interface
506---------------------------------------
507The pseudo-locking debugging interface is enabled by default (if
508CONFIG_DEBUG_FS is enabled) and can be found in /sys/kernel/debug/resctrl.
509
510There is no explicit way for the kernel to test if a provided memory
511location is present in the cache. The pseudo-locking debugging interface uses
512the tracing infrastructure to provide two ways to measure cache residency of
513the pseudo-locked region:
5141) Memory access latency using the pseudo_lock_mem_latency tracepoint. Data
515 from these measurements are best visualized using a hist trigger (see
516 example below). In this test the pseudo-locked region is traversed at
517 a stride of 32 bytes while hardware prefetchers and preemption
518 are disabled. This also provides a substitute visualization of cache
519 hits and misses.
5202) Cache hit and miss measurements using model specific precision counters if
521 available. Depending on the levels of cache on the system the pseudo_lock_l2
522 and pseudo_lock_l3 tracepoints are available.
523 WARNING: triggering this measurement uses from two (for just L2
524 measurements) to four (for L2 and L3 measurements) precision counters on
 525 the system; if any other measurements are in progress, the counters and
526 their corresponding event registers will be clobbered.
527
528When a pseudo-locked region is created a new debugfs directory is created for
529it in debugfs as /sys/kernel/debug/resctrl/<newdir>. A single
530write-only file, pseudo_lock_measure, is present in this directory. The
531measurement on the pseudo-locked region depends on the number, 1 or 2,
532written to this debugfs file. Since the measurements are recorded with the
533tracing infrastructure the relevant tracepoints need to be enabled before the
534measurement is triggered.
535
536Example of latency debugging interface:
537In this example a pseudo-locked region named "newlock" was created. Here is
538how we can measure the latency in cycles of reading from this region and
539visualize this data with a histogram that is available if CONFIG_HIST_TRIGGERS
540is set:
541# :> /sys/kernel/debug/tracing/trace
542# echo 'hist:keys=latency' > /sys/kernel/debug/tracing/events/resctrl/pseudo_lock_mem_latency/trigger
543# echo 1 > /sys/kernel/debug/tracing/events/resctrl/pseudo_lock_mem_latency/enable
544# echo 1 > /sys/kernel/debug/resctrl/newlock/pseudo_lock_measure
545# echo 0 > /sys/kernel/debug/tracing/events/resctrl/pseudo_lock_mem_latency/enable
546# cat /sys/kernel/debug/tracing/events/resctrl/pseudo_lock_mem_latency/hist
547
548# event histogram
549#
550# trigger info: hist:keys=latency:vals=hitcount:sort=hitcount:size=2048 [active]
551#
552
553{ latency: 456 } hitcount: 1
554{ latency: 50 } hitcount: 83
555{ latency: 36 } hitcount: 96
556{ latency: 44 } hitcount: 174
557{ latency: 48 } hitcount: 195
558{ latency: 46 } hitcount: 262
559{ latency: 42 } hitcount: 693
560{ latency: 40 } hitcount: 3204
561{ latency: 38 } hitcount: 3484
562
563Totals:
564 Hits: 8192
565 Entries: 9
566 Dropped: 0
567
568Example of cache hits/misses debugging:
569In this example a pseudo-locked region named "newlock" was created on the L2
570cache of a platform. Here is how we can obtain details of the cache hits
571and misses using the platform's precision counters.
572
573# :> /sys/kernel/debug/tracing/trace
574# echo 1 > /sys/kernel/debug/tracing/events/resctrl/pseudo_lock_l2/enable
575# echo 2 > /sys/kernel/debug/resctrl/newlock/pseudo_lock_measure
576# echo 0 > /sys/kernel/debug/tracing/events/resctrl/pseudo_lock_l2/enable
577# cat /sys/kernel/debug/tracing/trace
578
579# tracer: nop
580#
581# _-----=> irqs-off
582# / _----=> need-resched
583# | / _---=> hardirq/softirq
584# || / _--=> preempt-depth
585# ||| / delay
586# TASK-PID CPU# |||| TIMESTAMP FUNCTION
587# | | | |||| | |
588 pseudo_lock_mea-1672 [002] .... 3132.860500: pseudo_lock_l2: hits=4097 miss=0
589
590
382Examples for RDT allocation usage: 591Examples for RDT allocation usage:
383 592
384Example 1 593Example 1
@@ -502,7 +711,172 @@ siblings and only the real time threads are scheduled on the cores 4-7.
502 711
503# echo F0 > p0/cpus 712# echo F0 > p0/cpus
504 713
5054) Locking between applications 714Example 4
715---------
716
717The resource groups in previous examples were all in the default "shareable"
718mode allowing sharing of their cache allocations. If one resource group
719configures a cache allocation then nothing prevents another resource group
 720from overlapping with that allocation.
721
722In this example a new exclusive resource group will be created on a L2 CAT
723system with two L2 cache instances that can be configured with an 8-bit
724capacity bitmask. The new exclusive resource group will be configured to use
72525% of each cache instance.
726
727# mount -t resctrl resctrl /sys/fs/resctrl/
728# cd /sys/fs/resctrl
729
730First, we observe that the default group is configured to allocate to all L2
731cache:
732
733# cat schemata
734L2:0=ff;1=ff
735
736We could attempt to create the new resource group at this point, but it will
737fail because of the overlap with the schemata of the default group:
738# mkdir p0
739# echo 'L2:0=0x3;1=0x3' > p0/schemata
740# cat p0/mode
741shareable
742# echo exclusive > p0/mode
743-sh: echo: write error: Invalid argument
744# cat info/last_cmd_status
745schemata overlaps
746
747To ensure that there is no overlap with another resource group the default
748resource group's schemata has to change, making it possible for the new
749resource group to become exclusive.
750# echo 'L2:0=0xfc;1=0xfc' > schemata
751# echo exclusive > p0/mode
752# grep . p0/*
753p0/cpus:0
754p0/mode:exclusive
755p0/schemata:L2:0=03;1=03
756p0/size:L2:0=262144;1=262144
757
758A new resource group will on creation not overlap with an exclusive resource
759group:
760# mkdir p1
761# grep . p1/*
762p1/cpus:0
763p1/mode:shareable
764p1/schemata:L2:0=fc;1=fc
765p1/size:L2:0=786432;1=786432
766
767The bit_usage will reflect how the cache is used:
768# cat info/L2/bit_usage
7690=SSSSSSEE;1=SSSSSSEE
770
771A resource group cannot be forced to overlap with an exclusive resource group:
772# echo 'L2:0=0x1;1=0x1' > p1/schemata
773-sh: echo: write error: Invalid argument
774# cat info/last_cmd_status
775overlaps with exclusive group
776
777Example of Cache Pseudo-Locking
778-------------------------------
779Lock portion of L2 cache from cache id 1 using CBM 0x3. Pseudo-locked
 780region is exposed at /dev/pseudo_lock/newlock, which can be provided to an
 781application as the argument to mmap().
782
783# mount -t resctrl resctrl /sys/fs/resctrl/
784# cd /sys/fs/resctrl
785
786Ensure that there are bits available that can be pseudo-locked, since only
 787unused bits can be pseudo-locked, the bits to be pseudo-locked need to be
788removed from the default resource group's schemata:
789# cat info/L2/bit_usage
7900=SSSSSSSS;1=SSSSSSSS
791# echo 'L2:1=0xfc' > schemata
792# cat info/L2/bit_usage
7930=SSSSSSSS;1=SSSSSS00
794
795Create a new resource group that will be associated with the pseudo-locked
796region, indicate that it will be used for a pseudo-locked region, and
797configure the requested pseudo-locked region capacity bitmask:
798
799# mkdir newlock
800# echo pseudo-locksetup > newlock/mode
801# echo 'L2:1=0x3' > newlock/schemata
802
803On success the resource group's mode will change to pseudo-locked, the
804bit_usage will reflect the pseudo-locked region, and the character device
805exposing the pseudo-locked region will exist:
806
807# cat newlock/mode
808pseudo-locked
809# cat info/L2/bit_usage
8100=SSSSSSSS;1=SSSSSSPP
811# ls -l /dev/pseudo_lock/newlock
812crw------- 1 root root 243, 0 Apr 3 05:01 /dev/pseudo_lock/newlock
813
814/*
815 * Example code to access one page of pseudo-locked cache region
816 * from user space.
817 */
818#define _GNU_SOURCE
819#include <fcntl.h>
820#include <sched.h>
821#include <stdio.h>
822#include <stdlib.h>
823#include <unistd.h>
824#include <sys/mman.h>
825
826/*
827 * It is required that the application runs with affinity to only
828 * cores associated with the pseudo-locked region. Here the cpu
829 * is hardcoded for convenience of example.
830 */
831static int cpuid = 2;
832
833int main(int argc, char *argv[])
834{
835 cpu_set_t cpuset;
836 long page_size;
837 void *mapping;
838 int dev_fd;
839 int ret;
840
841 page_size = sysconf(_SC_PAGESIZE);
842
843 CPU_ZERO(&cpuset);
844 CPU_SET(cpuid, &cpuset);
845 ret = sched_setaffinity(0, sizeof(cpuset), &cpuset);
846 if (ret < 0) {
847 perror("sched_setaffinity");
848 exit(EXIT_FAILURE);
849 }
850
851 dev_fd = open("/dev/pseudo_lock/newlock", O_RDWR);
852 if (dev_fd < 0) {
853 perror("open");
854 exit(EXIT_FAILURE);
855 }
856
857 mapping = mmap(0, page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
858 dev_fd, 0);
859 if (mapping == MAP_FAILED) {
860 perror("mmap");
861 close(dev_fd);
862 exit(EXIT_FAILURE);
863 }
864
865 /* Application interacts with pseudo-locked memory @mapping */
866
867 ret = munmap(mapping, page_size);
868 if (ret < 0) {
869 perror("munmap");
870 close(dev_fd);
871 exit(EXIT_FAILURE);
872 }
873
874 close(dev_fd);
875 exit(EXIT_SUCCESS);
876}
877
878Locking between applications
879----------------------------
506 880
507Certain operations on the resctrl filesystem, composed of read/writes 881Certain operations on the resctrl filesystem, composed of read/writes
508to/from multiple files, must be atomic. 882to/from multiple files, must be atomic.
@@ -510,7 +884,7 @@ to/from multiple files, must be atomic.
510As an example, the allocation of an exclusive reservation of L3 cache 884As an example, the allocation of an exclusive reservation of L3 cache
511involves: 885involves:
512 886
513 1. Read the cbmmasks from each directory 887 1. Read the cbmmasks from each directory or the per-resource "bit_usage"
514 2. Find a contiguous set of bits in the global CBM bitmask that is clear 888 2. Find a contiguous set of bits in the global CBM bitmask that is clear
515 in any of the directory cbmmasks 889 in any of the directory cbmmasks
516 3. Create a new directory 890 3. Create a new directory
diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt
index 8d109ef67ab6..ad6d2a80cf05 100644
--- a/Documentation/x86/x86_64/boot-options.txt
+++ b/Documentation/x86/x86_64/boot-options.txt
@@ -92,9 +92,7 @@ APICs
92Timing 92Timing
93 93
94 notsc 94 notsc
95 Don't use the CPU time stamp counter to read the wall time. 95 Deprecated, use tsc=unstable instead.
96 This can be used to work around timing problems on multiprocessor systems
97 with not properly synchronized CPUs.
98 96
99 nohpet 97 nohpet
100 Don't use the HPET timer. 98 Don't use the HPET timer.
@@ -156,6 +154,10 @@ NUMA
156 If given as an integer, fills all system RAM with N fake nodes 154 If given as an integer, fills all system RAM with N fake nodes
157 interleaved over physical nodes. 155 interleaved over physical nodes.
158 156
157 numa=fake=<N>U
158 If given as an integer followed by 'U', it will divide each
159 physical node into N emulated nodes.
160
159ACPI 161ACPI
160 162
161 acpi=off Don't enable ACPI 163 acpi=off Don't enable ACPI
diff --git a/MAINTAINERS b/MAINTAINERS
index f6a9b0842319..629e08703c82 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5444,6 +5444,7 @@ F: drivers/iommu/exynos-iommu.c
5444 5444
5445EZchip NPS platform support 5445EZchip NPS platform support
5446M: Vineet Gupta <vgupta@synopsys.com> 5446M: Vineet Gupta <vgupta@synopsys.com>
5447M: Ofer Levi <oferle@mellanox.com>
5447S: Supported 5448S: Supported
5448F: arch/arc/plat-eznps 5449F: arch/arc/plat-eznps
5449F: arch/arc/boot/dts/eznps.dts 5450F: arch/arc/boot/dts/eznps.dts
@@ -5929,7 +5930,7 @@ F: Documentation/dev-tools/gcov.rst
5929 5930
5930GDB KERNEL DEBUGGING HELPER SCRIPTS 5931GDB KERNEL DEBUGGING HELPER SCRIPTS
5931M: Jan Kiszka <jan.kiszka@siemens.com> 5932M: Jan Kiszka <jan.kiszka@siemens.com>
5932M: Kieran Bingham <kieran@bingham.xyz> 5933M: Kieran Bingham <kbingham@kernel.org>
5933S: Supported 5934S: Supported
5934F: scripts/gdb/ 5935F: scripts/gdb/
5935 5936
@@ -8316,10 +8317,16 @@ M: Jade Alglave <j.alglave@ucl.ac.uk>
8316M: Luc Maranget <luc.maranget@inria.fr> 8317M: Luc Maranget <luc.maranget@inria.fr>
8317M: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> 8318M: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
8318R: Akira Yokosawa <akiyks@gmail.com> 8319R: Akira Yokosawa <akiyks@gmail.com>
8320R: Daniel Lustig <dlustig@nvidia.com>
8319L: linux-kernel@vger.kernel.org 8321L: linux-kernel@vger.kernel.org
8322L: linux-arch@vger.kernel.org
8320S: Supported 8323S: Supported
8321T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git 8324T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git
8322F: tools/memory-model/ 8325F: tools/memory-model/
8326F: Documentation/atomic_bitops.txt
8327F: Documentation/atomic_t.txt
8328F: Documentation/core-api/atomic_ops.rst
8329F: Documentation/core-api/refcount-vs-atomic.rst
8323F: Documentation/memory-barriers.txt 8330F: Documentation/memory-barriers.txt
8324 8331
8325LINUX SECURITY MODULE (LSM) FRAMEWORK 8332LINUX SECURITY MODULE (LSM) FRAMEWORK
@@ -12038,9 +12045,9 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git
12038F: Documentation/RCU/ 12045F: Documentation/RCU/
12039X: Documentation/RCU/torture.txt 12046X: Documentation/RCU/torture.txt
12040F: include/linux/rcu* 12047F: include/linux/rcu*
12041X: include/linux/srcu.h 12048X: include/linux/srcu*.h
12042F: kernel/rcu/ 12049F: kernel/rcu/
12043X: kernel/torture.c 12050X: kernel/rcu/srcu*.c
12044 12051
12045REAL TIME CLOCK (RTC) SUBSYSTEM 12052REAL TIME CLOCK (RTC) SUBSYSTEM
12046M: Alessandro Zummo <a.zummo@towertech.it> 12053M: Alessandro Zummo <a.zummo@towertech.it>
@@ -13077,8 +13084,8 @@ L: linux-kernel@vger.kernel.org
13077W: http://www.rdrop.com/users/paulmck/RCU/ 13084W: http://www.rdrop.com/users/paulmck/RCU/
13078S: Supported 13085S: Supported
13079T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git 13086T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git
13080F: include/linux/srcu.h 13087F: include/linux/srcu*.h
13081F: kernel/rcu/srcu.c 13088F: kernel/rcu/srcu*.c
13082 13089
13083SERIAL LOW-POWER INTER-CHIP MEDIA BUS (SLIMbus) 13090SERIAL LOW-POWER INTER-CHIP MEDIA BUS (SLIMbus)
13084M: Srinivas Kandagatla <srinivas.kandagatla@linaro.org> 13091M: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
@@ -14437,6 +14444,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git
14437F: Documentation/RCU/torture.txt 14444F: Documentation/RCU/torture.txt
14438F: kernel/torture.c 14445F: kernel/torture.c
14439F: kernel/rcu/rcutorture.c 14446F: kernel/rcu/rcutorture.c
14447F: kernel/rcu/rcuperf.c
14440F: kernel/locking/locktorture.c 14448F: kernel/locking/locktorture.c
14441 14449
14442TOSHIBA ACPI EXTRAS DRIVER 14450TOSHIBA ACPI EXTRAS DRIVER
diff --git a/Makefile b/Makefile
index 85f3481a56d6..863f58503bee 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
2VERSION = 4 2VERSION = 4
3PATCHLEVEL = 18 3PATCHLEVEL = 18
4SUBLEVEL = 0 4SUBLEVEL = 0
5EXTRAVERSION = -rc7 5EXTRAVERSION =
6NAME = Merciless Moray 6NAME = Merciless Moray
7 7
8# *DOCUMENTATION* 8# *DOCUMENTATION*
diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h
index 767bfdd42992..150a1c5d6a2c 100644
--- a/arch/alpha/include/asm/atomic.h
+++ b/arch/alpha/include/asm/atomic.h
@@ -18,11 +18,11 @@
18 * To ensure dependency ordering is preserved for the _relaxed and 18 * To ensure dependency ordering is preserved for the _relaxed and
19 * _release atomics, an smp_read_barrier_depends() is unconditionally 19 * _release atomics, an smp_read_barrier_depends() is unconditionally
20 * inserted into the _relaxed variants, which are used to build the 20 * inserted into the _relaxed variants, which are used to build the
21 * barriered versions. To avoid redundant back-to-back fences, we can 21 * barriered versions. Avoid redundant back-to-back fences in the
22 * define the _acquire and _fence versions explicitly. 22 * _acquire and _fence versions.
23 */ 23 */
24#define __atomic_op_acquire(op, args...) op##_relaxed(args) 24#define __atomic_acquire_fence()
25#define __atomic_op_fence __atomic_op_release 25#define __atomic_post_full_fence()
26 26
27#define ATOMIC_INIT(i) { (i) } 27#define ATOMIC_INIT(i) { (i) }
28#define ATOMIC64_INIT(i) { (i) } 28#define ATOMIC64_INIT(i) { (i) }
@@ -206,7 +206,7 @@ ATOMIC_OPS(xor, xor)
206#define atomic_xchg(v, new) (xchg(&((v)->counter), new)) 206#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
207 207
208/** 208/**
209 * __atomic_add_unless - add unless the number is a given value 209 * atomic_fetch_add_unless - add unless the number is a given value
210 * @v: pointer of type atomic_t 210 * @v: pointer of type atomic_t
211 * @a: the amount to add to v... 211 * @a: the amount to add to v...
212 * @u: ...unless v is equal to u. 212 * @u: ...unless v is equal to u.
@@ -214,7 +214,7 @@ ATOMIC_OPS(xor, xor)
214 * Atomically adds @a to @v, so long as it was not @u. 214 * Atomically adds @a to @v, so long as it was not @u.
215 * Returns the old value of @v. 215 * Returns the old value of @v.
216 */ 216 */
217static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u) 217static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u)
218{ 218{
219 int c, new, old; 219 int c, new, old;
220 smp_mb(); 220 smp_mb();
@@ -235,38 +235,39 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
235 smp_mb(); 235 smp_mb();
236 return old; 236 return old;
237} 237}
238 238#define atomic_fetch_add_unless atomic_fetch_add_unless
239 239
240/** 240/**
241 * atomic64_add_unless - add unless the number is a given value 241 * atomic64_fetch_add_unless - add unless the number is a given value
242 * @v: pointer of type atomic64_t 242 * @v: pointer of type atomic64_t
243 * @a: the amount to add to v... 243 * @a: the amount to add to v...
244 * @u: ...unless v is equal to u. 244 * @u: ...unless v is equal to u.
245 * 245 *
246 * Atomically adds @a to @v, so long as it was not @u. 246 * Atomically adds @a to @v, so long as it was not @u.
247 * Returns true iff @v was not @u. 247 * Returns the old value of @v.
248 */ 248 */
249static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u) 249static __inline__ long atomic64_fetch_add_unless(atomic64_t *v, long a, long u)
250{ 250{
251 long c, tmp; 251 long c, new, old;
252 smp_mb(); 252 smp_mb();
253 __asm__ __volatile__( 253 __asm__ __volatile__(
254 "1: ldq_l %[tmp],%[mem]\n" 254 "1: ldq_l %[old],%[mem]\n"
255 " cmpeq %[tmp],%[u],%[c]\n" 255 " cmpeq %[old],%[u],%[c]\n"
256 " addq %[tmp],%[a],%[tmp]\n" 256 " addq %[old],%[a],%[new]\n"
257 " bne %[c],2f\n" 257 " bne %[c],2f\n"
258 " stq_c %[tmp],%[mem]\n" 258 " stq_c %[new],%[mem]\n"
259 " beq %[tmp],3f\n" 259 " beq %[new],3f\n"
260 "2:\n" 260 "2:\n"
261 ".subsection 2\n" 261 ".subsection 2\n"
262 "3: br 1b\n" 262 "3: br 1b\n"
263 ".previous" 263 ".previous"
264 : [tmp] "=&r"(tmp), [c] "=&r"(c) 264 : [old] "=&r"(old), [new] "=&r"(new), [c] "=&r"(c)
265 : [mem] "m"(*v), [a] "rI"(a), [u] "rI"(u) 265 : [mem] "m"(*v), [a] "rI"(a), [u] "rI"(u)
266 : "memory"); 266 : "memory");
267 smp_mb(); 267 smp_mb();
268 return !c; 268 return old;
269} 269}
270#define atomic64_fetch_add_unless atomic64_fetch_add_unless
270 271
271/* 272/*
272 * atomic64_dec_if_positive - decrement by 1 if old value positive 273 * atomic64_dec_if_positive - decrement by 1 if old value positive
@@ -295,31 +296,6 @@ static inline long atomic64_dec_if_positive(atomic64_t *v)
295 smp_mb(); 296 smp_mb();
296 return old - 1; 297 return old - 1;
297} 298}
298 299#define atomic64_dec_if_positive atomic64_dec_if_positive
299#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
300
301#define atomic_add_negative(a, v) (atomic_add_return((a), (v)) < 0)
302#define atomic64_add_negative(a, v) (atomic64_add_return((a), (v)) < 0)
303
304#define atomic_dec_return(v) atomic_sub_return(1,(v))
305#define atomic64_dec_return(v) atomic64_sub_return(1,(v))
306
307#define atomic_inc_return(v) atomic_add_return(1,(v))
308#define atomic64_inc_return(v) atomic64_add_return(1,(v))
309
310#define atomic_sub_and_test(i,v) (atomic_sub_return((i), (v)) == 0)
311#define atomic64_sub_and_test(i,v) (atomic64_sub_return((i), (v)) == 0)
312
313#define atomic_inc_and_test(v) (atomic_add_return(1, (v)) == 0)
314#define atomic64_inc_and_test(v) (atomic64_add_return(1, (v)) == 0)
315
316#define atomic_dec_and_test(v) (atomic_sub_return(1, (v)) == 0)
317#define atomic64_dec_and_test(v) (atomic64_sub_return(1, (v)) == 0)
318
319#define atomic_inc(v) atomic_add(1,(v))
320#define atomic64_inc(v) atomic64_add(1,(v))
321
322#define atomic_dec(v) atomic_sub(1,(v))
323#define atomic64_dec(v) atomic64_sub(1,(v))
324 300
325#endif /* _ALPHA_ATOMIC_H */ 301#endif /* _ALPHA_ATOMIC_H */
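As a rough illustration of how callers consume the fetch-style helper introduced by the hunks above (the object type and function below are invented for this sketch and are not part of the patch), a typical "get a reference unless it already hit zero" pattern looks like:

#include <linux/atomic.h>
#include <linux/types.h>

struct my_obj {
	atomic_t refs;
};

/* Take a reference unless the count has already dropped to zero. */
static bool my_obj_tryget(struct my_obj *obj)
{
	/*
	 * atomic_fetch_add_unless() adds 1 unless the counter equals 0 and
	 * returns the counter's previous value, so a non-zero return means
	 * the reference was successfully taken.
	 */
	return atomic_fetch_add_unless(&obj->refs, 1, 0) != 0;
}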
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index 9cf59fc60eab..5151d81476a1 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -50,6 +50,9 @@ config ARC
50 select HAVE_KERNEL_LZMA 50 select HAVE_KERNEL_LZMA
51 select ARCH_HAS_PTE_SPECIAL 51 select ARCH_HAS_PTE_SPECIAL
52 52
53config ARCH_HAS_CACHE_LINE_SIZE
54 def_bool y
55
53config MIGHT_HAVE_PCI 56config MIGHT_HAVE_PCI
54 bool 57 bool
55 58
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index 11859287c52a..4e0072730241 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -187,7 +187,8 @@ static inline int atomic_fetch_##op(int i, atomic_t *v) \
187ATOMIC_OPS(add, +=, add) 187ATOMIC_OPS(add, +=, add)
188ATOMIC_OPS(sub, -=, sub) 188ATOMIC_OPS(sub, -=, sub)
189 189
190#define atomic_andnot atomic_andnot 190#define atomic_andnot atomic_andnot
191#define atomic_fetch_andnot atomic_fetch_andnot
191 192
192#undef ATOMIC_OPS 193#undef ATOMIC_OPS
193#define ATOMIC_OPS(op, c_op, asm_op) \ 194#define ATOMIC_OPS(op, c_op, asm_op) \
@@ -296,8 +297,6 @@ ATOMIC_OPS(add, +=, CTOP_INST_AADD_DI_R2_R2_R3)
296 ATOMIC_FETCH_OP(op, c_op, asm_op) 297 ATOMIC_FETCH_OP(op, c_op, asm_op)
297 298
298ATOMIC_OPS(and, &=, CTOP_INST_AAND_DI_R2_R2_R3) 299ATOMIC_OPS(and, &=, CTOP_INST_AAND_DI_R2_R2_R3)
299#define atomic_andnot(mask, v) atomic_and(~(mask), (v))
300#define atomic_fetch_andnot(mask, v) atomic_fetch_and(~(mask), (v))
301ATOMIC_OPS(or, |=, CTOP_INST_AOR_DI_R2_R2_R3) 300ATOMIC_OPS(or, |=, CTOP_INST_AOR_DI_R2_R2_R3)
302ATOMIC_OPS(xor, ^=, CTOP_INST_AXOR_DI_R2_R2_R3) 301ATOMIC_OPS(xor, ^=, CTOP_INST_AXOR_DI_R2_R2_R3)
303 302
@@ -308,48 +307,6 @@ ATOMIC_OPS(xor, ^=, CTOP_INST_AXOR_DI_R2_R2_R3)
308#undef ATOMIC_OP_RETURN 307#undef ATOMIC_OP_RETURN
309#undef ATOMIC_OP 308#undef ATOMIC_OP
310 309
311/**
312 * __atomic_add_unless - add unless the number is a given value
313 * @v: pointer of type atomic_t
314 * @a: the amount to add to v...
315 * @u: ...unless v is equal to u.
316 *
317 * Atomically adds @a to @v, so long as it was not @u.
318 * Returns the old value of @v
319 */
320#define __atomic_add_unless(v, a, u) \
321({ \
322 int c, old; \
323 \
324 /* \
325 * Explicit full memory barrier needed before/after as \
326 * LLOCK/SCOND thmeselves don't provide any such semantics \
327 */ \
328 smp_mb(); \
329 \
330 c = atomic_read(v); \
331 while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c)\
332 c = old; \
333 \
334 smp_mb(); \
335 \
336 c; \
337})
338
339#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
340
341#define atomic_inc(v) atomic_add(1, v)
342#define atomic_dec(v) atomic_sub(1, v)
343
344#define atomic_inc_and_test(v) (atomic_add_return(1, v) == 0)
345#define atomic_dec_and_test(v) (atomic_sub_return(1, v) == 0)
346#define atomic_inc_return(v) atomic_add_return(1, (v))
347#define atomic_dec_return(v) atomic_sub_return(1, (v))
348#define atomic_sub_and_test(i, v) (atomic_sub_return(i, v) == 0)
349
350#define atomic_add_negative(i, v) (atomic_add_return(i, v) < 0)
351
352
353#ifdef CONFIG_GENERIC_ATOMIC64 310#ifdef CONFIG_GENERIC_ATOMIC64
354 311
355#include <asm-generic/atomic64.h> 312#include <asm-generic/atomic64.h>
@@ -472,7 +429,8 @@ static inline long long atomic64_fetch_##op(long long a, atomic64_t *v) \
472 ATOMIC64_OP_RETURN(op, op1, op2) \ 429 ATOMIC64_OP_RETURN(op, op1, op2) \
473 ATOMIC64_FETCH_OP(op, op1, op2) 430 ATOMIC64_FETCH_OP(op, op1, op2)
474 431
475#define atomic64_andnot atomic64_andnot 432#define atomic64_andnot atomic64_andnot
433#define atomic64_fetch_andnot atomic64_fetch_andnot
476 434
477ATOMIC64_OPS(add, add.f, adc) 435ATOMIC64_OPS(add, add.f, adc)
478ATOMIC64_OPS(sub, sub.f, sbc) 436ATOMIC64_OPS(sub, sub.f, sbc)
@@ -559,53 +517,43 @@ static inline long long atomic64_dec_if_positive(atomic64_t *v)
559 517
560 return val; 518 return val;
561} 519}
520#define atomic64_dec_if_positive atomic64_dec_if_positive
562 521
563/** 522/**
564 * atomic64_add_unless - add unless the number is a given value 523 * atomic64_fetch_add_unless - add unless the number is a given value
565 * @v: pointer of type atomic64_t 524 * @v: pointer of type atomic64_t
566 * @a: the amount to add to v... 525 * @a: the amount to add to v...
567 * @u: ...unless v is equal to u. 526 * @u: ...unless v is equal to u.
568 * 527 *
569 * if (v != u) { v += a; ret = 1} else {ret = 0} 528 * Atomically adds @a to @v, if it was not @u.
570 * Returns 1 iff @v was not @u (i.e. if add actually happened) 529 * Returns the old value of @v
571 */ 530 */
572static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u) 531static inline long long atomic64_fetch_add_unless(atomic64_t *v, long long a,
532 long long u)
573{ 533{
574 long long val; 534 long long old, temp;
575 int op_done;
576 535
577 smp_mb(); 536 smp_mb();
578 537
579 __asm__ __volatile__( 538 __asm__ __volatile__(
580 "1: llockd %0, [%2] \n" 539 "1: llockd %0, [%2] \n"
581 " mov %1, 1 \n"
582 " brne %L0, %L4, 2f # continue to add since v != u \n" 540 " brne %L0, %L4, 2f # continue to add since v != u \n"
583 " breq.d %H0, %H4, 3f # return since v == u \n" 541 " breq.d %H0, %H4, 3f # return since v == u \n"
584 " mov %1, 0 \n"
585 "2: \n" 542 "2: \n"
586 " add.f %L0, %L0, %L3 \n" 543 " add.f %L1, %L0, %L3 \n"
587 " adc %H0, %H0, %H3 \n" 544 " adc %H1, %H0, %H3 \n"
588 " scondd %0, [%2] \n" 545 " scondd %1, [%2] \n"
589 " bnz 1b \n" 546 " bnz 1b \n"
590 "3: \n" 547 "3: \n"
591 : "=&r"(val), "=&r" (op_done) 548 : "=&r"(old), "=&r" (temp)
592 : "r"(&v->counter), "r"(a), "r"(u) 549 : "r"(&v->counter), "r"(a), "r"(u)
593 : "cc"); /* memory clobber comes from smp_mb() */ 550 : "cc"); /* memory clobber comes from smp_mb() */
594 551
595 smp_mb(); 552 smp_mb();
596 553
597 return op_done; 554 return old;
598} 555}
599 556#define atomic64_fetch_add_unless atomic64_fetch_add_unless
600#define atomic64_add_negative(a, v) (atomic64_add_return((a), (v)) < 0)
601#define atomic64_inc(v) atomic64_add(1LL, (v))
602#define atomic64_inc_return(v) atomic64_add_return(1LL, (v))
603#define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0)
604#define atomic64_sub_and_test(a, v) (atomic64_sub_return((a), (v)) == 0)
605#define atomic64_dec(v) atomic64_sub(1LL, (v))
606#define atomic64_dec_return(v) atomic64_sub_return(1LL, (v))
607#define atomic64_dec_and_test(v) (atomic64_dec_return((v)) == 0)
608#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1LL, 0LL)
609 557
610#endif /* !CONFIG_GENERIC_ATOMIC64 */ 558#endif /* !CONFIG_GENERIC_ATOMIC64 */
611 559
diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h
index 8486f328cc5d..ff7d3232764a 100644
--- a/arch/arc/include/asm/cache.h
+++ b/arch/arc/include/asm/cache.h
@@ -48,7 +48,9 @@
48}) 48})
49 49
50/* Largest line length for either L1 or L2 is 128 bytes */ 50/* Largest line length for either L1 or L2 is 128 bytes */
51#define ARCH_DMA_MINALIGN 128 51#define SMP_CACHE_BYTES 128
52#define cache_line_size() SMP_CACHE_BYTES
53#define ARCH_DMA_MINALIGN SMP_CACHE_BYTES
52 54
53extern void arc_cache_init(void); 55extern void arc_cache_init(void);
54extern char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len); 56extern char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len);
diff --git a/arch/arc/include/asm/delay.h b/arch/arc/include/asm/delay.h
index d5da2115d78a..03d6bb0f4e13 100644
--- a/arch/arc/include/asm/delay.h
+++ b/arch/arc/include/asm/delay.h
@@ -17,8 +17,11 @@
17#ifndef __ASM_ARC_UDELAY_H 17#ifndef __ASM_ARC_UDELAY_H
18#define __ASM_ARC_UDELAY_H 18#define __ASM_ARC_UDELAY_H
19 19
20#include <asm-generic/types.h>
20#include <asm/param.h> /* HZ */ 21#include <asm/param.h> /* HZ */
21 22
23extern unsigned long loops_per_jiffy;
24
22static inline void __delay(unsigned long loops) 25static inline void __delay(unsigned long loops)
23{ 26{
24 __asm__ __volatile__( 27 __asm__ __volatile__(
diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c
index 9dbe645ee127..25c631942500 100644
--- a/arch/arc/mm/cache.c
+++ b/arch/arc/mm/cache.c
@@ -1038,7 +1038,7 @@ void flush_cache_mm(struct mm_struct *mm)
1038void flush_cache_page(struct vm_area_struct *vma, unsigned long u_vaddr, 1038void flush_cache_page(struct vm_area_struct *vma, unsigned long u_vaddr,
1039 unsigned long pfn) 1039 unsigned long pfn)
1040{ 1040{
1041 unsigned int paddr = pfn << PAGE_SHIFT; 1041 phys_addr_t paddr = pfn << PAGE_SHIFT;
1042 1042
1043 u_vaddr &= PAGE_MASK; 1043 u_vaddr &= PAGE_MASK;
1044 1044
@@ -1058,8 +1058,9 @@ void flush_anon_page(struct vm_area_struct *vma, struct page *page,
1058 unsigned long u_vaddr) 1058 unsigned long u_vaddr)
1059{ 1059{
1060 /* TBD: do we really need to clear the kernel mapping */ 1060 /* TBD: do we really need to clear the kernel mapping */
1061 __flush_dcache_page(page_address(page), u_vaddr); 1061 __flush_dcache_page((phys_addr_t)page_address(page), u_vaddr);
1062 __flush_dcache_page(page_address(page), page_address(page)); 1062 __flush_dcache_page((phys_addr_t)page_address(page),
1063 (phys_addr_t)page_address(page));
1063 1064
1064} 1065}
1065 1066
@@ -1246,6 +1247,16 @@ void __init arc_cache_init_master(void)
1246 } 1247 }
1247 } 1248 }
1248 1249
1250 /*
1251 * Check that SMP_CACHE_BYTES (and hence ARCH_DMA_MINALIGN) is larger
1252 * or equal to any cache line length.
1253 */
1254 BUILD_BUG_ON_MSG(L1_CACHE_BYTES > SMP_CACHE_BYTES,
1255 "SMP_CACHE_BYTES must be >= any cache line length");
1256 if (is_isa_arcv2() && (l2_line_sz > SMP_CACHE_BYTES))
1257 panic("L2 Cache line [%d] > kernel Config [%d]\n",
1258 l2_line_sz, SMP_CACHE_BYTES);
1259
1249 /* Note that SLC disable not formally supported till HS 3.0 */ 1260 /* Note that SLC disable not formally supported till HS 3.0 */
1250 if (is_isa_arcv2() && l2_line_sz && !slc_enable) 1261 if (is_isa_arcv2() && l2_line_sz && !slc_enable)
1251 arc_slc_disable(); 1262 arc_slc_disable();
diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c
index 8c1071840979..ec47e6079f5d 100644
--- a/arch/arc/mm/dma.c
+++ b/arch/arc/mm/dma.c
@@ -129,14 +129,59 @@ int arch_dma_mmap(struct device *dev, struct vm_area_struct *vma,
129 return ret; 129 return ret;
130} 130}
131 131
132/*
133 * Cache operations depending on function and direction argument, inspired by
134 * https://lkml.org/lkml/2018/5/18/979
135 * "dma_sync_*_for_cpu and direction=TO_DEVICE (was Re: [PATCH 02/20]
136 * dma-mapping: provide a generic dma-noncoherent implementation)"
137 *
138 * | map == for_device | unmap == for_cpu
139 * |----------------------------------------------------------------
140 * TO_DEV | writeback writeback | none none
141 * FROM_DEV | invalidate invalidate | invalidate* invalidate*
142 * BIDIR | writeback+inv writeback+inv | invalidate invalidate
143 *
144 * [*] needed for CPU speculative prefetches
145 *
146 * NOTE: we don't check the validity of direction argument as it is done in
147 * upper layer functions (in include/linux/dma-mapping.h)
148 */
149
132void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr, 150void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
133 size_t size, enum dma_data_direction dir) 151 size_t size, enum dma_data_direction dir)
134{ 152{
135 dma_cache_wback(paddr, size); 153 switch (dir) {
154 case DMA_TO_DEVICE:
155 dma_cache_wback(paddr, size);
156 break;
157
158 case DMA_FROM_DEVICE:
159 dma_cache_inv(paddr, size);
160 break;
161
162 case DMA_BIDIRECTIONAL:
163 dma_cache_wback_inv(paddr, size);
164 break;
165
166 default:
167 break;
168 }
136} 169}
137 170
138void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr, 171void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
139 size_t size, enum dma_data_direction dir) 172 size_t size, enum dma_data_direction dir)
140{ 173{
141 dma_cache_inv(paddr, size); 174 switch (dir) {
175 case DMA_TO_DEVICE:
176 break;
177
178 /* FROM_DEVICE invalidate needed if speculative CPU prefetch only */
179 case DMA_FROM_DEVICE:
180 case DMA_BIDIRECTIONAL:
181 dma_cache_inv(paddr, size);
182 break;
183
184 default:
185 break;
186 }
142} 187}
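For context, a hedged sketch of the generic streaming-DMA sequence whose map and unmap steps end up in arch_sync_dma_for_device()/arch_sync_dma_for_cpu() above; the device pointer, buffer and RX_BUF_SIZE below are placeholders, not part of the patch:

#include <linux/dma-mapping.h>

#define RX_BUF_SIZE 2048	/* hypothetical receive buffer size */

static int example_rx(struct device *dev, void *buf)
{
	dma_addr_t handle;

	/* map == for_device: DMA_FROM_DEVICE -> cache invalidate (see table) */
	handle = dma_map_single(dev, buf, RX_BUF_SIZE, DMA_FROM_DEVICE);
	if (dma_mapping_error(dev, handle))
		return -ENOMEM;

	/* ... the device DMAs received data into buf here ... */

	/* unmap == for_cpu: invalidate again to drop speculative prefetches */
	dma_unmap_single(dev, handle, RX_BUF_SIZE, DMA_FROM_DEVICE);
	return 0;
}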
diff --git a/arch/arc/plat-eznps/include/plat/ctop.h b/arch/arc/plat-eznps/include/plat/ctop.h
index 0c7d11022d0f..4f6a1673b3a6 100644
--- a/arch/arc/plat-eznps/include/plat/ctop.h
+++ b/arch/arc/plat-eznps/include/plat/ctop.h
@@ -21,6 +21,7 @@
21#error "Incorrect ctop.h include" 21#error "Incorrect ctop.h include"
22#endif 22#endif
23 23
24#include <linux/types.h>
24#include <soc/nps/common.h> 25#include <soc/nps/common.h>
25 26
26/* core auxiliary registers */ 27/* core auxiliary registers */
@@ -143,6 +144,15 @@ struct nps_host_reg_gim_p_int_dst {
143}; 144};
144 145
145/* AUX registers definition */ 146/* AUX registers definition */
147struct nps_host_reg_aux_dpc {
148 union {
149 struct {
150 u32 ien:1, men:1, hen:1, reserved:29;
151 };
152 u32 value;
153 };
154};
155
146struct nps_host_reg_aux_udmc { 156struct nps_host_reg_aux_udmc {
147 union { 157 union {
148 struct { 158 struct {
diff --git a/arch/arc/plat-eznps/mtm.c b/arch/arc/plat-eznps/mtm.c
index 2388de3d09ef..ed0077ef666e 100644
--- a/arch/arc/plat-eznps/mtm.c
+++ b/arch/arc/plat-eznps/mtm.c
@@ -15,6 +15,8 @@
15 */ 15 */
16 16
17#include <linux/smp.h> 17#include <linux/smp.h>
18#include <linux/init.h>
19#include <linux/kernel.h>
18#include <linux/io.h> 20#include <linux/io.h>
19#include <linux/log2.h> 21#include <linux/log2.h>
20#include <asm/arcregs.h> 22#include <asm/arcregs.h>
@@ -157,10 +159,10 @@ void mtm_enable_core(unsigned int cpu)
157/* Verify and set the value of the mtm hs counter */ 159/* Verify and set the value of the mtm hs counter */
158static int __init set_mtm_hs_ctr(char *ctr_str) 160static int __init set_mtm_hs_ctr(char *ctr_str)
159{ 161{
160 long hs_ctr; 162 int hs_ctr;
161 int ret; 163 int ret;
162 164
163 ret = kstrtol(ctr_str, 0, &hs_ctr); 165 ret = kstrtoint(ctr_str, 0, &hs_ctr);
164 166
165 if (ret || hs_ctr > MT_HS_CNT_MAX || hs_ctr < MT_HS_CNT_MIN) { 167 if (ret || hs_ctr > MT_HS_CNT_MAX || hs_ctr < MT_HS_CNT_MIN) {
166 pr_err("** Invalid @nps_mtm_hs_ctr [%d] needs to be [%d:%d] (incl)\n", 168 pr_err("** Invalid @nps_mtm_hs_ctr [%d] needs to be [%d:%d] (incl)\n",
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 843edfd000be..d7a81284c272 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -337,8 +337,8 @@ config ARCH_MULTIPLATFORM
337 select TIMER_OF 337 select TIMER_OF
338 select COMMON_CLK 338 select COMMON_CLK
339 select GENERIC_CLOCKEVENTS 339 select GENERIC_CLOCKEVENTS
340 select GENERIC_IRQ_MULTI_HANDLER
340 select MIGHT_HAVE_PCI 341 select MIGHT_HAVE_PCI
341 select MULTI_IRQ_HANDLER
342 select PCI_DOMAINS if PCI 342 select PCI_DOMAINS if PCI
343 select SPARSE_IRQ 343 select SPARSE_IRQ
344 select USE_OF 344 select USE_OF
@@ -465,9 +465,9 @@ config ARCH_DOVE
465 bool "Marvell Dove" 465 bool "Marvell Dove"
466 select CPU_PJ4 466 select CPU_PJ4
467 select GENERIC_CLOCKEVENTS 467 select GENERIC_CLOCKEVENTS
468 select GENERIC_IRQ_MULTI_HANDLER
468 select GPIOLIB 469 select GPIOLIB
469 select MIGHT_HAVE_PCI 470 select MIGHT_HAVE_PCI
470 select MULTI_IRQ_HANDLER
471 select MVEBU_MBUS 471 select MVEBU_MBUS
472 select PINCTRL 472 select PINCTRL
473 select PINCTRL_DOVE 473 select PINCTRL_DOVE
@@ -512,8 +512,8 @@ config ARCH_LPC32XX
512 select COMMON_CLK 512 select COMMON_CLK
513 select CPU_ARM926T 513 select CPU_ARM926T
514 select GENERIC_CLOCKEVENTS 514 select GENERIC_CLOCKEVENTS
515 select GENERIC_IRQ_MULTI_HANDLER
515 select GPIOLIB 516 select GPIOLIB
516 select MULTI_IRQ_HANDLER
517 select SPARSE_IRQ 517 select SPARSE_IRQ
518 select USE_OF 518 select USE_OF
519 help 519 help
@@ -532,11 +532,11 @@ config ARCH_PXA
532 select TIMER_OF 532 select TIMER_OF
533 select CPU_XSCALE if !CPU_XSC3 533 select CPU_XSCALE if !CPU_XSC3
534 select GENERIC_CLOCKEVENTS 534 select GENERIC_CLOCKEVENTS
535 select GENERIC_IRQ_MULTI_HANDLER
535 select GPIO_PXA 536 select GPIO_PXA
536 select GPIOLIB 537 select GPIOLIB
537 select HAVE_IDE 538 select HAVE_IDE
538 select IRQ_DOMAIN 539 select IRQ_DOMAIN
539 select MULTI_IRQ_HANDLER
540 select PLAT_PXA 540 select PLAT_PXA
541 select SPARSE_IRQ 541 select SPARSE_IRQ
542 help 542 help
@@ -572,11 +572,11 @@ config ARCH_SA1100
572 select CPU_FREQ 572 select CPU_FREQ
573 select CPU_SA1100 573 select CPU_SA1100
574 select GENERIC_CLOCKEVENTS 574 select GENERIC_CLOCKEVENTS
575 select GENERIC_IRQ_MULTI_HANDLER
575 select GPIOLIB 576 select GPIOLIB
576 select HAVE_IDE 577 select HAVE_IDE
577 select IRQ_DOMAIN 578 select IRQ_DOMAIN
578 select ISA 579 select ISA
579 select MULTI_IRQ_HANDLER
580 select NEED_MACH_MEMORY_H 580 select NEED_MACH_MEMORY_H
581 select SPARSE_IRQ 581 select SPARSE_IRQ
582 help 582 help
@@ -590,10 +590,10 @@ config ARCH_S3C24XX
590 select GENERIC_CLOCKEVENTS 590 select GENERIC_CLOCKEVENTS
591 select GPIO_SAMSUNG 591 select GPIO_SAMSUNG
592 select GPIOLIB 592 select GPIOLIB
593 select GENERIC_IRQ_MULTI_HANDLER
593 select HAVE_S3C2410_I2C if I2C 594 select HAVE_S3C2410_I2C if I2C
594 select HAVE_S3C2410_WATCHDOG if WATCHDOG 595 select HAVE_S3C2410_WATCHDOG if WATCHDOG
595 select HAVE_S3C_RTC if RTC_CLASS 596 select HAVE_S3C_RTC if RTC_CLASS
596 select MULTI_IRQ_HANDLER
597 select NEED_MACH_IO_H 597 select NEED_MACH_IO_H
598 select SAMSUNG_ATAGS 598 select SAMSUNG_ATAGS
599 select USE_OF 599 select USE_OF
@@ -627,10 +627,10 @@ config ARCH_OMAP1
627 select CLKSRC_MMIO 627 select CLKSRC_MMIO
628 select GENERIC_CLOCKEVENTS 628 select GENERIC_CLOCKEVENTS
629 select GENERIC_IRQ_CHIP 629 select GENERIC_IRQ_CHIP
630 select GENERIC_IRQ_MULTI_HANDLER
630 select GPIOLIB 631 select GPIOLIB
631 select HAVE_IDE 632 select HAVE_IDE
632 select IRQ_DOMAIN 633 select IRQ_DOMAIN
633 select MULTI_IRQ_HANDLER
634 select NEED_MACH_IO_H if PCCARD 634 select NEED_MACH_IO_H if PCCARD
635 select NEED_MACH_MEMORY_H 635 select NEED_MACH_MEMORY_H
636 select SPARSE_IRQ 636 select SPARSE_IRQ
@@ -921,11 +921,6 @@ config IWMMXT
921 Enable support for iWMMXt context switching at run time if 921 Enable support for iWMMXt context switching at run time if
922 running on a CPU that supports it. 922 running on a CPU that supports it.
923 923
924config MULTI_IRQ_HANDLER
925 bool
926 help
927 Allow each machine to specify it's own IRQ handler at run time.
928
929if !MMU 924if !MMU
930source "arch/arm/Kconfig-nommu" 925source "arch/arm/Kconfig-nommu"
931endif 926endif
diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h
index 66d0e215a773..f74756641410 100644
--- a/arch/arm/include/asm/atomic.h
+++ b/arch/arm/include/asm/atomic.h
@@ -130,7 +130,7 @@ static inline int atomic_cmpxchg_relaxed(atomic_t *ptr, int old, int new)
130} 130}
131#define atomic_cmpxchg_relaxed atomic_cmpxchg_relaxed 131#define atomic_cmpxchg_relaxed atomic_cmpxchg_relaxed
132 132
133static inline int __atomic_add_unless(atomic_t *v, int a, int u) 133static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
134{ 134{
135 int oldval, newval; 135 int oldval, newval;
136 unsigned long tmp; 136 unsigned long tmp;
@@ -156,6 +156,7 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
156 156
157 return oldval; 157 return oldval;
158} 158}
159#define atomic_fetch_add_unless atomic_fetch_add_unless
159 160
160#else /* ARM_ARCH_6 */ 161#else /* ARM_ARCH_6 */
161 162
@@ -215,15 +216,7 @@ static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
215 return ret; 216 return ret;
216} 217}
217 218
218static inline int __atomic_add_unless(atomic_t *v, int a, int u) 219#define atomic_fetch_andnot atomic_fetch_andnot
219{
220 int c, old;
221
222 c = atomic_read(v);
223 while (c != u && (old = atomic_cmpxchg((v), c, c + a)) != c)
224 c = old;
225 return c;
226}
227 220
228#endif /* __LINUX_ARM_ARCH__ */ 221#endif /* __LINUX_ARM_ARCH__ */
229 222
@@ -254,17 +247,6 @@ ATOMIC_OPS(xor, ^=, eor)
254 247
255#define atomic_xchg(v, new) (xchg(&((v)->counter), new)) 248#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
256 249
257#define atomic_inc(v) atomic_add(1, v)
258#define atomic_dec(v) atomic_sub(1, v)
259
260#define atomic_inc_and_test(v) (atomic_add_return(1, v) == 0)
261#define atomic_dec_and_test(v) (atomic_sub_return(1, v) == 0)
262#define atomic_inc_return_relaxed(v) (atomic_add_return_relaxed(1, v))
263#define atomic_dec_return_relaxed(v) (atomic_sub_return_relaxed(1, v))
264#define atomic_sub_and_test(i, v) (atomic_sub_return(i, v) == 0)
265
266#define atomic_add_negative(i,v) (atomic_add_return(i, v) < 0)
267
268#ifndef CONFIG_GENERIC_ATOMIC64 250#ifndef CONFIG_GENERIC_ATOMIC64
269typedef struct { 251typedef struct {
270 long long counter; 252 long long counter;
@@ -494,12 +476,13 @@ static inline long long atomic64_dec_if_positive(atomic64_t *v)
494 476
495 return result; 477 return result;
496} 478}
479#define atomic64_dec_if_positive atomic64_dec_if_positive
497 480
498static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u) 481static inline long long atomic64_fetch_add_unless(atomic64_t *v, long long a,
482 long long u)
499{ 483{
500 long long val; 484 long long oldval, newval;
501 unsigned long tmp; 485 unsigned long tmp;
502 int ret = 1;
503 486
504 smp_mb(); 487 smp_mb();
505 prefetchw(&v->counter); 488 prefetchw(&v->counter);
@@ -508,33 +491,23 @@ static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u)
508"1: ldrexd %0, %H0, [%4]\n" 491"1: ldrexd %0, %H0, [%4]\n"
509" teq %0, %5\n" 492" teq %0, %5\n"
510" teqeq %H0, %H5\n" 493" teqeq %H0, %H5\n"
511" moveq %1, #0\n"
512" beq 2f\n" 494" beq 2f\n"
513" adds %Q0, %Q0, %Q6\n" 495" adds %Q1, %Q0, %Q6\n"
514" adc %R0, %R0, %R6\n" 496" adc %R1, %R0, %R6\n"
515" strexd %2, %0, %H0, [%4]\n" 497" strexd %2, %1, %H1, [%4]\n"
516" teq %2, #0\n" 498" teq %2, #0\n"
517" bne 1b\n" 499" bne 1b\n"
518"2:" 500"2:"
519 : "=&r" (val), "+r" (ret), "=&r" (tmp), "+Qo" (v->counter) 501 : "=&r" (oldval), "=&r" (newval), "=&r" (tmp), "+Qo" (v->counter)
520 : "r" (&v->counter), "r" (u), "r" (a) 502 : "r" (&v->counter), "r" (u), "r" (a)
521 : "cc"); 503 : "cc");
522 504
523 if (ret) 505 if (oldval != u)
524 smp_mb(); 506 smp_mb();
525 507
526 return ret; 508 return oldval;
527} 509}
528 510#define atomic64_fetch_add_unless atomic64_fetch_add_unless
529#define atomic64_add_negative(a, v) (atomic64_add_return((a), (v)) < 0)
530#define atomic64_inc(v) atomic64_add(1LL, (v))
531#define atomic64_inc_return_relaxed(v) atomic64_add_return_relaxed(1LL, (v))
532#define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0)
533#define atomic64_sub_and_test(a, v) (atomic64_sub_return((a), (v)) == 0)
534#define atomic64_dec(v) atomic64_sub(1LL, (v))
535#define atomic64_dec_return_relaxed(v) atomic64_sub_return_relaxed(1LL, (v))
536#define atomic64_dec_and_test(v) (atomic64_dec_return((v)) == 0)
537#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1LL, 0LL)
538 511
539#endif /* !CONFIG_GENERIC_ATOMIC64 */ 512#endif /* !CONFIG_GENERIC_ATOMIC64 */
540#endif 513#endif
diff --git a/arch/arm/include/asm/efi.h b/arch/arm/include/asm/efi.h
index 17f1f1a814ff..38badaae8d9d 100644
--- a/arch/arm/include/asm/efi.h
+++ b/arch/arm/include/asm/efi.h
@@ -58,6 +58,9 @@ void efi_virtmap_unload(void);
58#define efi_call_runtime(f, ...) sys_table_arg->runtime->f(__VA_ARGS__) 58#define efi_call_runtime(f, ...) sys_table_arg->runtime->f(__VA_ARGS__)
59#define efi_is_64bit() (false) 59#define efi_is_64bit() (false)
60 60
61#define efi_table_attr(table, attr, instance) \
62 ((table##_t *)instance)->attr
63
61#define efi_call_proto(protocol, f, instance, ...) \ 64#define efi_call_proto(protocol, f, instance, ...) \
62 ((protocol##_t *)instance)->f(instance, ##__VA_ARGS__) 65 ((protocol##_t *)instance)->f(instance, ##__VA_ARGS__)
63 66
diff --git a/arch/arm/include/asm/irq.h b/arch/arm/include/asm/irq.h
index b6f319606e30..c883fcbe93b6 100644
--- a/arch/arm/include/asm/irq.h
+++ b/arch/arm/include/asm/irq.h
@@ -31,11 +31,6 @@ extern void asm_do_IRQ(unsigned int, struct pt_regs *);
31void handle_IRQ(unsigned int, struct pt_regs *); 31void handle_IRQ(unsigned int, struct pt_regs *);
32void init_IRQ(void); 32void init_IRQ(void);
33 33
34#ifdef CONFIG_MULTI_IRQ_HANDLER
35extern void (*handle_arch_irq)(struct pt_regs *);
36extern void set_handle_irq(void (*handle_irq)(struct pt_regs *));
37#endif
38
39#ifdef CONFIG_SMP 34#ifdef CONFIG_SMP
40extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask, 35extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask,
41 bool exclude_self); 36 bool exclude_self);
diff --git a/arch/arm/include/asm/mach/arch.h b/arch/arm/include/asm/mach/arch.h
index 5c1ad11aa392..bb8851208e17 100644
--- a/arch/arm/include/asm/mach/arch.h
+++ b/arch/arm/include/asm/mach/arch.h
@@ -59,7 +59,7 @@ struct machine_desc {
59 void (*init_time)(void); 59 void (*init_time)(void);
60 void (*init_machine)(void); 60 void (*init_machine)(void);
61 void (*init_late)(void); 61 void (*init_late)(void);
62#ifdef CONFIG_MULTI_IRQ_HANDLER 62#ifdef CONFIG_GENERIC_IRQ_MULTI_HANDLER
63 void (*handle_irq)(struct pt_regs *); 63 void (*handle_irq)(struct pt_regs *);
64#endif 64#endif
65 void (*restart)(enum reboot_mode, const char *); 65 void (*restart)(enum reboot_mode, const char *);
diff --git a/arch/arm/include/asm/mach/time.h b/arch/arm/include/asm/mach/time.h
index 0f79e4dec7f9..4ac3a019a46f 100644
--- a/arch/arm/include/asm/mach/time.h
+++ b/arch/arm/include/asm/mach/time.h
@@ -13,7 +13,6 @@
13extern void timer_tick(void); 13extern void timer_tick(void);
14 14
15typedef void (*clock_access_fn)(struct timespec64 *); 15typedef void (*clock_access_fn)(struct timespec64 *);
16extern int register_persistent_clock(clock_access_fn read_boot, 16extern int register_persistent_clock(clock_access_fn read_persistent);
17 clock_access_fn read_persistent);
18 17
19#endif 18#endif
diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h
index d5562f9ce600..f854148c8d7c 100644
--- a/arch/arm/include/asm/tlb.h
+++ b/arch/arm/include/asm/tlb.h
@@ -292,5 +292,13 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
292{ 292{
293} 293}
294 294
295static inline void tlb_flush_remove_tables(struct mm_struct *mm)
296{
297}
298
299static inline void tlb_flush_remove_tables_local(void *arg)
300{
301}
302
295#endif /* CONFIG_MMU */ 303#endif /* CONFIG_MMU */
296#endif 304#endif
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 179a9f6bd1e3..e85a3af9ddeb 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -22,7 +22,7 @@
22#include <asm/glue-df.h> 22#include <asm/glue-df.h>
23#include <asm/glue-pf.h> 23#include <asm/glue-pf.h>
24#include <asm/vfpmacros.h> 24#include <asm/vfpmacros.h>
25#ifndef CONFIG_MULTI_IRQ_HANDLER 25#ifndef CONFIG_GENERIC_IRQ_MULTI_HANDLER
26#include <mach/entry-macro.S> 26#include <mach/entry-macro.S>
27#endif 27#endif
28#include <asm/thread_notify.h> 28#include <asm/thread_notify.h>
@@ -39,7 +39,7 @@
39 * Interrupt handling. 39 * Interrupt handling.
40 */ 40 */
41 .macro irq_handler 41 .macro irq_handler
42#ifdef CONFIG_MULTI_IRQ_HANDLER 42#ifdef CONFIG_GENERIC_IRQ_MULTI_HANDLER
43 ldr r1, =handle_arch_irq 43 ldr r1, =handle_arch_irq
44 mov r0, sp 44 mov r0, sp
45 badr lr, 9997f 45 badr lr, 9997f
@@ -1226,9 +1226,3 @@ vector_addrexcptn:
1226 .globl cr_alignment 1226 .globl cr_alignment
1227cr_alignment: 1227cr_alignment:
1228 .space 4 1228 .space 4
1229
1230#ifdef CONFIG_MULTI_IRQ_HANDLER
1231 .globl handle_arch_irq
1232handle_arch_irq:
1233 .space 4
1234#endif
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index 106a1466518d..746565a876dc 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -48,6 +48,7 @@ saved_pc .req lr
48 * from those features make this path too inefficient. 48 * from those features make this path too inefficient.
49 */ 49 */
50ret_fast_syscall: 50ret_fast_syscall:
51__ret_fast_syscall:
51 UNWIND(.fnstart ) 52 UNWIND(.fnstart )
52 UNWIND(.cantunwind ) 53 UNWIND(.cantunwind )
53 disable_irq_notrace @ disable interrupts 54 disable_irq_notrace @ disable interrupts
@@ -78,6 +79,7 @@ fast_work_pending:
78 * call. 79 * call.
79 */ 80 */
80ret_fast_syscall: 81ret_fast_syscall:
82__ret_fast_syscall:
81 UNWIND(.fnstart ) 83 UNWIND(.fnstart )
82 UNWIND(.cantunwind ) 84 UNWIND(.cantunwind )
83 str r0, [sp, #S_R0 + S_OFF]! @ save returned r0 85 str r0, [sp, #S_R0 + S_OFF]! @ save returned r0
@@ -255,7 +257,7 @@ local_restart:
255 tst r10, #_TIF_SYSCALL_WORK @ are we tracing syscalls? 257 tst r10, #_TIF_SYSCALL_WORK @ are we tracing syscalls?
256 bne __sys_trace 258 bne __sys_trace
257 259
258 invoke_syscall tbl, scno, r10, ret_fast_syscall 260 invoke_syscall tbl, scno, r10, __ret_fast_syscall
259 261
260 add r1, sp, #S_OFF 262 add r1, sp, #S_OFF
2612: cmp scno, #(__ARM_NR_BASE - __NR_SYSCALL_BASE) 2632: cmp scno, #(__ARM_NR_BASE - __NR_SYSCALL_BASE)
diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c
index ece04a457486..9908dacf9229 100644
--- a/arch/arm/kernel/irq.c
+++ b/arch/arm/kernel/irq.c
@@ -102,16 +102,6 @@ void __init init_IRQ(void)
102 uniphier_cache_init(); 102 uniphier_cache_init();
103} 103}
104 104
105#ifdef CONFIG_MULTI_IRQ_HANDLER
106void __init set_handle_irq(void (*handle_irq)(struct pt_regs *))
107{
108 if (handle_arch_irq)
109 return;
110
111 handle_arch_irq = handle_irq;
112}
113#endif
114
115#ifdef CONFIG_SPARSE_IRQ 105#ifdef CONFIG_SPARSE_IRQ
116int __init arch_probe_nr_irqs(void) 106int __init arch_probe_nr_irqs(void)
117{ 107{
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 35ca494c028c..4c249cb261f3 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -1145,7 +1145,7 @@ void __init setup_arch(char **cmdline_p)
1145 1145
1146 reserve_crashkernel(); 1146 reserve_crashkernel();
1147 1147
1148#ifdef CONFIG_MULTI_IRQ_HANDLER 1148#ifdef CONFIG_GENERIC_IRQ_MULTI_HANDLER
1149 handle_arch_irq = mdesc->handle_irq; 1149 handle_arch_irq = mdesc->handle_irq;
1150#endif 1150#endif
1151 1151
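
The ARM-private CONFIG_MULTI_IRQ_HANDLER is being replaced by CONFIG_GENERIC_IRQ_MULTI_HANDLER here (and on arm64 just below), so the handle_arch_irq pointer and the set_handle_irq() registration helper dropped from entry-armv.S and irq.c are now expected to come from the generic IRQ core. A minimal sketch of that shared implementation, paraphrased rather than quoted from kernel/irq/handle.c:

    #ifdef CONFIG_GENERIC_IRQ_MULTI_HANDLER
    /* one global entry point, set once at boot by the root irqchip driver */
    void (*handle_arch_irq)(struct pt_regs *) __ro_after_init;

    int __init set_handle_irq(void (*handle_irq)(struct pt_regs *))
    {
    	if (WARN_ON(handle_arch_irq))
    		return -EBUSY;		/* a handler was already registered */

    	handle_arch_irq = handle_irq;
    	return 0;
    }
    #endif

Irqchip drivers keep calling set_handle_irq() exactly as before; only the place the symbol lives changes.
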
diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c
index cf2701cb0de8..078b259ead4e 100644
--- a/arch/arm/kernel/time.c
+++ b/arch/arm/kernel/time.c
@@ -83,29 +83,18 @@ static void dummy_clock_access(struct timespec64 *ts)
83} 83}
84 84
85static clock_access_fn __read_persistent_clock = dummy_clock_access; 85static clock_access_fn __read_persistent_clock = dummy_clock_access;
86static clock_access_fn __read_boot_clock = dummy_clock_access;
87 86
88void read_persistent_clock64(struct timespec64 *ts) 87void read_persistent_clock64(struct timespec64 *ts)
89{ 88{
90 __read_persistent_clock(ts); 89 __read_persistent_clock(ts);
91} 90}
92 91
93void read_boot_clock64(struct timespec64 *ts) 92int __init register_persistent_clock(clock_access_fn read_persistent)
94{
95 __read_boot_clock(ts);
96}
97
98int __init register_persistent_clock(clock_access_fn read_boot,
99 clock_access_fn read_persistent)
100{ 93{
101 /* Only allow the clockaccess functions to be registered once */ 94 /* Only allow the clockaccess functions to be registered once */
102 if (__read_persistent_clock == dummy_clock_access && 95 if (__read_persistent_clock == dummy_clock_access) {
103 __read_boot_clock == dummy_clock_access) {
104 if (read_boot)
105 __read_boot_clock = read_boot;
106 if (read_persistent) 96 if (read_persistent)
107 __read_persistent_clock = read_persistent; 97 __read_persistent_clock = read_persistent;
108
109 return 0; 98 return 0;
110 } 99 }
111 100
diff --git a/arch/arm/mach-rpc/ecard.c b/arch/arm/mach-rpc/ecard.c
index 8db62cc54a6a..04b2f22c2739 100644
--- a/arch/arm/mach-rpc/ecard.c
+++ b/arch/arm/mach-rpc/ecard.c
@@ -212,7 +212,7 @@ static DEFINE_MUTEX(ecard_mutex);
212 */ 212 */
213static void ecard_init_pgtables(struct mm_struct *mm) 213static void ecard_init_pgtables(struct mm_struct *mm)
214{ 214{
215 struct vm_area_struct vma; 215 struct vm_area_struct vma = TLB_FLUSH_VMA(mm, VM_EXEC);
216 216
217 /* We want to set up the page tables for the following mapping: 217 /* We want to set up the page tables for the following mapping:
218 * Virtual Physical 218 * Virtual Physical
@@ -237,9 +237,6 @@ static void ecard_init_pgtables(struct mm_struct *mm)
237 237
238 memcpy(dst_pgd, src_pgd, sizeof(pgd_t) * (EASI_SIZE / PGDIR_SIZE)); 238 memcpy(dst_pgd, src_pgd, sizeof(pgd_t) * (EASI_SIZE / PGDIR_SIZE));
239 239
240 vma_init(&vma, mm);
241 vma.vm_flags = VM_EXEC;
242
243 flush_tlb_range(&vma, IO_START, IO_START + IO_SIZE); 240 flush_tlb_range(&vma, IO_START, IO_START + IO_SIZE);
244 flush_tlb_range(&vma, EASI_START, EASI_START + EASI_SIZE); 241 flush_tlb_range(&vma, EASI_START, EASI_START + EASI_SIZE);
245} 242}
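
TLB_FLUSH_VMA() replaces the pattern of declaring an uninitialised on-stack vm_area_struct, running vma_init() on it and poking vm_flags by hand. Its definition is not part of this diff; assuming it lives in include/asm-generic/tlb.h, it amounts to a designated initialiser along these lines:

    /* assumed definition: a throwaway vma carrying only the fields
     * that flush_tlb_range() actually consults */
    #define TLB_FLUSH_VMA(mm, flags)	{ .vm_mm = (mm), .vm_flags = (flags) }

so struct vm_area_struct vma = TLB_FLUSH_VMA(mm, VM_EXEC); hands flush_tlb_range() a fully formed dummy vma without touching the rest of the structure.
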
diff --git a/arch/arm/plat-omap/counter_32k.c b/arch/arm/plat-omap/counter_32k.c
index 2438b96004c1..fcc5bfec8bd1 100644
--- a/arch/arm/plat-omap/counter_32k.c
+++ b/arch/arm/plat-omap/counter_32k.c
@@ -110,7 +110,7 @@ int __init omap_init_clocksource_32k(void __iomem *vbase)
110 } 110 }
111 111
112 sched_clock_register(omap_32k_read_sched_clock, 32, 32768); 112 sched_clock_register(omap_32k_read_sched_clock, 32, 32768);
113 register_persistent_clock(NULL, omap_read_persistent_clock64); 113 register_persistent_clock(omap_read_persistent_clock64);
114 pr_info("OMAP clocksource: 32k_counter at 32768 Hz\n"); 114 pr_info("OMAP clocksource: 32k_counter at 32768 Hz\n");
115 115
116 return 0; 116 return 0;
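
With read_boot_clock64() gone from arch/arm/kernel/time.c, register_persistent_clock() loses its read_boot argument and takes only the persistent-clock accessor, which is exactly the adjustment made to the OMAP caller above. A minimal usage sketch; the accessor name below is invented for illustration:

    static void example_read_persistent_clock64(struct timespec64 *ts)
    {
    	/* fill *ts from a battery-backed RTC or free-running counter */
    	ts->tv_sec = 0;
    	ts->tv_nsec = 0;
    }

    static int __init example_timer_init(void)
    {
    	return register_persistent_clock(example_read_persistent_clock64);
    }
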
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 42c090cf0292..3d1011957823 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -74,6 +74,7 @@ config ARM64
74 select GENERIC_CPU_AUTOPROBE 74 select GENERIC_CPU_AUTOPROBE
75 select GENERIC_EARLY_IOREMAP 75 select GENERIC_EARLY_IOREMAP
76 select GENERIC_IDLE_POLL_SETUP 76 select GENERIC_IDLE_POLL_SETUP
77 select GENERIC_IRQ_MULTI_HANDLER
77 select GENERIC_IRQ_PROBE 78 select GENERIC_IRQ_PROBE
78 select GENERIC_IRQ_SHOW 79 select GENERIC_IRQ_SHOW
79 select GENERIC_IRQ_SHOW_LEVEL 80 select GENERIC_IRQ_SHOW_LEVEL
@@ -264,9 +265,6 @@ config ARCH_SUPPORTS_UPROBES
264config ARCH_PROC_KCORE_TEXT 265config ARCH_PROC_KCORE_TEXT
265 def_bool y 266 def_bool y
266 267
267config MULTI_IRQ_HANDLER
268 def_bool y
269
270source "init/Kconfig" 268source "init/Kconfig"
271 269
272source "kernel/Kconfig.freezer" 270source "kernel/Kconfig.freezer"
diff --git a/arch/arm64/crypto/aes-ce-ccm-core.S b/arch/arm64/crypto/aes-ce-ccm-core.S
index 88f5aef7934c..e3a375c4cb83 100644
--- a/arch/arm64/crypto/aes-ce-ccm-core.S
+++ b/arch/arm64/crypto/aes-ce-ccm-core.S
@@ -19,33 +19,24 @@
19 * u32 *macp, u8 const rk[], u32 rounds); 19 * u32 *macp, u8 const rk[], u32 rounds);
20 */ 20 */
21ENTRY(ce_aes_ccm_auth_data) 21ENTRY(ce_aes_ccm_auth_data)
22 frame_push 7 22 ldr w8, [x3] /* leftover from prev round? */
23
24 mov x19, x0
25 mov x20, x1
26 mov x21, x2
27 mov x22, x3
28 mov x23, x4
29 mov x24, x5
30
31 ldr w25, [x22] /* leftover from prev round? */
32 ld1 {v0.16b}, [x0] /* load mac */ 23 ld1 {v0.16b}, [x0] /* load mac */
33 cbz w25, 1f 24 cbz w8, 1f
34 sub w25, w25, #16 25 sub w8, w8, #16
35 eor v1.16b, v1.16b, v1.16b 26 eor v1.16b, v1.16b, v1.16b
360: ldrb w7, [x20], #1 /* get 1 byte of input */ 270: ldrb w7, [x1], #1 /* get 1 byte of input */
37 subs w21, w21, #1 28 subs w2, w2, #1
38 add w25, w25, #1 29 add w8, w8, #1
39 ins v1.b[0], w7 30 ins v1.b[0], w7
40 ext v1.16b, v1.16b, v1.16b, #1 /* rotate in the input bytes */ 31 ext v1.16b, v1.16b, v1.16b, #1 /* rotate in the input bytes */
41 beq 8f /* out of input? */ 32 beq 8f /* out of input? */
42 cbnz w25, 0b 33 cbnz w8, 0b
43 eor v0.16b, v0.16b, v1.16b 34 eor v0.16b, v0.16b, v1.16b
441: ld1 {v3.4s}, [x23] /* load first round key */ 351: ld1 {v3.4s}, [x4] /* load first round key */
45 prfm pldl1strm, [x20] 36 prfm pldl1strm, [x1]
46 cmp w24, #12 /* which key size? */ 37 cmp w5, #12 /* which key size? */
47 add x6, x23, #16 38 add x6, x4, #16
48 sub w7, w24, #2 /* modified # of rounds */ 39 sub w7, w5, #2 /* modified # of rounds */
49 bmi 2f 40 bmi 2f
50 bne 5f 41 bne 5f
51 mov v5.16b, v3.16b 42 mov v5.16b, v3.16b
@@ -64,43 +55,33 @@ ENTRY(ce_aes_ccm_auth_data)
64 ld1 {v5.4s}, [x6], #16 /* load next round key */ 55 ld1 {v5.4s}, [x6], #16 /* load next round key */
65 bpl 3b 56 bpl 3b
66 aese v0.16b, v4.16b 57 aese v0.16b, v4.16b
67 subs w21, w21, #16 /* last data? */ 58 subs w2, w2, #16 /* last data? */
68 eor v0.16b, v0.16b, v5.16b /* final round */ 59 eor v0.16b, v0.16b, v5.16b /* final round */
69 bmi 6f 60 bmi 6f
70 ld1 {v1.16b}, [x20], #16 /* load next input block */ 61 ld1 {v1.16b}, [x1], #16 /* load next input block */
71 eor v0.16b, v0.16b, v1.16b /* xor with mac */ 62 eor v0.16b, v0.16b, v1.16b /* xor with mac */
72 beq 6f 63 bne 1b
73 646: st1 {v0.16b}, [x0] /* store mac */
74 if_will_cond_yield_neon
75 st1 {v0.16b}, [x19] /* store mac */
76 do_cond_yield_neon
77 ld1 {v0.16b}, [x19] /* reload mac */
78 endif_yield_neon
79
80 b 1b
816: st1 {v0.16b}, [x19] /* store mac */
82 beq 10f 65 beq 10f
83 adds w21, w21, #16 66 adds w2, w2, #16
84 beq 10f 67 beq 10f
85 mov w25, w21 68 mov w8, w2
867: ldrb w7, [x20], #1 697: ldrb w7, [x1], #1
87 umov w6, v0.b[0] 70 umov w6, v0.b[0]
88 eor w6, w6, w7 71 eor w6, w6, w7
89 strb w6, [x19], #1 72 strb w6, [x0], #1
90 subs w21, w21, #1 73 subs w2, w2, #1
91 beq 10f 74 beq 10f
92 ext v0.16b, v0.16b, v0.16b, #1 /* rotate out the mac bytes */ 75 ext v0.16b, v0.16b, v0.16b, #1 /* rotate out the mac bytes */
93 b 7b 76 b 7b
948: mov w7, w25 778: mov w7, w8
95 add w25, w25, #16 78 add w8, w8, #16
969: ext v1.16b, v1.16b, v1.16b, #1 799: ext v1.16b, v1.16b, v1.16b, #1
97 adds w7, w7, #1 80 adds w7, w7, #1
98 bne 9b 81 bne 9b
99 eor v0.16b, v0.16b, v1.16b 82 eor v0.16b, v0.16b, v1.16b
100 st1 {v0.16b}, [x19] 83 st1 {v0.16b}, [x0]
10110: str w25, [x22] 8410: str w8, [x3]
102
103 frame_pop
104 ret 85 ret
105ENDPROC(ce_aes_ccm_auth_data) 86ENDPROC(ce_aes_ccm_auth_data)
106 87
@@ -145,29 +126,19 @@ ENTRY(ce_aes_ccm_final)
145ENDPROC(ce_aes_ccm_final) 126ENDPROC(ce_aes_ccm_final)
146 127
147 .macro aes_ccm_do_crypt,enc 128 .macro aes_ccm_do_crypt,enc
148 frame_push 8 129 ldr x8, [x6, #8] /* load lower ctr */
149 130 ld1 {v0.16b}, [x5] /* load mac */
150 mov x19, x0 131CPU_LE( rev x8, x8 ) /* keep swabbed ctr in reg */
151 mov x20, x1
152 mov x21, x2
153 mov x22, x3
154 mov x23, x4
155 mov x24, x5
156 mov x25, x6
157
158 ldr x26, [x25, #8] /* load lower ctr */
159 ld1 {v0.16b}, [x24] /* load mac */
160CPU_LE( rev x26, x26 ) /* keep swabbed ctr in reg */
1610: /* outer loop */ 1320: /* outer loop */
162 ld1 {v1.8b}, [x25] /* load upper ctr */ 133 ld1 {v1.8b}, [x6] /* load upper ctr */
163 prfm pldl1strm, [x20] 134 prfm pldl1strm, [x1]
164 add x26, x26, #1 135 add x8, x8, #1
165 rev x9, x26 136 rev x9, x8
166 cmp w23, #12 /* which key size? */ 137 cmp w4, #12 /* which key size? */
167 sub w7, w23, #2 /* get modified # of rounds */ 138 sub w7, w4, #2 /* get modified # of rounds */
168 ins v1.d[1], x9 /* no carry in lower ctr */ 139 ins v1.d[1], x9 /* no carry in lower ctr */
169 ld1 {v3.4s}, [x22] /* load first round key */ 140 ld1 {v3.4s}, [x3] /* load first round key */
170 add x10, x22, #16 141 add x10, x3, #16
171 bmi 1f 142 bmi 1f
172 bne 4f 143 bne 4f
173 mov v5.16b, v3.16b 144 mov v5.16b, v3.16b
@@ -194,9 +165,9 @@ CPU_LE( rev x26, x26 ) /* keep swabbed ctr in reg */
194 bpl 2b 165 bpl 2b
195 aese v0.16b, v4.16b 166 aese v0.16b, v4.16b
196 aese v1.16b, v4.16b 167 aese v1.16b, v4.16b
197 subs w21, w21, #16 168 subs w2, w2, #16
198 bmi 7f /* partial block? */ 169 bmi 6f /* partial block? */
199 ld1 {v2.16b}, [x20], #16 /* load next input block */ 170 ld1 {v2.16b}, [x1], #16 /* load next input block */
200 .if \enc == 1 171 .if \enc == 1
201 eor v2.16b, v2.16b, v5.16b /* final round enc+mac */ 172 eor v2.16b, v2.16b, v5.16b /* final round enc+mac */
202 eor v1.16b, v1.16b, v2.16b /* xor with crypted ctr */ 173 eor v1.16b, v1.16b, v2.16b /* xor with crypted ctr */
@@ -205,29 +176,18 @@ CPU_LE( rev x26, x26 ) /* keep swabbed ctr in reg */
205 eor v1.16b, v2.16b, v5.16b /* final round enc */ 176 eor v1.16b, v2.16b, v5.16b /* final round enc */
206 .endif 177 .endif
207 eor v0.16b, v0.16b, v2.16b /* xor mac with pt ^ rk[last] */ 178 eor v0.16b, v0.16b, v2.16b /* xor mac with pt ^ rk[last] */
208 st1 {v1.16b}, [x19], #16 /* write output block */ 179 st1 {v1.16b}, [x0], #16 /* write output block */
209 beq 5f 180 bne 0b
210 181CPU_LE( rev x8, x8 )
211 if_will_cond_yield_neon 182 st1 {v0.16b}, [x5] /* store mac */
212 st1 {v0.16b}, [x24] /* store mac */ 183 str x8, [x6, #8] /* store lsb end of ctr (BE) */
213 do_cond_yield_neon 1845: ret
214 ld1 {v0.16b}, [x24] /* reload mac */ 185
215 endif_yield_neon 1866: eor v0.16b, v0.16b, v5.16b /* final round mac */
216
217 b 0b
2185:
219CPU_LE( rev x26, x26 )
220 st1 {v0.16b}, [x24] /* store mac */
221 str x26, [x25, #8] /* store lsb end of ctr (BE) */
222
2236: frame_pop
224 ret
225
2267: eor v0.16b, v0.16b, v5.16b /* final round mac */
227 eor v1.16b, v1.16b, v5.16b /* final round enc */ 187 eor v1.16b, v1.16b, v5.16b /* final round enc */
228 st1 {v0.16b}, [x24] /* store mac */ 188 st1 {v0.16b}, [x5] /* store mac */
229 add w21, w21, #16 /* process partial tail block */ 189 add w2, w2, #16 /* process partial tail block */
2308: ldrb w9, [x20], #1 /* get 1 byte of input */ 1907: ldrb w9, [x1], #1 /* get 1 byte of input */
231 umov w6, v1.b[0] /* get top crypted ctr byte */ 191 umov w6, v1.b[0] /* get top crypted ctr byte */
232 umov w7, v0.b[0] /* get top mac byte */ 192 umov w7, v0.b[0] /* get top mac byte */
233 .if \enc == 1 193 .if \enc == 1
@@ -237,13 +197,13 @@ CPU_LE( rev x26, x26 )
237 eor w9, w9, w6 197 eor w9, w9, w6
238 eor w7, w7, w9 198 eor w7, w7, w9
239 .endif 199 .endif
240 strb w9, [x19], #1 /* store out byte */ 200 strb w9, [x0], #1 /* store out byte */
241 strb w7, [x24], #1 /* store mac byte */ 201 strb w7, [x5], #1 /* store mac byte */
242 subs w21, w21, #1 202 subs w2, w2, #1
243 beq 6b 203 beq 5b
244 ext v0.16b, v0.16b, v0.16b, #1 /* shift out mac byte */ 204 ext v0.16b, v0.16b, v0.16b, #1 /* shift out mac byte */
245 ext v1.16b, v1.16b, v1.16b, #1 /* shift out ctr byte */ 205 ext v1.16b, v1.16b, v1.16b, #1 /* shift out ctr byte */
246 b 8b 206 b 7b
247 .endm 207 .endm
248 208
249 /* 209 /*
diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S
index dcffb9e77589..c723647b37db 100644
--- a/arch/arm64/crypto/ghash-ce-core.S
+++ b/arch/arm64/crypto/ghash-ce-core.S
@@ -322,55 +322,41 @@ ENDPROC(pmull_ghash_update_p8)
322 .endm 322 .endm
323 323
324 .macro pmull_gcm_do_crypt, enc 324 .macro pmull_gcm_do_crypt, enc
325 frame_push 10 325 ld1 {SHASH.2d}, [x4]
326 ld1 {XL.2d}, [x1]
327 ldr x8, [x5, #8] // load lower counter
326 328
327 mov x19, x0 329 load_round_keys w7, x6
328 mov x20, x1
329 mov x21, x2
330 mov x22, x3
331 mov x23, x4
332 mov x24, x5
333 mov x25, x6
334 mov x26, x7
335 .if \enc == 1
336 ldr x27, [sp, #96] // first stacked arg
337 .endif
338
339 ldr x28, [x24, #8] // load lower counter
340CPU_LE( rev x28, x28 )
341
3420: mov x0, x25
343 load_round_keys w26, x0
344 ld1 {SHASH.2d}, [x23]
345 ld1 {XL.2d}, [x20]
346 330
347 movi MASK.16b, #0xe1 331 movi MASK.16b, #0xe1
348 ext SHASH2.16b, SHASH.16b, SHASH.16b, #8 332 ext SHASH2.16b, SHASH.16b, SHASH.16b, #8
333CPU_LE( rev x8, x8 )
349 shl MASK.2d, MASK.2d, #57 334 shl MASK.2d, MASK.2d, #57
350 eor SHASH2.16b, SHASH2.16b, SHASH.16b 335 eor SHASH2.16b, SHASH2.16b, SHASH.16b
351 336
352 .if \enc == 1 337 .if \enc == 1
353 ld1 {KS.16b}, [x27] 338 ldr x10, [sp]
339 ld1 {KS.16b}, [x10]
354 .endif 340 .endif
355 341
3561: ld1 {CTR.8b}, [x24] // load upper counter 3420: ld1 {CTR.8b}, [x5] // load upper counter
357 ld1 {INP.16b}, [x22], #16 343 ld1 {INP.16b}, [x3], #16
358 rev x9, x28 344 rev x9, x8
359 add x28, x28, #1 345 add x8, x8, #1
360 sub w19, w19, #1 346 sub w0, w0, #1
361 ins CTR.d[1], x9 // set lower counter 347 ins CTR.d[1], x9 // set lower counter
362 348
363 .if \enc == 1 349 .if \enc == 1
364 eor INP.16b, INP.16b, KS.16b // encrypt input 350 eor INP.16b, INP.16b, KS.16b // encrypt input
365 st1 {INP.16b}, [x21], #16 351 st1 {INP.16b}, [x2], #16
366 .endif 352 .endif
367 353
368 rev64 T1.16b, INP.16b 354 rev64 T1.16b, INP.16b
369 355
370 cmp w26, #12 356 cmp w7, #12
371 b.ge 4f // AES-192/256? 357 b.ge 2f // AES-192/256?
372 358
3732: enc_round CTR, v21 3591: enc_round CTR, v21
374 360
375 ext T2.16b, XL.16b, XL.16b, #8 361 ext T2.16b, XL.16b, XL.16b, #8
376 ext IN1.16b, T1.16b, T1.16b, #8 362 ext IN1.16b, T1.16b, T1.16b, #8
@@ -425,39 +411,27 @@ CPU_LE( rev x28, x28 )
425 411
426 .if \enc == 0 412 .if \enc == 0
427 eor INP.16b, INP.16b, KS.16b 413 eor INP.16b, INP.16b, KS.16b
428 st1 {INP.16b}, [x21], #16 414 st1 {INP.16b}, [x2], #16
429 .endif 415 .endif
430 416
431 cbz w19, 3f 417 cbnz w0, 0b
432 418
433 if_will_cond_yield_neon 419CPU_LE( rev x8, x8 )
434 st1 {XL.2d}, [x20] 420 st1 {XL.2d}, [x1]
435 .if \enc == 1 421 str x8, [x5, #8] // store lower counter
436 st1 {KS.16b}, [x27]
437 .endif
438 do_cond_yield_neon
439 b 0b
440 endif_yield_neon
441 422
442 b 1b
443
4443: st1 {XL.2d}, [x20]
445 .if \enc == 1 423 .if \enc == 1
446 st1 {KS.16b}, [x27] 424 st1 {KS.16b}, [x10]
447 .endif 425 .endif
448 426
449CPU_LE( rev x28, x28 )
450 str x28, [x24, #8] // store lower counter
451
452 frame_pop
453 ret 427 ret
454 428
4554: b.eq 5f // AES-192? 4292: b.eq 3f // AES-192?
456 enc_round CTR, v17 430 enc_round CTR, v17
457 enc_round CTR, v18 431 enc_round CTR, v18
4585: enc_round CTR, v19 4323: enc_round CTR, v19
459 enc_round CTR, v20 433 enc_round CTR, v20
460 b 2b 434 b 1b
461 .endm 435 .endm
462 436
463 /* 437 /*
diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c
index 7cf0b1aa6ea8..8a10f1d7199a 100644
--- a/arch/arm64/crypto/ghash-ce-glue.c
+++ b/arch/arm64/crypto/ghash-ce-glue.c
@@ -488,9 +488,13 @@ static int gcm_decrypt(struct aead_request *req)
488 err = skcipher_walk_done(&walk, 488 err = skcipher_walk_done(&walk,
489 walk.nbytes % AES_BLOCK_SIZE); 489 walk.nbytes % AES_BLOCK_SIZE);
490 } 490 }
491 if (walk.nbytes) 491 if (walk.nbytes) {
492 pmull_gcm_encrypt_block(iv, iv, NULL, 492 kernel_neon_begin();
493 pmull_gcm_encrypt_block(iv, iv, ctx->aes_key.key_enc,
493 num_rounds(&ctx->aes_key)); 494 num_rounds(&ctx->aes_key));
495 kernel_neon_end();
496 }
497
494 } else { 498 } else {
495 __aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv, 499 __aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv,
496 num_rounds(&ctx->aes_key)); 500 num_rounds(&ctx->aes_key));
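
The CCM and GHASH/GCM cores drop their frame_push/frame_pop register shuffling and the if_will_cond_yield_neon scaffolding and go back to working directly on the argument registers; the conditional-yield support appears to have been reverted as not worth its complexity for these fast primitives. In exchange, the C glue now owns the NEON state itself and passes the real round keys instead of NULL, bracketing the remaining single-block call as shown above. The general shape of such a kernel-mode NEON section is simply:

    kernel_neon_begin();
    /* ...call the NEON / Crypto Extensions routine, e.g. the
     * pmull_gcm_encrypt_block() invocation above... */
    kernel_neon_end();

kernel_neon_begin() claims the CPU's FP/SIMD state and runs the section with preemption disabled, so keeping the work between the two calls bounded is what keeps scheduling latency low without yield hooks inside the assembly.
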
diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h
index c0235e0ff849..9bca54dda75c 100644
--- a/arch/arm64/include/asm/atomic.h
+++ b/arch/arm64/include/asm/atomic.h
@@ -40,17 +40,6 @@
40 40
41#include <asm/cmpxchg.h> 41#include <asm/cmpxchg.h>
42 42
43#define ___atomic_add_unless(v, a, u, sfx) \
44({ \
45 typeof((v)->counter) c, old; \
46 \
47 c = atomic##sfx##_read(v); \
48 while (c != (u) && \
49 (old = atomic##sfx##_cmpxchg((v), c, c + (a))) != c) \
50 c = old; \
51 c; \
52 })
53
54#define ATOMIC_INIT(i) { (i) } 43#define ATOMIC_INIT(i) { (i) }
55 44
56#define atomic_read(v) READ_ONCE((v)->counter) 45#define atomic_read(v) READ_ONCE((v)->counter)
@@ -61,21 +50,11 @@
61#define atomic_add_return_release atomic_add_return_release 50#define atomic_add_return_release atomic_add_return_release
62#define atomic_add_return atomic_add_return 51#define atomic_add_return atomic_add_return
63 52
64#define atomic_inc_return_relaxed(v) atomic_add_return_relaxed(1, (v))
65#define atomic_inc_return_acquire(v) atomic_add_return_acquire(1, (v))
66#define atomic_inc_return_release(v) atomic_add_return_release(1, (v))
67#define atomic_inc_return(v) atomic_add_return(1, (v))
68
69#define atomic_sub_return_relaxed atomic_sub_return_relaxed 53#define atomic_sub_return_relaxed atomic_sub_return_relaxed
70#define atomic_sub_return_acquire atomic_sub_return_acquire 54#define atomic_sub_return_acquire atomic_sub_return_acquire
71#define atomic_sub_return_release atomic_sub_return_release 55#define atomic_sub_return_release atomic_sub_return_release
72#define atomic_sub_return atomic_sub_return 56#define atomic_sub_return atomic_sub_return
73 57
74#define atomic_dec_return_relaxed(v) atomic_sub_return_relaxed(1, (v))
75#define atomic_dec_return_acquire(v) atomic_sub_return_acquire(1, (v))
76#define atomic_dec_return_release(v) atomic_sub_return_release(1, (v))
77#define atomic_dec_return(v) atomic_sub_return(1, (v))
78
79#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed 58#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed
80#define atomic_fetch_add_acquire atomic_fetch_add_acquire 59#define atomic_fetch_add_acquire atomic_fetch_add_acquire
81#define atomic_fetch_add_release atomic_fetch_add_release 60#define atomic_fetch_add_release atomic_fetch_add_release
@@ -119,13 +98,6 @@
119 cmpxchg_release(&((v)->counter), (old), (new)) 98 cmpxchg_release(&((v)->counter), (old), (new))
120#define atomic_cmpxchg(v, old, new) cmpxchg(&((v)->counter), (old), (new)) 99#define atomic_cmpxchg(v, old, new) cmpxchg(&((v)->counter), (old), (new))
121 100
122#define atomic_inc(v) atomic_add(1, (v))
123#define atomic_dec(v) atomic_sub(1, (v))
124#define atomic_inc_and_test(v) (atomic_inc_return(v) == 0)
125#define atomic_dec_and_test(v) (atomic_dec_return(v) == 0)
126#define atomic_sub_and_test(i, v) (atomic_sub_return((i), (v)) == 0)
127#define atomic_add_negative(i, v) (atomic_add_return((i), (v)) < 0)
128#define __atomic_add_unless(v, a, u) ___atomic_add_unless(v, a, u,)
129#define atomic_andnot atomic_andnot 101#define atomic_andnot atomic_andnot
130 102
131/* 103/*
@@ -140,21 +112,11 @@
140#define atomic64_add_return_release atomic64_add_return_release 112#define atomic64_add_return_release atomic64_add_return_release
141#define atomic64_add_return atomic64_add_return 113#define atomic64_add_return atomic64_add_return
142 114
143#define atomic64_inc_return_relaxed(v) atomic64_add_return_relaxed(1, (v))
144#define atomic64_inc_return_acquire(v) atomic64_add_return_acquire(1, (v))
145#define atomic64_inc_return_release(v) atomic64_add_return_release(1, (v))
146#define atomic64_inc_return(v) atomic64_add_return(1, (v))
147
148#define atomic64_sub_return_relaxed atomic64_sub_return_relaxed 115#define atomic64_sub_return_relaxed atomic64_sub_return_relaxed
149#define atomic64_sub_return_acquire atomic64_sub_return_acquire 116#define atomic64_sub_return_acquire atomic64_sub_return_acquire
150#define atomic64_sub_return_release atomic64_sub_return_release 117#define atomic64_sub_return_release atomic64_sub_return_release
151#define atomic64_sub_return atomic64_sub_return 118#define atomic64_sub_return atomic64_sub_return
152 119
153#define atomic64_dec_return_relaxed(v) atomic64_sub_return_relaxed(1, (v))
154#define atomic64_dec_return_acquire(v) atomic64_sub_return_acquire(1, (v))
155#define atomic64_dec_return_release(v) atomic64_sub_return_release(1, (v))
156#define atomic64_dec_return(v) atomic64_sub_return(1, (v))
157
158#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed 120#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed
159#define atomic64_fetch_add_acquire atomic64_fetch_add_acquire 121#define atomic64_fetch_add_acquire atomic64_fetch_add_acquire
160#define atomic64_fetch_add_release atomic64_fetch_add_release 122#define atomic64_fetch_add_release atomic64_fetch_add_release
@@ -195,16 +157,9 @@
195#define atomic64_cmpxchg_release atomic_cmpxchg_release 157#define atomic64_cmpxchg_release atomic_cmpxchg_release
196#define atomic64_cmpxchg atomic_cmpxchg 158#define atomic64_cmpxchg atomic_cmpxchg
197 159
198#define atomic64_inc(v) atomic64_add(1, (v))
199#define atomic64_dec(v) atomic64_sub(1, (v))
200#define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0)
201#define atomic64_dec_and_test(v) (atomic64_dec_return(v) == 0)
202#define atomic64_sub_and_test(i, v) (atomic64_sub_return((i), (v)) == 0)
203#define atomic64_add_negative(i, v) (atomic64_add_return((i), (v)) < 0)
204#define atomic64_add_unless(v, a, u) (___atomic_add_unless(v, a, u, 64) != u)
205#define atomic64_andnot atomic64_andnot 160#define atomic64_andnot atomic64_andnot
206 161
207#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) 162#define atomic64_dec_if_positive atomic64_dec_if_positive
208 163
209#endif 164#endif
210#endif 165#endif
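
The local ___atomic_add_unless() helper and the block of inc/dec/and-test wrappers can go because <linux/atomic.h> now generates those operations for any architecture that does not provide its own, leaving this header to advertise only what arm64 genuinely implements (note the surviving atomic64_dec_if_positive define). The generic fetch-add-unless fallback is roughly the following, paraphrased rather than copied:

    static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
    {
    	int c = atomic_read(v);

    	do {
    		if (unlikely(c == u))
    			break;
    	} while (!atomic_try_cmpxchg(v, &c, c + a));

    	return c;	/* old value; callers compare against u */
    }

atomic_inc(), atomic_dec_and_test(), atomic_inc_return() and friends are built the same way on top of atomic_add() and atomic_sub_return().
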
diff --git a/arch/arm64/include/asm/bitops.h b/arch/arm64/include/asm/bitops.h
index 9c19594ce7cb..10d536b1af74 100644
--- a/arch/arm64/include/asm/bitops.h
+++ b/arch/arm64/include/asm/bitops.h
@@ -17,22 +17,11 @@
17#define __ASM_BITOPS_H 17#define __ASM_BITOPS_H
18 18
19#include <linux/compiler.h> 19#include <linux/compiler.h>
20#include <asm/barrier.h>
21 20
22#ifndef _LINUX_BITOPS_H 21#ifndef _LINUX_BITOPS_H
23#error only <linux/bitops.h> can be included directly 22#error only <linux/bitops.h> can be included directly
24#endif 23#endif
25 24
26/*
27 * Little endian assembly atomic bitops.
28 */
29extern void set_bit(int nr, volatile unsigned long *p);
30extern void clear_bit(int nr, volatile unsigned long *p);
31extern void change_bit(int nr, volatile unsigned long *p);
32extern int test_and_set_bit(int nr, volatile unsigned long *p);
33extern int test_and_clear_bit(int nr, volatile unsigned long *p);
34extern int test_and_change_bit(int nr, volatile unsigned long *p);
35
36#include <asm-generic/bitops/builtin-__ffs.h> 25#include <asm-generic/bitops/builtin-__ffs.h>
37#include <asm-generic/bitops/builtin-ffs.h> 26#include <asm-generic/bitops/builtin-ffs.h>
38#include <asm-generic/bitops/builtin-__fls.h> 27#include <asm-generic/bitops/builtin-__fls.h>
@@ -44,15 +33,11 @@ extern int test_and_change_bit(int nr, volatile unsigned long *p);
44 33
45#include <asm-generic/bitops/sched.h> 34#include <asm-generic/bitops/sched.h>
46#include <asm-generic/bitops/hweight.h> 35#include <asm-generic/bitops/hweight.h>
47#include <asm-generic/bitops/lock.h>
48 36
37#include <asm-generic/bitops/atomic.h>
38#include <asm-generic/bitops/lock.h>
49#include <asm-generic/bitops/non-atomic.h> 39#include <asm-generic/bitops/non-atomic.h>
50#include <asm-generic/bitops/le.h> 40#include <asm-generic/bitops/le.h>
51 41#include <asm-generic/bitops/ext2-atomic-setbit.h>
52/*
53 * Ext2 is defined to use little-endian byte ordering.
54 */
55#define ext2_set_bit_atomic(lock, nr, p) test_and_set_bit_le(nr, p)
56#define ext2_clear_bit_atomic(lock, nr, p) test_and_clear_bit_le(nr, p)
57 42
58#endif /* __ASM_BITOPS_H */ 43#endif /* __ASM_BITOPS_H */
diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
index 192d791f1103..7ed320895d1f 100644
--- a/arch/arm64/include/asm/efi.h
+++ b/arch/arm64/include/asm/efi.h
@@ -87,6 +87,9 @@ static inline unsigned long efi_get_max_initrd_addr(unsigned long dram_base,
87#define efi_call_runtime(f, ...) sys_table_arg->runtime->f(__VA_ARGS__) 87#define efi_call_runtime(f, ...) sys_table_arg->runtime->f(__VA_ARGS__)
88#define efi_is_64bit() (true) 88#define efi_is_64bit() (true)
89 89
90#define efi_table_attr(table, attr, instance) \
91 ((table##_t *)instance)->attr
92
90#define efi_call_proto(protocol, f, instance, ...) \ 93#define efi_call_proto(protocol, f, instance, ...) \
91 ((protocol##_t *)instance)->f(instance, ##__VA_ARGS__) 94 ((protocol##_t *)instance)->f(instance, ##__VA_ARGS__)
92 95
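
efi_table_attr() gives the stub code shared with 32-bit EFI one spelling for fetching a member out of a firmware table; on arm64 the tables are native-width, so it reduces to a cast and a dereference. A hypothetical use (the variable names are illustrative only):

    /* expands to ((efi_system_table_t *)sys_table)->nr_tables */
    unsigned long nr = efi_table_attr(efi_system_table, nr_tables, sys_table);
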
diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h
index a0fee6985e6a..b2b0c6405eb0 100644
--- a/arch/arm64/include/asm/irq.h
+++ b/arch/arm64/include/asm/irq.h
@@ -8,8 +8,6 @@
8 8
9struct pt_regs; 9struct pt_regs;
10 10
11extern void set_handle_irq(void (*handle_irq)(struct pt_regs *));
12
13static inline int nr_legacy_irqs(void) 11static inline int nr_legacy_irqs(void)
14{ 12{
15 return 0; 13 return 0;
diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
index d87f2d646caa..0ad1cf233470 100644
--- a/arch/arm64/include/asm/tlb.h
+++ b/arch/arm64/include/asm/tlb.h
@@ -37,9 +37,7 @@ static inline void __tlb_remove_table(void *_table)
37 37
38static inline void tlb_flush(struct mmu_gather *tlb) 38static inline void tlb_flush(struct mmu_gather *tlb)
39{ 39{
40 struct vm_area_struct vma; 40 struct vm_area_struct vma = TLB_FLUSH_VMA(tlb->mm, 0);
41
42 vma_init(&vma, tlb->mm);
43 41
44 /* 42 /*
45 * The ASID allocator will either invalidate the ASID or mark 43 * The ASID allocator will either invalidate the ASID or mark
diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
index 60e5fc661f74..780a12f59a8f 100644
--- a/arch/arm64/kernel/irq.c
+++ b/arch/arm64/kernel/irq.c
@@ -42,16 +42,6 @@ int arch_show_interrupts(struct seq_file *p, int prec)
42 return 0; 42 return 0;
43} 43}
44 44
45void (*handle_arch_irq)(struct pt_regs *) = NULL;
46
47void __init set_handle_irq(void (*handle_irq)(struct pt_regs *))
48{
49 if (handle_arch_irq)
50 return;
51
52 handle_arch_irq = handle_irq;
53}
54
55#ifdef CONFIG_VMAP_STACK 45#ifdef CONFIG_VMAP_STACK
56static void init_irq_stacks(void) 46static void init_irq_stacks(void)
57{ 47{
diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile
index 137710f4dac3..68755fd70dcf 100644
--- a/arch/arm64/lib/Makefile
+++ b/arch/arm64/lib/Makefile
@@ -1,5 +1,5 @@
1# SPDX-License-Identifier: GPL-2.0 1# SPDX-License-Identifier: GPL-2.0
2lib-y := bitops.o clear_user.o delay.o copy_from_user.o \ 2lib-y := clear_user.o delay.o copy_from_user.o \
3 copy_to_user.o copy_in_user.o copy_page.o \ 3 copy_to_user.o copy_in_user.o copy_page.o \
4 clear_page.o memchr.o memcpy.o memmove.o memset.o \ 4 clear_page.o memchr.o memcpy.o memmove.o memset.o \
5 memcmp.o strcmp.o strncmp.o strlen.o strnlen.o \ 5 memcmp.o strcmp.o strncmp.o strlen.o strnlen.o \
diff --git a/arch/arm64/lib/bitops.S b/arch/arm64/lib/bitops.S
deleted file mode 100644
index 43ac736baa5b..000000000000
--- a/arch/arm64/lib/bitops.S
+++ /dev/null
@@ -1,76 +0,0 @@
1/*
2 * Based on arch/arm/lib/bitops.h
3 *
4 * Copyright (C) 2013 ARM Ltd.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#include <linux/linkage.h>
20#include <asm/assembler.h>
21#include <asm/lse.h>
22
23/*
24 * x0: bits 5:0 bit offset
25 * bits 31:6 word offset
26 * x1: address
27 */
28 .macro bitop, name, llsc, lse
29ENTRY( \name )
30 and w3, w0, #63 // Get bit offset
31 eor w0, w0, w3 // Clear low bits
32 mov x2, #1
33 add x1, x1, x0, lsr #3 // Get word offset
34alt_lse " prfm pstl1strm, [x1]", "nop"
35 lsl x3, x2, x3 // Create mask
36
37alt_lse "1: ldxr x2, [x1]", "\lse x3, [x1]"
38alt_lse " \llsc x2, x2, x3", "nop"
39alt_lse " stxr w0, x2, [x1]", "nop"
40alt_lse " cbnz w0, 1b", "nop"
41
42 ret
43ENDPROC(\name )
44 .endm
45
46 .macro testop, name, llsc, lse
47ENTRY( \name )
48 and w3, w0, #63 // Get bit offset
49 eor w0, w0, w3 // Clear low bits
50 mov x2, #1
51 add x1, x1, x0, lsr #3 // Get word offset
52alt_lse " prfm pstl1strm, [x1]", "nop"
53 lsl x4, x2, x3 // Create mask
54
55alt_lse "1: ldxr x2, [x1]", "\lse x4, x2, [x1]"
56 lsr x0, x2, x3
57alt_lse " \llsc x2, x2, x4", "nop"
58alt_lse " stlxr w5, x2, [x1]", "nop"
59alt_lse " cbnz w5, 1b", "nop"
60alt_lse " dmb ish", "nop"
61
62 and x0, x0, #1
63 ret
64ENDPROC(\name )
65 .endm
66
67/*
68 * Atomic bit operations.
69 */
70 bitop change_bit, eor, steor
71 bitop clear_bit, bic, stclr
72 bitop set_bit, orr, stset
73
74 testop test_and_change_bit, eor, ldeoral
75 testop test_and_clear_bit, bic, ldclral
76 testop test_and_set_bit, orr, ldsetal
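
The hand-written LL/SC-with-LSE-alternatives bitops above disappear together with the extern declarations in bitops.h; set_bit() and friends now come from the rewritten <asm-generic/bitops/atomic.h>, which builds them on atomic_long_*() operations and therefore inherits the LSE instructions wherever the atomics already use them. Paraphrasing the generic form for illustration:

    static inline void set_bit(unsigned int nr, volatile unsigned long *p)
    {
    	p += BIT_WORD(nr);			/* word holding the bit */
    	atomic_long_or(BIT_MASK(nr), (atomic_long_t *)p);
    }

    static inline int test_and_set_bit(unsigned int nr, volatile unsigned long *p)
    {
    	unsigned long mask = BIT_MASK(nr);
    	long old;

    	p += BIT_WORD(nr);
    	old = atomic_long_fetch_or(mask, (atomic_long_t *)p);
    	return !!(old & mask);
    }
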
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index 1854e49aa18a..192b3ba07075 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -108,13 +108,10 @@ static pte_t get_clear_flush(struct mm_struct *mm,
108 unsigned long pgsize, 108 unsigned long pgsize,
109 unsigned long ncontig) 109 unsigned long ncontig)
110{ 110{
111 struct vm_area_struct vma;
112 pte_t orig_pte = huge_ptep_get(ptep); 111 pte_t orig_pte = huge_ptep_get(ptep);
113 bool valid = pte_valid(orig_pte); 112 bool valid = pte_valid(orig_pte);
114 unsigned long i, saddr = addr; 113 unsigned long i, saddr = addr;
115 114
116 vma_init(&vma, mm);
117
118 for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) { 115 for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) {
119 pte_t pte = ptep_get_and_clear(mm, addr, ptep); 116 pte_t pte = ptep_get_and_clear(mm, addr, ptep);
120 117
@@ -127,8 +124,10 @@ static pte_t get_clear_flush(struct mm_struct *mm,
127 orig_pte = pte_mkdirty(orig_pte); 124 orig_pte = pte_mkdirty(orig_pte);
128 } 125 }
129 126
130 if (valid) 127 if (valid) {
128 struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);
131 flush_tlb_range(&vma, saddr, addr); 129 flush_tlb_range(&vma, saddr, addr);
130 }
132 return orig_pte; 131 return orig_pte;
133} 132}
134 133
@@ -147,10 +146,9 @@ static void clear_flush(struct mm_struct *mm,
147 unsigned long pgsize, 146 unsigned long pgsize,
148 unsigned long ncontig) 147 unsigned long ncontig)
149{ 148{
150 struct vm_area_struct vma; 149 struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);
151 unsigned long i, saddr = addr; 150 unsigned long i, saddr = addr;
152 151
153 vma_init(&vma, mm);
154 for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) 152 for (i = 0; i < ncontig; i++, addr += pgsize, ptep++)
155 pte_clear(mm, addr, ptep); 153 pte_clear(mm, addr, ptep);
156 154
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 493ff75670ff..8ae5d7ae4af3 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -977,12 +977,12 @@ int pmd_clear_huge(pmd_t *pmdp)
977 return 1; 977 return 1;
978} 978}
979 979
980int pud_free_pmd_page(pud_t *pud) 980int pud_free_pmd_page(pud_t *pud, unsigned long addr)
981{ 981{
982 return pud_none(*pud); 982 return pud_none(*pud);
983} 983}
984 984
985int pmd_free_pte_page(pmd_t *pmd) 985int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
986{ 986{
987 return pmd_none(*pmd); 987 return pmd_none(*pmd);
988} 988}
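
pud_free_pmd_page() and pmd_free_pte_page() grow an address argument as part of an ioremap fix on other architectures, where the caller's virtual address is needed to flush stale TLB entries before an intermediate page table is freed; the arm64 stubs can simply ignore it.
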
diff --git a/arch/h8300/include/asm/atomic.h b/arch/h8300/include/asm/atomic.h
index 941e7554e886..c6b6a06231b2 100644
--- a/arch/h8300/include/asm/atomic.h
+++ b/arch/h8300/include/asm/atomic.h
@@ -2,8 +2,10 @@
2#ifndef __ARCH_H8300_ATOMIC__ 2#ifndef __ARCH_H8300_ATOMIC__
3#define __ARCH_H8300_ATOMIC__ 3#define __ARCH_H8300_ATOMIC__
4 4
5#include <linux/compiler.h>
5#include <linux/types.h> 6#include <linux/types.h>
6#include <asm/cmpxchg.h> 7#include <asm/cmpxchg.h>
8#include <asm/irqflags.h>
7 9
8/* 10/*
9 * Atomic operations that C can't guarantee us. Useful for 11 * Atomic operations that C can't guarantee us. Useful for
@@ -15,8 +17,6 @@
15#define atomic_read(v) READ_ONCE((v)->counter) 17#define atomic_read(v) READ_ONCE((v)->counter)
16#define atomic_set(v, i) WRITE_ONCE(((v)->counter), (i)) 18#define atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))
17 19
18#include <linux/kernel.h>
19
20#define ATOMIC_OP_RETURN(op, c_op) \ 20#define ATOMIC_OP_RETURN(op, c_op) \
21static inline int atomic_##op##_return(int i, atomic_t *v) \ 21static inline int atomic_##op##_return(int i, atomic_t *v) \
22{ \ 22{ \
@@ -69,18 +69,6 @@ ATOMIC_OPS(sub, -=)
69#undef ATOMIC_OP_RETURN 69#undef ATOMIC_OP_RETURN
70#undef ATOMIC_OP 70#undef ATOMIC_OP
71 71
72#define atomic_add_negative(a, v) (atomic_add_return((a), (v)) < 0)
73#define atomic_sub_and_test(i, v) (atomic_sub_return(i, v) == 0)
74
75#define atomic_inc_return(v) atomic_add_return(1, v)
76#define atomic_dec_return(v) atomic_sub_return(1, v)
77
78#define atomic_inc(v) (void)atomic_inc_return(v)
79#define atomic_inc_and_test(v) (atomic_inc_return(v) == 0)
80
81#define atomic_dec(v) (void)atomic_dec_return(v)
82#define atomic_dec_and_test(v) (atomic_dec_return(v) == 0)
83
84static inline int atomic_cmpxchg(atomic_t *v, int old, int new) 72static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
85{ 73{
86 int ret; 74 int ret;
@@ -94,7 +82,7 @@ static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
94 return ret; 82 return ret;
95} 83}
96 84
97static inline int __atomic_add_unless(atomic_t *v, int a, int u) 85static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
98{ 86{
99 int ret; 87 int ret;
100 h8300flags flags; 88 h8300flags flags;
@@ -106,5 +94,6 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
106 arch_local_irq_restore(flags); 94 arch_local_irq_restore(flags);
107 return ret; 95 return ret;
108} 96}
97#define atomic_fetch_add_unless atomic_fetch_add_unless
109 98
110#endif /* __ARCH_H8300_ATOMIC __ */ 99#endif /* __ARCH_H8300_ATOMIC __ */
diff --git a/arch/hexagon/include/asm/atomic.h b/arch/hexagon/include/asm/atomic.h
index fb3dfb2a667e..311b9894ccc8 100644
--- a/arch/hexagon/include/asm/atomic.h
+++ b/arch/hexagon/include/asm/atomic.h
@@ -164,7 +164,7 @@ ATOMIC_OPS(xor)
164#undef ATOMIC_OP 164#undef ATOMIC_OP
165 165
166/** 166/**
167 * __atomic_add_unless - add unless the number is a given value 167 * atomic_fetch_add_unless - add unless the number is a given value
168 * @v: pointer to value 168 * @v: pointer to value
169 * @a: amount to add 169 * @a: amount to add
170 * @u: unless value is equal to u 170 * @u: unless value is equal to u
@@ -173,7 +173,7 @@ ATOMIC_OPS(xor)
173 * 173 *
174 */ 174 */
175 175
176static inline int __atomic_add_unless(atomic_t *v, int a, int u) 176static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
177{ 177{
178 int __oldval; 178 int __oldval;
179 register int tmp; 179 register int tmp;
@@ -196,18 +196,6 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
196 ); 196 );
197 return __oldval; 197 return __oldval;
198} 198}
199 199#define atomic_fetch_add_unless atomic_fetch_add_unless
200#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
201
202#define atomic_inc(v) atomic_add(1, (v))
203#define atomic_dec(v) atomic_sub(1, (v))
204
205#define atomic_inc_and_test(v) (atomic_add_return(1, (v)) == 0)
206#define atomic_dec_and_test(v) (atomic_sub_return(1, (v)) == 0)
207#define atomic_sub_and_test(i, v) (atomic_sub_return(i, (v)) == 0)
208#define atomic_add_negative(i, v) (atomic_add_return(i, (v)) < 0)
209
210#define atomic_inc_return(v) (atomic_add_return(1, v))
211#define atomic_dec_return(v) (atomic_sub_return(1, v))
212 200
213#endif 201#endif
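
Renaming __atomic_add_unless() to atomic_fetch_add_unless() and adding the matching #define atomic_fetch_add_unless marker tells the generic header that the architecture supplies its own version, so the wrappers the removed block used to spell out by hand now come from <linux/atomic.h>, roughly as:

    static inline bool atomic_add_unless(atomic_t *v, int a, int u)
    {
    	return atomic_fetch_add_unless(v, a, u) != u;
    }

    #define atomic_inc_not_zero(v)	atomic_add_unless((v), 1, 0)
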
diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h
index 2524fb60fbc2..206530d0751b 100644
--- a/arch/ia64/include/asm/atomic.h
+++ b/arch/ia64/include/asm/atomic.h
@@ -215,91 +215,10 @@ ATOMIC64_FETCH_OP(xor, ^)
215 (cmpxchg(&((v)->counter), old, new)) 215 (cmpxchg(&((v)->counter), old, new))
216#define atomic64_xchg(v, new) (xchg(&((v)->counter), new)) 216#define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
217 217
218static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
219{
220 int c, old;
221 c = atomic_read(v);
222 for (;;) {
223 if (unlikely(c == (u)))
224 break;
225 old = atomic_cmpxchg((v), c, c + (a));
226 if (likely(old == c))
227 break;
228 c = old;
229 }
230 return c;
231}
232
233
234static __inline__ long atomic64_add_unless(atomic64_t *v, long a, long u)
235{
236 long c, old;
237 c = atomic64_read(v);
238 for (;;) {
239 if (unlikely(c == (u)))
240 break;
241 old = atomic64_cmpxchg((v), c, c + (a));
242 if (likely(old == c))
243 break;
244 c = old;
245 }
246 return c != (u);
247}
248
249#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
250
251static __inline__ long atomic64_dec_if_positive(atomic64_t *v)
252{
253 long c, old, dec;
254 c = atomic64_read(v);
255 for (;;) {
256 dec = c - 1;
257 if (unlikely(dec < 0))
258 break;
259 old = atomic64_cmpxchg((v), c, dec);
260 if (likely(old == c))
261 break;
262 c = old;
263 }
264 return dec;
265}
266
267/*
268 * Atomically add I to V and return TRUE if the resulting value is
269 * negative.
270 */
271static __inline__ int
272atomic_add_negative (int i, atomic_t *v)
273{
274 return atomic_add_return(i, v) < 0;
275}
276
277static __inline__ long
278atomic64_add_negative (__s64 i, atomic64_t *v)
279{
280 return atomic64_add_return(i, v) < 0;
281}
282
283#define atomic_dec_return(v) atomic_sub_return(1, (v))
284#define atomic_inc_return(v) atomic_add_return(1, (v))
285#define atomic64_dec_return(v) atomic64_sub_return(1, (v))
286#define atomic64_inc_return(v) atomic64_add_return(1, (v))
287
288#define atomic_sub_and_test(i,v) (atomic_sub_return((i), (v)) == 0)
289#define atomic_dec_and_test(v) (atomic_sub_return(1, (v)) == 0)
290#define atomic_inc_and_test(v) (atomic_add_return(1, (v)) == 0)
291#define atomic64_sub_and_test(i,v) (atomic64_sub_return((i), (v)) == 0)
292#define atomic64_dec_and_test(v) (atomic64_sub_return(1, (v)) == 0)
293#define atomic64_inc_and_test(v) (atomic64_add_return(1, (v)) == 0)
294
295#define atomic_add(i,v) (void)atomic_add_return((i), (v)) 218#define atomic_add(i,v) (void)atomic_add_return((i), (v))
296#define atomic_sub(i,v) (void)atomic_sub_return((i), (v)) 219#define atomic_sub(i,v) (void)atomic_sub_return((i), (v))
297#define atomic_inc(v) atomic_add(1, (v))
298#define atomic_dec(v) atomic_sub(1, (v))
299 220
300#define atomic64_add(i,v) (void)atomic64_add_return((i), (v)) 221#define atomic64_add(i,v) (void)atomic64_add_return((i), (v))
301#define atomic64_sub(i,v) (void)atomic64_sub_return((i), (v)) 222#define atomic64_sub(i,v) (void)atomic64_sub_return((i), (v))
302#define atomic64_inc(v) atomic64_add(1, (v))
303#define atomic64_dec(v) atomic64_sub(1, (v))
304 223
305#endif /* _ASM_IA64_ATOMIC_H */ 224#endif /* _ASM_IA64_ATOMIC_H */
diff --git a/arch/ia64/include/asm/tlb.h b/arch/ia64/include/asm/tlb.h
index db89e7306081..516355a774bf 100644
--- a/arch/ia64/include/asm/tlb.h
+++ b/arch/ia64/include/asm/tlb.h
@@ -115,12 +115,11 @@ ia64_tlb_flush_mmu_tlbonly(struct mmu_gather *tlb, unsigned long start, unsigned
115 flush_tlb_all(); 115 flush_tlb_all();
116 } else { 116 } else {
117 /* 117 /*
118 * XXX fix me: flush_tlb_range() should take an mm pointer instead of a 118 * flush_tlb_range() takes a vma instead of a mm pointer because
119 * vma pointer. 119 * some architectures want the vm_flags for ITLB/DTLB flush.
120 */ 120 */
121 struct vm_area_struct vma; 121 struct vm_area_struct vma = TLB_FLUSH_VMA(tlb->mm, 0);
122 122
123 vma_init(&vma, tlb->mm);
124 /* flush the address range from the tlb: */ 123 /* flush the address range from the tlb: */
125 flush_tlb_range(&vma, start, end); 124 flush_tlb_range(&vma, start, end);
126 /* now flush the virt. page-table area mapping the address range: */ 125 /* now flush the virt. page-table area mapping the address range: */
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index e6c6dfd98de2..3b85c3ecac38 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -116,6 +116,7 @@ ia64_init_addr_space (void)
116 */ 116 */
117 vma = vm_area_alloc(current->mm); 117 vma = vm_area_alloc(current->mm);
118 if (vma) { 118 if (vma) {
119 vma_set_anonymous(vma);
119 vma->vm_start = current->thread.rbs_bot & PAGE_MASK; 120 vma->vm_start = current->thread.rbs_bot & PAGE_MASK;
120 vma->vm_end = vma->vm_start + PAGE_SIZE; 121 vma->vm_end = vma->vm_start + PAGE_SIZE;
121 vma->vm_flags = VM_DATA_DEFAULT_FLAGS|VM_GROWSUP|VM_ACCOUNT; 122 vma->vm_flags = VM_DATA_DEFAULT_FLAGS|VM_GROWSUP|VM_ACCOUNT;
@@ -133,6 +134,7 @@ ia64_init_addr_space (void)
133 if (!(current->personality & MMAP_PAGE_ZERO)) { 134 if (!(current->personality & MMAP_PAGE_ZERO)) {
134 vma = vm_area_alloc(current->mm); 135 vma = vm_area_alloc(current->mm);
135 if (vma) { 136 if (vma) {
137 vma_set_anonymous(vma);
136 vma->vm_end = PAGE_SIZE; 138 vma->vm_end = PAGE_SIZE;
137 vma->vm_page_prot = __pgprot(pgprot_val(PAGE_READONLY) | _PAGE_MA_NAT); 139 vma->vm_page_prot = __pgprot(pgprot_val(PAGE_READONLY) | _PAGE_MA_NAT);
138 vma->vm_flags = VM_READ | VM_MAYREAD | VM_IO | 140 vma->vm_flags = VM_READ | VM_MAYREAD | VM_IO |
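
vm_area_alloc() no longer implies an anonymous mapping, so the two register-backing-store vmas set up here have to say so explicitly. vma_set_anonymous() is a small helper in <linux/mm.h>; paraphrased:

    static inline void vma_set_anonymous(struct vm_area_struct *vma)
    {
    	vma->vm_ops = NULL;	/* vma_is_anonymous() checks exactly this */
    }
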
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 785612b576f7..b29f93774d95 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -2,6 +2,7 @@
2config M68K 2config M68K
3 bool 3 bool
4 default y 4 default y
5 select ARCH_HAS_SYNC_DMA_FOR_DEVICE if HAS_DMA
5 select ARCH_MIGHT_HAVE_PC_PARPORT if ISA 6 select ARCH_MIGHT_HAVE_PC_PARPORT if ISA
6 select ARCH_NO_COHERENT_DMA_MMAP if !MMU 7 select ARCH_NO_COHERENT_DMA_MMAP if !MMU
7 select HAVE_IDE 8 select HAVE_IDE
@@ -24,6 +25,10 @@ config M68K
24 select MODULES_USE_ELF_RELA 25 select MODULES_USE_ELF_RELA
25 select OLD_SIGSUSPEND3 26 select OLD_SIGSUSPEND3
26 select OLD_SIGACTION 27 select OLD_SIGACTION
28 select DMA_NONCOHERENT_OPS if HAS_DMA
29 select HAVE_MEMBLOCK
30 select ARCH_DISCARD_MEMBLOCK
31 select NO_BOOTMEM
27 32
28config CPU_BIG_ENDIAN 33config CPU_BIG_ENDIAN
29 def_bool y 34 def_bool y
diff --git a/arch/m68k/apollo/config.c b/arch/m68k/apollo/config.c
index b2a6bc63f8cd..aef8d42e078d 100644
--- a/arch/m68k/apollo/config.c
+++ b/arch/m68k/apollo/config.c
@@ -31,7 +31,6 @@ extern void dn_sched_init(irq_handler_t handler);
31extern void dn_init_IRQ(void); 31extern void dn_init_IRQ(void);
32extern u32 dn_gettimeoffset(void); 32extern u32 dn_gettimeoffset(void);
33extern int dn_dummy_hwclk(int, struct rtc_time *); 33extern int dn_dummy_hwclk(int, struct rtc_time *);
34extern int dn_dummy_set_clock_mmss(unsigned long);
35extern void dn_dummy_reset(void); 34extern void dn_dummy_reset(void);
36#ifdef CONFIG_HEARTBEAT 35#ifdef CONFIG_HEARTBEAT
37static void dn_heartbeat(int on); 36static void dn_heartbeat(int on);
@@ -156,7 +155,6 @@ void __init config_apollo(void)
156 arch_gettimeoffset = dn_gettimeoffset; 155 arch_gettimeoffset = dn_gettimeoffset;
157 mach_max_dma_address = 0xffffffff; 156 mach_max_dma_address = 0xffffffff;
158 mach_hwclk = dn_dummy_hwclk; /* */ 157 mach_hwclk = dn_dummy_hwclk; /* */
159 mach_set_clock_mmss = dn_dummy_set_clock_mmss; /* */
160 mach_reset = dn_dummy_reset; /* */ 158 mach_reset = dn_dummy_reset; /* */
161#ifdef CONFIG_HEARTBEAT 159#ifdef CONFIG_HEARTBEAT
162 mach_heartbeat = dn_heartbeat; 160 mach_heartbeat = dn_heartbeat;
@@ -240,12 +238,6 @@ int dn_dummy_hwclk(int op, struct rtc_time *t) {
240 238
241} 239}
242 240
243int dn_dummy_set_clock_mmss(unsigned long nowtime)
244{
245 pr_info("set_clock_mmss\n");
246 return 0;
247}
248
249void dn_dummy_reset(void) { 241void dn_dummy_reset(void) {
250 242
251 dn_serial_print("The end !\n"); 243 dn_serial_print("The end !\n");
diff --git a/arch/m68k/atari/config.c b/arch/m68k/atari/config.c
index 565c6f06ab0b..bd96702a1ad0 100644
--- a/arch/m68k/atari/config.c
+++ b/arch/m68k/atari/config.c
@@ -81,9 +81,6 @@ extern void atari_sched_init(irq_handler_t);
81extern u32 atari_gettimeoffset(void); 81extern u32 atari_gettimeoffset(void);
82extern int atari_mste_hwclk (int, struct rtc_time *); 82extern int atari_mste_hwclk (int, struct rtc_time *);
83extern int atari_tt_hwclk (int, struct rtc_time *); 83extern int atari_tt_hwclk (int, struct rtc_time *);
84extern int atari_mste_set_clock_mmss (unsigned long);
85extern int atari_tt_set_clock_mmss (unsigned long);
86
87 84
88/* ++roman: This is a more elaborate test for an SCC chip, since the plain 85/* ++roman: This is a more elaborate test for an SCC chip, since the plain
89 * Medusa board generates DTACK at the SCC's standard addresses, but a SCC 86 * Medusa board generates DTACK at the SCC's standard addresses, but a SCC
@@ -362,13 +359,11 @@ void __init config_atari(void)
362 ATARIHW_SET(TT_CLK); 359 ATARIHW_SET(TT_CLK);
363 pr_cont(" TT_CLK"); 360 pr_cont(" TT_CLK");
364 mach_hwclk = atari_tt_hwclk; 361 mach_hwclk = atari_tt_hwclk;
365 mach_set_clock_mmss = atari_tt_set_clock_mmss;
366 } 362 }
367 if (hwreg_present(&mste_rtc.sec_ones)) { 363 if (hwreg_present(&mste_rtc.sec_ones)) {
368 ATARIHW_SET(MSTE_CLK); 364 ATARIHW_SET(MSTE_CLK);
369 pr_cont(" MSTE_CLK"); 365 pr_cont(" MSTE_CLK");
370 mach_hwclk = atari_mste_hwclk; 366 mach_hwclk = atari_mste_hwclk;
371 mach_set_clock_mmss = atari_mste_set_clock_mmss;
372 } 367 }
373 if (!MACH_IS_MEDUSA && hwreg_present(&dma_wd.fdc_speed) && 368 if (!MACH_IS_MEDUSA && hwreg_present(&dma_wd.fdc_speed) &&
374 hwreg_write(&dma_wd.fdc_speed, 0)) { 369 hwreg_write(&dma_wd.fdc_speed, 0)) {
diff --git a/arch/m68k/atari/time.c b/arch/m68k/atari/time.c
index c549b48174ec..9cca64286464 100644
--- a/arch/m68k/atari/time.c
+++ b/arch/m68k/atari/time.c
@@ -285,69 +285,6 @@ int atari_tt_hwclk( int op, struct rtc_time *t )
285 return( 0 ); 285 return( 0 );
286} 286}
287 287
288
289int atari_mste_set_clock_mmss (unsigned long nowtime)
290{
291 short real_seconds = nowtime % 60, real_minutes = (nowtime / 60) % 60;
292 struct MSTE_RTC val;
293 unsigned char rtc_minutes;
294
295 mste_read(&val);
296 rtc_minutes= val.min_ones + val.min_tens * 10;
297 if ((rtc_minutes < real_minutes
298 ? real_minutes - rtc_minutes
299 : rtc_minutes - real_minutes) < 30)
300 {
301 val.sec_ones = real_seconds % 10;
302 val.sec_tens = real_seconds / 10;
303 val.min_ones = real_minutes % 10;
304 val.min_tens = real_minutes / 10;
305 mste_write(&val);
306 }
307 else
308 return -1;
309 return 0;
310}
311
312int atari_tt_set_clock_mmss (unsigned long nowtime)
313{
314 int retval = 0;
315 short real_seconds = nowtime % 60, real_minutes = (nowtime / 60) % 60;
316 unsigned char save_control, save_freq_select, rtc_minutes;
317
318 save_control = RTC_READ (RTC_CONTROL); /* tell the clock it's being set */
319 RTC_WRITE (RTC_CONTROL, save_control | RTC_SET);
320
321 save_freq_select = RTC_READ (RTC_FREQ_SELECT); /* stop and reset prescaler */
322 RTC_WRITE (RTC_FREQ_SELECT, save_freq_select | RTC_DIV_RESET2);
323
324 rtc_minutes = RTC_READ (RTC_MINUTES);
325 if (!(save_control & RTC_DM_BINARY))
326 rtc_minutes = bcd2bin(rtc_minutes);
327
328 /* Since we're only adjusting minutes and seconds, don't interfere
329 with hour overflow. This avoids messing with unknown time zones
330 but requires your RTC not to be off by more than 30 minutes. */
331 if ((rtc_minutes < real_minutes
332 ? real_minutes - rtc_minutes
333 : rtc_minutes - real_minutes) < 30)
334 {
335 if (!(save_control & RTC_DM_BINARY))
336 {
337 real_seconds = bin2bcd(real_seconds);
338 real_minutes = bin2bcd(real_minutes);
339 }
340 RTC_WRITE (RTC_SECONDS, real_seconds);
341 RTC_WRITE (RTC_MINUTES, real_minutes);
342 }
343 else
344 retval = -1;
345
346 RTC_WRITE (RTC_FREQ_SELECT, save_freq_select);
347 RTC_WRITE (RTC_CONTROL, save_control);
348 return retval;
349}
350
351/* 288/*
352 * Local variables: 289 * Local variables:
353 * c-indent-level: 4 290 * c-indent-level: 4
diff --git a/arch/m68k/bvme6000/config.c b/arch/m68k/bvme6000/config.c
index 2cfff4765040..143ee9fa3893 100644
--- a/arch/m68k/bvme6000/config.c
+++ b/arch/m68k/bvme6000/config.c
@@ -41,7 +41,6 @@ static void bvme6000_get_model(char *model);
41extern void bvme6000_sched_init(irq_handler_t handler); 41extern void bvme6000_sched_init(irq_handler_t handler);
42extern u32 bvme6000_gettimeoffset(void); 42extern u32 bvme6000_gettimeoffset(void);
43extern int bvme6000_hwclk (int, struct rtc_time *); 43extern int bvme6000_hwclk (int, struct rtc_time *);
44extern int bvme6000_set_clock_mmss (unsigned long);
45extern void bvme6000_reset (void); 44extern void bvme6000_reset (void);
46void bvme6000_set_vectors (void); 45void bvme6000_set_vectors (void);
47 46
@@ -113,7 +112,6 @@ void __init config_bvme6000(void)
113 mach_init_IRQ = bvme6000_init_IRQ; 112 mach_init_IRQ = bvme6000_init_IRQ;
114 arch_gettimeoffset = bvme6000_gettimeoffset; 113 arch_gettimeoffset = bvme6000_gettimeoffset;
115 mach_hwclk = bvme6000_hwclk; 114 mach_hwclk = bvme6000_hwclk;
116 mach_set_clock_mmss = bvme6000_set_clock_mmss;
117 mach_reset = bvme6000_reset; 115 mach_reset = bvme6000_reset;
118 mach_get_model = bvme6000_get_model; 116 mach_get_model = bvme6000_get_model;
119 117
@@ -305,46 +303,3 @@ int bvme6000_hwclk(int op, struct rtc_time *t)
305 303
306 return 0; 304 return 0;
307} 305}
308
309/*
310 * Set the minutes and seconds from seconds value 'nowtime'. Fail if
311 * clock is out by > 30 minutes. Logic lifted from atari code.
312 * Algorithm is to wait for the 10ms register to change, and then to
313 * wait a short while, and then set it.
314 */
315
316int bvme6000_set_clock_mmss (unsigned long nowtime)
317{
318 int retval = 0;
319 short real_seconds = nowtime % 60, real_minutes = (nowtime / 60) % 60;
320 unsigned char rtc_minutes, rtc_tenms;
321 volatile RtcPtr_t rtc = (RtcPtr_t)BVME_RTC_BASE;
322 unsigned char msr = rtc->msr & 0xc0;
323 unsigned long flags;
324 volatile int i;
325
326 rtc->msr = 0; /* Ensure clock accessible */
327 rtc_minutes = bcd2bin (rtc->bcd_min);
328
329 if ((rtc_minutes < real_minutes
330 ? real_minutes - rtc_minutes
331 : rtc_minutes - real_minutes) < 30)
332 {
333 local_irq_save(flags);
334 rtc_tenms = rtc->bcd_tenms;
335 while (rtc_tenms == rtc->bcd_tenms)
336 ;
337 for (i = 0; i < 1000; i++)
338 ;
339 rtc->bcd_min = bin2bcd(real_minutes);
340 rtc->bcd_sec = bin2bcd(real_seconds);
341 local_irq_restore(flags);
342 }
343 else
344 retval = -1;
345
346 rtc->msr = msr;
347
348 return retval;
349}
350
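
The machine-specific *_set_clock_mmss() callbacks removed here (Apollo, Atari, BVME6000, along with their mach_set_clock_mmss assignments) all implemented the old write-minutes-and-seconds-back-to-the-RTC hook; that job presumably moves to the RTC class drivers reached through mach_hwclk instead.
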
diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig
index a874e54404d1..1d5483f6e457 100644
--- a/arch/m68k/configs/amiga_defconfig
+++ b/arch/m68k/configs/amiga_defconfig
@@ -52,6 +52,7 @@ CONFIG_UNIX_DIAG=m
52CONFIG_TLS=m 52CONFIG_TLS=m
53CONFIG_XFRM_MIGRATE=y 53CONFIG_XFRM_MIGRATE=y
54CONFIG_NET_KEY=y 54CONFIG_NET_KEY=y
55CONFIG_XDP_SOCKETS=y
55CONFIG_INET=y 56CONFIG_INET=y
56CONFIG_IP_PNP=y 57CONFIG_IP_PNP=y
57CONFIG_IP_PNP_DHCP=y 58CONFIG_IP_PNP_DHCP=y
@@ -98,18 +99,14 @@ CONFIG_NF_CONNTRACK_SANE=m
98CONFIG_NF_CONNTRACK_SIP=m 99CONFIG_NF_CONNTRACK_SIP=m
99CONFIG_NF_CONNTRACK_TFTP=m 100CONFIG_NF_CONNTRACK_TFTP=m
 CONFIG_NF_TABLES=m
+CONFIG_NF_TABLES_SET=m
 CONFIG_NF_TABLES_INET=y
 CONFIG_NF_TABLES_NETDEV=y
-CONFIG_NFT_EXTHDR=m
-CONFIG_NFT_META=m
-CONFIG_NFT_RT=m
 CONFIG_NFT_NUMGEN=m
 CONFIG_NFT_CT=m
 CONFIG_NFT_FLOW_OFFLOAD=m
-CONFIG_NFT_SET_RBTREE=m
-CONFIG_NFT_SET_HASH=m
-CONFIG_NFT_SET_BITMAP=m
 CONFIG_NFT_COUNTER=m
+CONFIG_NFT_CONNLIMIT=m
 CONFIG_NFT_LOG=m
 CONFIG_NFT_LIMIT=m
 CONFIG_NFT_MASQ=m
@@ -122,6 +119,7 @@ CONFIG_NFT_REJECT=m
 CONFIG_NFT_COMPAT=m
 CONFIG_NFT_HASH=m
 CONFIG_NFT_FIB_INET=m
+CONFIG_NFT_SOCKET=m
 CONFIG_NFT_DUP_NETDEV=m
 CONFIG_NFT_FWD_NETDEV=m
 CONFIG_NFT_FIB_NETDEV=m
@@ -200,7 +198,6 @@ CONFIG_IP_SET_HASH_NETPORT=m
 CONFIG_IP_SET_HASH_NETIFACE=m
 CONFIG_IP_SET_LIST_SET=m
 CONFIG_NF_CONNTRACK_IPV4=m
-CONFIG_NF_SOCKET_IPV4=m
 CONFIG_NFT_CHAIN_ROUTE_IPV4=m
 CONFIG_NFT_DUP_IPV4=m
 CONFIG_NFT_FIB_IPV4=m
@@ -231,7 +228,6 @@ CONFIG_IP_NF_ARPTABLES=m
 CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NF_CONNTRACK_IPV6=m
-CONFIG_NF_SOCKET_IPV6=m
 CONFIG_NFT_CHAIN_ROUTE_IPV6=m
 CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_NFT_MASQ_IPV6=m
@@ -260,7 +256,6 @@ CONFIG_IP6_NF_NAT=m
 CONFIG_IP6_NF_TARGET_MASQUERADE=m
 CONFIG_IP6_NF_TARGET_NPT=m
 CONFIG_NF_TABLES_BRIDGE=y
-CONFIG_NFT_BRIDGE_META=m
 CONFIG_NFT_BRIDGE_REJECT=m
 CONFIG_NF_LOG_BRIDGE=m
 CONFIG_BRIDGE_NF_EBTABLES=m
@@ -301,6 +296,7 @@ CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m
 CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m
 CONFIG_DNS_RESOLVER=y
 CONFIG_BATMAN_ADV=m
+# CONFIG_BATMAN_ADV_BATMAN_V is not set
 CONFIG_BATMAN_ADV_DAT=y
 CONFIG_BATMAN_ADV_NC=y
 CONFIG_BATMAN_ADV_MCAST=y
@@ -356,6 +352,7 @@ CONFIG_A2091_SCSI=y
 CONFIG_GVP11_SCSI=y
 CONFIG_SCSI_A4000T=y
 CONFIG_SCSI_ZORRO7XX=y
+CONFIG_SCSI_ZORRO_ESP=y
 CONFIG_MD=y
 CONFIG_MD_LINEAR=m
 CONFIG_BLK_DEV_DM=m
@@ -363,6 +360,7 @@ CONFIG_DM_UNSTRIPED=m
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
 CONFIG_DM_THIN_PROVISIONING=m
+CONFIG_DM_WRITECACHE=m
 CONFIG_DM_ERA=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_RAID=m
@@ -402,8 +400,8 @@ CONFIG_A2065=y
 CONFIG_ARIADNE=y
 # CONFIG_NET_VENDOR_AQUANTIA is not set
 # CONFIG_NET_VENDOR_ARC is not set
-# CONFIG_NET_CADENCE is not set
 # CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_CADENCE is not set
 # CONFIG_NET_VENDOR_CIRRUS is not set
 # CONFIG_NET_VENDOR_CORTINA is not set
 # CONFIG_NET_VENDOR_EZCHIP is not set
@@ -412,8 +410,10 @@ CONFIG_ARIADNE=y
 # CONFIG_NET_VENDOR_INTEL is not set
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MICREL is not set
+# CONFIG_NET_VENDOR_MICROSEMI is not set
 # CONFIG_NET_VENDOR_NETRONOME is not set
 # CONFIG_NET_VENDOR_NI is not set
+CONFIG_XSURF100=y
 CONFIG_HYDRA=y
 CONFIG_APNE=y
 CONFIG_ZORRO8390=y
@@ -426,9 +426,9 @@ CONFIG_ZORRO8390=y
 # CONFIG_NET_VENDOR_SMSC is not set
 # CONFIG_NET_VENDOR_SOCIONEXT is not set
 # CONFIG_NET_VENDOR_STMICRO is not set
+# CONFIG_NET_VENDOR_SYNOPSYS is not set
 # CONFIG_NET_VENDOR_VIA is not set
 # CONFIG_NET_VENDOR_WIZNET is not set
-# CONFIG_NET_VENDOR_SYNOPSYS is not set
 CONFIG_PPP=m
 CONFIG_PPP_BSDCOMP=m
 CONFIG_PPP_DEFLATE=m
@@ -478,6 +478,7 @@ CONFIG_HIDRAW=y
 CONFIG_UHID=m
 # CONFIG_HID_GENERIC is not set
 # CONFIG_HID_ITE is not set
+# CONFIG_HID_REDRAGON is not set
 # CONFIG_USB_SUPPORT is not set
 CONFIG_RTC_CLASS=y
 # CONFIG_RTC_NVMEM is not set
@@ -499,7 +500,7 @@ CONFIG_FS_ENCRYPTION=m
 CONFIG_FANOTIFY=y
 CONFIG_QUOTA_NETLINK_INTERFACE=y
 # CONFIG_PRINT_QUOTA_WARNING is not set
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
 CONFIG_FUSE_FS=m
 CONFIG_CUSE=m
 CONFIG_OVERLAY_FS=m
@@ -600,6 +601,7 @@ CONFIG_TEST_KSTRTOX=m
 CONFIG_TEST_PRINTF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
+CONFIG_TEST_OVERFLOW=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_HASH=m
 CONFIG_TEST_USER_COPY=m
@@ -622,6 +624,11 @@ CONFIG_CRYPTO_CRYPTD=m
 CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
+CONFIG_CRYPTO_AEGIS128=m
+CONFIG_CRYPTO_AEGIS128L=m
+CONFIG_CRYPTO_AEGIS256=m
+CONFIG_CRYPTO_MORUS640=m
+CONFIG_CRYPTO_MORUS1280=m
 CONFIG_CRYPTO_CFB=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
@@ -657,6 +664,7 @@ CONFIG_CRYPTO_LZO=m
 CONFIG_CRYPTO_842=m
 CONFIG_CRYPTO_LZ4=m
 CONFIG_CRYPTO_LZ4HC=m
+CONFIG_CRYPTO_ZSTD=m
 CONFIG_CRYPTO_ANSI_CPRNG=m
 CONFIG_CRYPTO_DRBG_HASH=y
 CONFIG_CRYPTO_DRBG_CTR=y
diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig
index 8ce39e23aa42..52a0af127951 100644
--- a/arch/m68k/configs/apollo_defconfig
+++ b/arch/m68k/configs/apollo_defconfig
@@ -50,6 +50,7 @@ CONFIG_UNIX_DIAG=m
 CONFIG_TLS=m
 CONFIG_XFRM_MIGRATE=y
 CONFIG_NET_KEY=y
+CONFIG_XDP_SOCKETS=y
 CONFIG_INET=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
@@ -96,18 +97,14 @@ CONFIG_NF_CONNTRACK_SANE=m
 CONFIG_NF_CONNTRACK_SIP=m
 CONFIG_NF_CONNTRACK_TFTP=m
 CONFIG_NF_TABLES=m
+CONFIG_NF_TABLES_SET=m
 CONFIG_NF_TABLES_INET=y
 CONFIG_NF_TABLES_NETDEV=y
-CONFIG_NFT_EXTHDR=m
-CONFIG_NFT_META=m
-CONFIG_NFT_RT=m
 CONFIG_NFT_NUMGEN=m
 CONFIG_NFT_CT=m
 CONFIG_NFT_FLOW_OFFLOAD=m
-CONFIG_NFT_SET_RBTREE=m
-CONFIG_NFT_SET_HASH=m
-CONFIG_NFT_SET_BITMAP=m
 CONFIG_NFT_COUNTER=m
+CONFIG_NFT_CONNLIMIT=m
 CONFIG_NFT_LOG=m
 CONFIG_NFT_LIMIT=m
 CONFIG_NFT_MASQ=m
@@ -120,6 +117,7 @@ CONFIG_NFT_REJECT=m
 CONFIG_NFT_COMPAT=m
 CONFIG_NFT_HASH=m
 CONFIG_NFT_FIB_INET=m
+CONFIG_NFT_SOCKET=m
 CONFIG_NFT_DUP_NETDEV=m
 CONFIG_NFT_FWD_NETDEV=m
 CONFIG_NFT_FIB_NETDEV=m
@@ -198,7 +196,6 @@ CONFIG_IP_SET_HASH_NETPORT=m
 CONFIG_IP_SET_HASH_NETIFACE=m
 CONFIG_IP_SET_LIST_SET=m
 CONFIG_NF_CONNTRACK_IPV4=m
-CONFIG_NF_SOCKET_IPV4=m
 CONFIG_NFT_CHAIN_ROUTE_IPV4=m
 CONFIG_NFT_DUP_IPV4=m
 CONFIG_NFT_FIB_IPV4=m
@@ -229,7 +226,6 @@ CONFIG_IP_NF_ARPTABLES=m
 CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NF_CONNTRACK_IPV6=m
-CONFIG_NF_SOCKET_IPV6=m
 CONFIG_NFT_CHAIN_ROUTE_IPV6=m
 CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_NFT_MASQ_IPV6=m
@@ -258,7 +254,6 @@ CONFIG_IP6_NF_NAT=m
 CONFIG_IP6_NF_TARGET_MASQUERADE=m
 CONFIG_IP6_NF_TARGET_NPT=m
 CONFIG_NF_TABLES_BRIDGE=y
-CONFIG_NFT_BRIDGE_META=m
 CONFIG_NFT_BRIDGE_REJECT=m
 CONFIG_NF_LOG_BRIDGE=m
 CONFIG_BRIDGE_NF_EBTABLES=m
@@ -299,6 +294,7 @@ CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m
 CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m
 CONFIG_DNS_RESOLVER=y
 CONFIG_BATMAN_ADV=m
+# CONFIG_BATMAN_ADV_BATMAN_V is not set
 CONFIG_BATMAN_ADV_DAT=y
 CONFIG_BATMAN_ADV_NC=y
 CONFIG_BATMAN_ADV_MCAST=y
@@ -345,6 +341,7 @@ CONFIG_DM_UNSTRIPED=m
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
 CONFIG_DM_THIN_PROVISIONING=m
+CONFIG_DM_WRITECACHE=m
 CONFIG_DM_ERA=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_RAID=m
@@ -381,14 +378,15 @@ CONFIG_VETH=m
 # CONFIG_NET_VENDOR_AMAZON is not set
 # CONFIG_NET_VENDOR_AQUANTIA is not set
 # CONFIG_NET_VENDOR_ARC is not set
-# CONFIG_NET_CADENCE is not set
 # CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_CADENCE is not set
 # CONFIG_NET_VENDOR_CORTINA is not set
 # CONFIG_NET_VENDOR_EZCHIP is not set
 # CONFIG_NET_VENDOR_HUAWEI is not set
 # CONFIG_NET_VENDOR_INTEL is not set
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MICREL is not set
+# CONFIG_NET_VENDOR_MICROSEMI is not set
 # CONFIG_NET_VENDOR_NATSEMI is not set
 # CONFIG_NET_VENDOR_NETRONOME is not set
 # CONFIG_NET_VENDOR_NI is not set
@@ -400,9 +398,9 @@ CONFIG_VETH=m
 # CONFIG_NET_VENDOR_SOLARFLARE is not set
 # CONFIG_NET_VENDOR_SOCIONEXT is not set
 # CONFIG_NET_VENDOR_STMICRO is not set
+# CONFIG_NET_VENDOR_SYNOPSYS is not set
 # CONFIG_NET_VENDOR_VIA is not set
 # CONFIG_NET_VENDOR_WIZNET is not set
-# CONFIG_NET_VENDOR_SYNOPSYS is not set
 CONFIG_PPP=m
 CONFIG_PPP_BSDCOMP=m
 CONFIG_PPP_DEFLATE=m
@@ -440,6 +438,7 @@ CONFIG_HIDRAW=y
 CONFIG_UHID=m
 # CONFIG_HID_GENERIC is not set
 # CONFIG_HID_ITE is not set
+# CONFIG_HID_REDRAGON is not set
 # CONFIG_USB_SUPPORT is not set
 CONFIG_RTC_CLASS=y
 # CONFIG_RTC_NVMEM is not set
@@ -458,7 +457,7 @@ CONFIG_FS_ENCRYPTION=m
 CONFIG_FANOTIFY=y
 CONFIG_QUOTA_NETLINK_INTERFACE=y
 # CONFIG_PRINT_QUOTA_WARNING is not set
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
 CONFIG_FUSE_FS=m
 CONFIG_CUSE=m
 CONFIG_OVERLAY_FS=m
@@ -559,6 +558,7 @@ CONFIG_TEST_KSTRTOX=m
 CONFIG_TEST_PRINTF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
+CONFIG_TEST_OVERFLOW=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_HASH=m
 CONFIG_TEST_USER_COPY=m
@@ -581,6 +581,11 @@ CONFIG_CRYPTO_CRYPTD=m
 CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
+CONFIG_CRYPTO_AEGIS128=m
+CONFIG_CRYPTO_AEGIS128L=m
+CONFIG_CRYPTO_AEGIS256=m
+CONFIG_CRYPTO_MORUS640=m
+CONFIG_CRYPTO_MORUS1280=m
 CONFIG_CRYPTO_CFB=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
@@ -616,6 +621,7 @@ CONFIG_CRYPTO_LZO=m
 CONFIG_CRYPTO_842=m
 CONFIG_CRYPTO_LZ4=m
 CONFIG_CRYPTO_LZ4HC=m
+CONFIG_CRYPTO_ZSTD=m
 CONFIG_CRYPTO_ANSI_CPRNG=m
 CONFIG_CRYPTO_DRBG_HASH=y
 CONFIG_CRYPTO_DRBG_CTR=y
diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig
index 346c4e75edf8..b3103e51268a 100644
--- a/arch/m68k/configs/atari_defconfig
+++ b/arch/m68k/configs/atari_defconfig
@@ -50,6 +50,7 @@ CONFIG_UNIX_DIAG=m
 CONFIG_TLS=m
 CONFIG_XFRM_MIGRATE=y
 CONFIG_NET_KEY=y
+CONFIG_XDP_SOCKETS=y
 CONFIG_INET=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
@@ -96,18 +97,14 @@ CONFIG_NF_CONNTRACK_SANE=m
 CONFIG_NF_CONNTRACK_SIP=m
 CONFIG_NF_CONNTRACK_TFTP=m
 CONFIG_NF_TABLES=m
+CONFIG_NF_TABLES_SET=m
 CONFIG_NF_TABLES_INET=y
 CONFIG_NF_TABLES_NETDEV=y
-CONFIG_NFT_EXTHDR=m
-CONFIG_NFT_META=m
-CONFIG_NFT_RT=m
 CONFIG_NFT_NUMGEN=m
 CONFIG_NFT_CT=m
 CONFIG_NFT_FLOW_OFFLOAD=m
-CONFIG_NFT_SET_RBTREE=m
-CONFIG_NFT_SET_HASH=m
-CONFIG_NFT_SET_BITMAP=m
 CONFIG_NFT_COUNTER=m
+CONFIG_NFT_CONNLIMIT=m
 CONFIG_NFT_LOG=m
 CONFIG_NFT_LIMIT=m
 CONFIG_NFT_MASQ=m
@@ -120,6 +117,7 @@ CONFIG_NFT_REJECT=m
 CONFIG_NFT_COMPAT=m
 CONFIG_NFT_HASH=m
 CONFIG_NFT_FIB_INET=m
+CONFIG_NFT_SOCKET=m
 CONFIG_NFT_DUP_NETDEV=m
 CONFIG_NFT_FWD_NETDEV=m
 CONFIG_NFT_FIB_NETDEV=m
@@ -198,7 +196,6 @@ CONFIG_IP_SET_HASH_NETPORT=m
 CONFIG_IP_SET_HASH_NETIFACE=m
 CONFIG_IP_SET_LIST_SET=m
 CONFIG_NF_CONNTRACK_IPV4=m
-CONFIG_NF_SOCKET_IPV4=m
 CONFIG_NFT_CHAIN_ROUTE_IPV4=m
 CONFIG_NFT_DUP_IPV4=m
 CONFIG_NFT_FIB_IPV4=m
@@ -229,7 +226,6 @@ CONFIG_IP_NF_ARPTABLES=m
 CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NF_CONNTRACK_IPV6=m
-CONFIG_NF_SOCKET_IPV6=m
 CONFIG_NFT_CHAIN_ROUTE_IPV6=m
 CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_NFT_MASQ_IPV6=m
@@ -258,7 +254,6 @@ CONFIG_IP6_NF_NAT=m
 CONFIG_IP6_NF_TARGET_MASQUERADE=m
 CONFIG_IP6_NF_TARGET_NPT=m
 CONFIG_NF_TABLES_BRIDGE=y
-CONFIG_NFT_BRIDGE_META=m
 CONFIG_NFT_BRIDGE_REJECT=m
 CONFIG_NF_LOG_BRIDGE=m
 CONFIG_BRIDGE_NF_EBTABLES=m
@@ -299,6 +294,7 @@ CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m
 CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m
 CONFIG_DNS_RESOLVER=y
 CONFIG_BATMAN_ADV=m
+# CONFIG_BATMAN_ADV_BATMAN_V is not set
 CONFIG_BATMAN_ADV_DAT=y
 CONFIG_BATMAN_ADV_NC=y
 CONFIG_BATMAN_ADV_MCAST=y
@@ -354,6 +350,7 @@ CONFIG_DM_UNSTRIPED=m
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
 CONFIG_DM_THIN_PROVISIONING=m
+CONFIG_DM_WRITECACHE=m
 CONFIG_DM_ERA=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_RAID=m
@@ -391,14 +388,15 @@ CONFIG_VETH=m
 CONFIG_ATARILANCE=y
 # CONFIG_NET_VENDOR_AQUANTIA is not set
 # CONFIG_NET_VENDOR_ARC is not set
-# CONFIG_NET_CADENCE is not set
 # CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_CADENCE is not set
 # CONFIG_NET_VENDOR_CORTINA is not set
 # CONFIG_NET_VENDOR_EZCHIP is not set
 # CONFIG_NET_VENDOR_HUAWEI is not set
 # CONFIG_NET_VENDOR_INTEL is not set
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MICREL is not set
+# CONFIG_NET_VENDOR_MICROSEMI is not set
 # CONFIG_NET_VENDOR_NETRONOME is not set
 # CONFIG_NET_VENDOR_NI is not set
 CONFIG_NE2000=y
@@ -411,9 +409,9 @@ CONFIG_NE2000=y
 CONFIG_SMC91X=y
 # CONFIG_NET_VENDOR_SOCIONEXT is not set
 # CONFIG_NET_VENDOR_STMICRO is not set
+# CONFIG_NET_VENDOR_SYNOPSYS is not set
 # CONFIG_NET_VENDOR_VIA is not set
 # CONFIG_NET_VENDOR_WIZNET is not set
-# CONFIG_NET_VENDOR_SYNOPSYS is not set
 CONFIG_PPP=m
 CONFIG_PPP_BSDCOMP=m
 CONFIG_PPP_DEFLATE=m
@@ -480,7 +478,7 @@ CONFIG_FS_ENCRYPTION=m
 CONFIG_FANOTIFY=y
 CONFIG_QUOTA_NETLINK_INTERFACE=y
 # CONFIG_PRINT_QUOTA_WARNING is not set
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
 CONFIG_FUSE_FS=m
 CONFIG_CUSE=m
 CONFIG_OVERLAY_FS=m
@@ -581,6 +579,7 @@ CONFIG_TEST_KSTRTOX=m
 CONFIG_TEST_PRINTF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
+CONFIG_TEST_OVERFLOW=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_HASH=m
 CONFIG_TEST_USER_COPY=m
@@ -603,6 +602,11 @@ CONFIG_CRYPTO_CRYPTD=m
 CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
+CONFIG_CRYPTO_AEGIS128=m
+CONFIG_CRYPTO_AEGIS128L=m
+CONFIG_CRYPTO_AEGIS256=m
+CONFIG_CRYPTO_MORUS640=m
+CONFIG_CRYPTO_MORUS1280=m
 CONFIG_CRYPTO_CFB=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
@@ -638,6 +642,7 @@ CONFIG_CRYPTO_LZO=m
 CONFIG_CRYPTO_842=m
 CONFIG_CRYPTO_LZ4=m
 CONFIG_CRYPTO_LZ4HC=m
+CONFIG_CRYPTO_ZSTD=m
 CONFIG_CRYPTO_ANSI_CPRNG=m
 CONFIG_CRYPTO_DRBG_HASH=y
 CONFIG_CRYPTO_DRBG_CTR=y
diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig
index fca9c7aa71a3..fb7d651a4cab 100644
--- a/arch/m68k/configs/bvme6000_defconfig
+++ b/arch/m68k/configs/bvme6000_defconfig
@@ -48,6 +48,7 @@ CONFIG_UNIX_DIAG=m
 CONFIG_TLS=m
 CONFIG_XFRM_MIGRATE=y
 CONFIG_NET_KEY=y
+CONFIG_XDP_SOCKETS=y
 CONFIG_INET=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
@@ -94,18 +95,14 @@ CONFIG_NF_CONNTRACK_SANE=m
 CONFIG_NF_CONNTRACK_SIP=m
 CONFIG_NF_CONNTRACK_TFTP=m
 CONFIG_NF_TABLES=m
+CONFIG_NF_TABLES_SET=m
 CONFIG_NF_TABLES_INET=y
 CONFIG_NF_TABLES_NETDEV=y
-CONFIG_NFT_EXTHDR=m
-CONFIG_NFT_META=m
-CONFIG_NFT_RT=m
 CONFIG_NFT_NUMGEN=m
 CONFIG_NFT_CT=m
 CONFIG_NFT_FLOW_OFFLOAD=m
-CONFIG_NFT_SET_RBTREE=m
-CONFIG_NFT_SET_HASH=m
-CONFIG_NFT_SET_BITMAP=m
 CONFIG_NFT_COUNTER=m
+CONFIG_NFT_CONNLIMIT=m
 CONFIG_NFT_LOG=m
 CONFIG_NFT_LIMIT=m
 CONFIG_NFT_MASQ=m
@@ -118,6 +115,7 @@ CONFIG_NFT_REJECT=m
 CONFIG_NFT_COMPAT=m
 CONFIG_NFT_HASH=m
 CONFIG_NFT_FIB_INET=m
+CONFIG_NFT_SOCKET=m
 CONFIG_NFT_DUP_NETDEV=m
 CONFIG_NFT_FWD_NETDEV=m
 CONFIG_NFT_FIB_NETDEV=m
@@ -196,7 +194,6 @@ CONFIG_IP_SET_HASH_NETPORT=m
 CONFIG_IP_SET_HASH_NETIFACE=m
 CONFIG_IP_SET_LIST_SET=m
 CONFIG_NF_CONNTRACK_IPV4=m
-CONFIG_NF_SOCKET_IPV4=m
 CONFIG_NFT_CHAIN_ROUTE_IPV4=m
 CONFIG_NFT_DUP_IPV4=m
 CONFIG_NFT_FIB_IPV4=m
@@ -227,7 +224,6 @@ CONFIG_IP_NF_ARPTABLES=m
 CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NF_CONNTRACK_IPV6=m
-CONFIG_NF_SOCKET_IPV6=m
 CONFIG_NFT_CHAIN_ROUTE_IPV6=m
 CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_NFT_MASQ_IPV6=m
@@ -256,7 +252,6 @@ CONFIG_IP6_NF_NAT=m
 CONFIG_IP6_NF_TARGET_MASQUERADE=m
 CONFIG_IP6_NF_TARGET_NPT=m
 CONFIG_NF_TABLES_BRIDGE=y
-CONFIG_NFT_BRIDGE_META=m
 CONFIG_NFT_BRIDGE_REJECT=m
 CONFIG_NF_LOG_BRIDGE=m
 CONFIG_BRIDGE_NF_EBTABLES=m
@@ -297,6 +292,7 @@ CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m
 CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m
 CONFIG_DNS_RESOLVER=y
 CONFIG_BATMAN_ADV=m
+# CONFIG_BATMAN_ADV_BATMAN_V is not set
 CONFIG_BATMAN_ADV_DAT=y
 CONFIG_BATMAN_ADV_NC=y
 CONFIG_BATMAN_ADV_MCAST=y
@@ -344,6 +340,7 @@ CONFIG_DM_UNSTRIPED=m
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
 CONFIG_DM_THIN_PROVISIONING=m
+CONFIG_DM_WRITECACHE=m
 CONFIG_DM_ERA=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_RAID=m
@@ -380,14 +377,15 @@ CONFIG_VETH=m
 # CONFIG_NET_VENDOR_AMAZON is not set
 # CONFIG_NET_VENDOR_AQUANTIA is not set
 # CONFIG_NET_VENDOR_ARC is not set
-# CONFIG_NET_CADENCE is not set
 # CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_CADENCE is not set
 # CONFIG_NET_VENDOR_CORTINA is not set
 # CONFIG_NET_VENDOR_EZCHIP is not set
 # CONFIG_NET_VENDOR_HUAWEI is not set
 CONFIG_BVME6000_NET=y
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MICREL is not set
+# CONFIG_NET_VENDOR_MICROSEMI is not set
 # CONFIG_NET_VENDOR_NATSEMI is not set
 # CONFIG_NET_VENDOR_NETRONOME is not set
 # CONFIG_NET_VENDOR_NI is not set
@@ -399,9 +397,9 @@ CONFIG_BVME6000_NET=y
 # CONFIG_NET_VENDOR_SOLARFLARE is not set
 # CONFIG_NET_VENDOR_SOCIONEXT is not set
 # CONFIG_NET_VENDOR_STMICRO is not set
+# CONFIG_NET_VENDOR_SYNOPSYS is not set
 # CONFIG_NET_VENDOR_VIA is not set
 # CONFIG_NET_VENDOR_WIZNET is not set
-# CONFIG_NET_VENDOR_SYNOPSYS is not set
 CONFIG_PPP=m
 CONFIG_PPP_BSDCOMP=m
 CONFIG_PPP_DEFLATE=m
@@ -433,6 +431,7 @@ CONFIG_HIDRAW=y
 CONFIG_UHID=m
 # CONFIG_HID_GENERIC is not set
 # CONFIG_HID_ITE is not set
+# CONFIG_HID_REDRAGON is not set
 # CONFIG_USB_SUPPORT is not set
 CONFIG_RTC_CLASS=y
 # CONFIG_RTC_NVMEM is not set
@@ -450,7 +449,7 @@ CONFIG_FS_ENCRYPTION=m
 CONFIG_FANOTIFY=y
 CONFIG_QUOTA_NETLINK_INTERFACE=y
 # CONFIG_PRINT_QUOTA_WARNING is not set
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
 CONFIG_FUSE_FS=m
 CONFIG_CUSE=m
 CONFIG_OVERLAY_FS=m
@@ -551,6 +550,7 @@ CONFIG_TEST_KSTRTOX=m
 CONFIG_TEST_PRINTF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
+CONFIG_TEST_OVERFLOW=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_HASH=m
 CONFIG_TEST_USER_COPY=m
@@ -573,6 +573,11 @@ CONFIG_CRYPTO_CRYPTD=m
 CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
+CONFIG_CRYPTO_AEGIS128=m
+CONFIG_CRYPTO_AEGIS128L=m
+CONFIG_CRYPTO_AEGIS256=m
+CONFIG_CRYPTO_MORUS640=m
+CONFIG_CRYPTO_MORUS1280=m
 CONFIG_CRYPTO_CFB=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
@@ -608,6 +613,7 @@ CONFIG_CRYPTO_LZO=m
 CONFIG_CRYPTO_842=m
 CONFIG_CRYPTO_LZ4=m
 CONFIG_CRYPTO_LZ4HC=m
+CONFIG_CRYPTO_ZSTD=m
 CONFIG_CRYPTO_ANSI_CPRNG=m
 CONFIG_CRYPTO_DRBG_HASH=y
 CONFIG_CRYPTO_DRBG_CTR=y
diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig
index f9eab174915c..6b37f5537c39 100644
--- a/arch/m68k/configs/hp300_defconfig
+++ b/arch/m68k/configs/hp300_defconfig
@@ -50,6 +50,7 @@ CONFIG_UNIX_DIAG=m
 CONFIG_TLS=m
 CONFIG_XFRM_MIGRATE=y
 CONFIG_NET_KEY=y
+CONFIG_XDP_SOCKETS=y
 CONFIG_INET=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
@@ -96,18 +97,14 @@ CONFIG_NF_CONNTRACK_SANE=m
 CONFIG_NF_CONNTRACK_SIP=m
 CONFIG_NF_CONNTRACK_TFTP=m
 CONFIG_NF_TABLES=m
+CONFIG_NF_TABLES_SET=m
 CONFIG_NF_TABLES_INET=y
 CONFIG_NF_TABLES_NETDEV=y
-CONFIG_NFT_EXTHDR=m
-CONFIG_NFT_META=m
-CONFIG_NFT_RT=m
 CONFIG_NFT_NUMGEN=m
 CONFIG_NFT_CT=m
 CONFIG_NFT_FLOW_OFFLOAD=m
-CONFIG_NFT_SET_RBTREE=m
-CONFIG_NFT_SET_HASH=m
-CONFIG_NFT_SET_BITMAP=m
 CONFIG_NFT_COUNTER=m
+CONFIG_NFT_CONNLIMIT=m
 CONFIG_NFT_LOG=m
 CONFIG_NFT_LIMIT=m
 CONFIG_NFT_MASQ=m
@@ -120,6 +117,7 @@ CONFIG_NFT_REJECT=m
 CONFIG_NFT_COMPAT=m
 CONFIG_NFT_HASH=m
 CONFIG_NFT_FIB_INET=m
+CONFIG_NFT_SOCKET=m
 CONFIG_NFT_DUP_NETDEV=m
 CONFIG_NFT_FWD_NETDEV=m
 CONFIG_NFT_FIB_NETDEV=m
@@ -198,7 +196,6 @@ CONFIG_IP_SET_HASH_NETPORT=m
 CONFIG_IP_SET_HASH_NETIFACE=m
 CONFIG_IP_SET_LIST_SET=m
 CONFIG_NF_CONNTRACK_IPV4=m
-CONFIG_NF_SOCKET_IPV4=m
 CONFIG_NFT_CHAIN_ROUTE_IPV4=m
 CONFIG_NFT_DUP_IPV4=m
 CONFIG_NFT_FIB_IPV4=m
@@ -229,7 +226,6 @@ CONFIG_IP_NF_ARPTABLES=m
 CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NF_CONNTRACK_IPV6=m
-CONFIG_NF_SOCKET_IPV6=m
 CONFIG_NFT_CHAIN_ROUTE_IPV6=m
 CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_NFT_MASQ_IPV6=m
@@ -258,7 +254,6 @@ CONFIG_IP6_NF_NAT=m
 CONFIG_IP6_NF_TARGET_MASQUERADE=m
 CONFIG_IP6_NF_TARGET_NPT=m
 CONFIG_NF_TABLES_BRIDGE=y
-CONFIG_NFT_BRIDGE_META=m
 CONFIG_NFT_BRIDGE_REJECT=m
 CONFIG_NF_LOG_BRIDGE=m
 CONFIG_BRIDGE_NF_EBTABLES=m
@@ -299,6 +294,7 @@ CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m
 CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m
 CONFIG_DNS_RESOLVER=y
 CONFIG_BATMAN_ADV=m
+# CONFIG_BATMAN_ADV_BATMAN_V is not set
 CONFIG_BATMAN_ADV_DAT=y
 CONFIG_BATMAN_ADV_NC=y
 CONFIG_BATMAN_ADV_MCAST=y
@@ -345,6 +341,7 @@ CONFIG_DM_UNSTRIPED=m
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
 CONFIG_DM_THIN_PROVISIONING=m
+CONFIG_DM_WRITECACHE=m
 CONFIG_DM_ERA=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_RAID=m
@@ -382,14 +379,15 @@ CONFIG_VETH=m
 CONFIG_HPLANCE=y
 # CONFIG_NET_VENDOR_AQUANTIA is not set
 # CONFIG_NET_VENDOR_ARC is not set
-# CONFIG_NET_CADENCE is not set
 # CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_CADENCE is not set
 # CONFIG_NET_VENDOR_CORTINA is not set
 # CONFIG_NET_VENDOR_EZCHIP is not set
 # CONFIG_NET_VENDOR_HUAWEI is not set
 # CONFIG_NET_VENDOR_INTEL is not set
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MICREL is not set
+# CONFIG_NET_VENDOR_MICROSEMI is not set
 # CONFIG_NET_VENDOR_NATSEMI is not set
 # CONFIG_NET_VENDOR_NETRONOME is not set
 # CONFIG_NET_VENDOR_NI is not set
@@ -401,9 +399,9 @@ CONFIG_HPLANCE=y
 # CONFIG_NET_VENDOR_SOLARFLARE is not set
 # CONFIG_NET_VENDOR_SOCIONEXT is not set
 # CONFIG_NET_VENDOR_STMICRO is not set
+# CONFIG_NET_VENDOR_SYNOPSYS is not set
 # CONFIG_NET_VENDOR_VIA is not set
 # CONFIG_NET_VENDOR_WIZNET is not set
-# CONFIG_NET_VENDOR_SYNOPSYS is not set
 CONFIG_PPP=m
 CONFIG_PPP_BSDCOMP=m
 CONFIG_PPP_DEFLATE=m
@@ -443,6 +441,7 @@ CONFIG_HIDRAW=y
 CONFIG_UHID=m
 # CONFIG_HID_GENERIC is not set
 # CONFIG_HID_ITE is not set
+# CONFIG_HID_REDRAGON is not set
 # CONFIG_USB_SUPPORT is not set
 CONFIG_RTC_CLASS=y
 # CONFIG_RTC_NVMEM is not set
@@ -460,7 +459,7 @@ CONFIG_FS_ENCRYPTION=m
 CONFIG_FANOTIFY=y
 CONFIG_QUOTA_NETLINK_INTERFACE=y
 # CONFIG_PRINT_QUOTA_WARNING is not set
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
 CONFIG_FUSE_FS=m
 CONFIG_CUSE=m
 CONFIG_OVERLAY_FS=m
@@ -561,6 +560,7 @@ CONFIG_TEST_KSTRTOX=m
 CONFIG_TEST_PRINTF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
+CONFIG_TEST_OVERFLOW=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_HASH=m
 CONFIG_TEST_USER_COPY=m
@@ -583,6 +583,11 @@ CONFIG_CRYPTO_CRYPTD=m
 CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
+CONFIG_CRYPTO_AEGIS128=m
+CONFIG_CRYPTO_AEGIS128L=m
+CONFIG_CRYPTO_AEGIS256=m
+CONFIG_CRYPTO_MORUS640=m
+CONFIG_CRYPTO_MORUS1280=m
 CONFIG_CRYPTO_CFB=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
@@ -618,6 +623,7 @@ CONFIG_CRYPTO_LZO=m
 CONFIG_CRYPTO_842=m
 CONFIG_CRYPTO_LZ4=m
 CONFIG_CRYPTO_LZ4HC=m
+CONFIG_CRYPTO_ZSTD=m
 CONFIG_CRYPTO_ANSI_CPRNG=m
 CONFIG_CRYPTO_DRBG_HASH=y
 CONFIG_CRYPTO_DRBG_CTR=y
diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig
index b52e597899eb..930cc2965a11 100644
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -49,6 +49,7 @@ CONFIG_UNIX_DIAG=m
 CONFIG_TLS=m
 CONFIG_XFRM_MIGRATE=y
 CONFIG_NET_KEY=y
+CONFIG_XDP_SOCKETS=y
 CONFIG_INET=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
@@ -95,18 +96,14 @@ CONFIG_NF_CONNTRACK_SANE=m
 CONFIG_NF_CONNTRACK_SIP=m
 CONFIG_NF_CONNTRACK_TFTP=m
 CONFIG_NF_TABLES=m
+CONFIG_NF_TABLES_SET=m
 CONFIG_NF_TABLES_INET=y
 CONFIG_NF_TABLES_NETDEV=y
-CONFIG_NFT_EXTHDR=m
-CONFIG_NFT_META=m
-CONFIG_NFT_RT=m
 CONFIG_NFT_NUMGEN=m
 CONFIG_NFT_CT=m
 CONFIG_NFT_FLOW_OFFLOAD=m
-CONFIG_NFT_SET_RBTREE=m
-CONFIG_NFT_SET_HASH=m
-CONFIG_NFT_SET_BITMAP=m
 CONFIG_NFT_COUNTER=m
+CONFIG_NFT_CONNLIMIT=m
 CONFIG_NFT_LOG=m
 CONFIG_NFT_LIMIT=m
 CONFIG_NFT_MASQ=m
@@ -119,6 +116,7 @@ CONFIG_NFT_REJECT=m
 CONFIG_NFT_COMPAT=m
 CONFIG_NFT_HASH=m
 CONFIG_NFT_FIB_INET=m
+CONFIG_NFT_SOCKET=m
 CONFIG_NFT_DUP_NETDEV=m
 CONFIG_NFT_FWD_NETDEV=m
 CONFIG_NFT_FIB_NETDEV=m
@@ -197,7 +195,6 @@ CONFIG_IP_SET_HASH_NETPORT=m
 CONFIG_IP_SET_HASH_NETIFACE=m
 CONFIG_IP_SET_LIST_SET=m
 CONFIG_NF_CONNTRACK_IPV4=m
-CONFIG_NF_SOCKET_IPV4=m
 CONFIG_NFT_CHAIN_ROUTE_IPV4=m
 CONFIG_NFT_DUP_IPV4=m
 CONFIG_NFT_FIB_IPV4=m
@@ -228,7 +225,6 @@ CONFIG_IP_NF_ARPTABLES=m
 CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NF_CONNTRACK_IPV6=m
-CONFIG_NF_SOCKET_IPV6=m
 CONFIG_NFT_CHAIN_ROUTE_IPV6=m
 CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_NFT_MASQ_IPV6=m
@@ -257,7 +253,6 @@ CONFIG_IP6_NF_NAT=m
 CONFIG_IP6_NF_TARGET_MASQUERADE=m
 CONFIG_IP6_NF_TARGET_NPT=m
 CONFIG_NF_TABLES_BRIDGE=y
-CONFIG_NFT_BRIDGE_META=m
 CONFIG_NFT_BRIDGE_REJECT=m
 CONFIG_NF_LOG_BRIDGE=m
 CONFIG_BRIDGE_NF_EBTABLES=m
@@ -301,6 +296,7 @@ CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m
 CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m
 CONFIG_DNS_RESOLVER=y
 CONFIG_BATMAN_ADV=m
+# CONFIG_BATMAN_ADV_BATMAN_V is not set
 CONFIG_BATMAN_ADV_DAT=y
 CONFIG_BATMAN_ADV_NC=y
 CONFIG_BATMAN_ADV_MCAST=y
@@ -354,6 +350,7 @@ CONFIG_DM_UNSTRIPED=m
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
 CONFIG_DM_THIN_PROVISIONING=m
+CONFIG_DM_WRITECACHE=m
 CONFIG_DM_ERA=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_RAID=m
@@ -398,8 +395,8 @@ CONFIG_VETH=m
 CONFIG_MACMACE=y
 # CONFIG_NET_VENDOR_AQUANTIA is not set
 # CONFIG_NET_VENDOR_ARC is not set
-# CONFIG_NET_CADENCE is not set
 # CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_CADENCE is not set
 CONFIG_MAC89x0=y
 # CONFIG_NET_VENDOR_CORTINA is not set
 # CONFIG_NET_VENDOR_EZCHIP is not set
@@ -407,6 +404,7 @@ CONFIG_MAC89x0=y
 # CONFIG_NET_VENDOR_INTEL is not set
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MICREL is not set
+# CONFIG_NET_VENDOR_MICROSEMI is not set
 CONFIG_MACSONIC=y
 # CONFIG_NET_VENDOR_NETRONOME is not set
 # CONFIG_NET_VENDOR_NI is not set
@@ -420,9 +418,9 @@ CONFIG_MAC8390=y
 # CONFIG_NET_VENDOR_SMSC is not set
 # CONFIG_NET_VENDOR_SOCIONEXT is not set
 # CONFIG_NET_VENDOR_STMICRO is not set
+# CONFIG_NET_VENDOR_SYNOPSYS is not set
 # CONFIG_NET_VENDOR_VIA is not set
 # CONFIG_NET_VENDOR_WIZNET is not set
-# CONFIG_NET_VENDOR_SYNOPSYS is not set
 CONFIG_PPP=m
 CONFIG_PPP_BSDCOMP=m
 CONFIG_PPP_DEFLATE=m
@@ -465,6 +463,7 @@ CONFIG_HIDRAW=y
 CONFIG_UHID=m
 # CONFIG_HID_GENERIC is not set
 # CONFIG_HID_ITE is not set
+# CONFIG_HID_REDRAGON is not set
 # CONFIG_USB_SUPPORT is not set
 CONFIG_RTC_CLASS=y
 # CONFIG_RTC_NVMEM is not set
@@ -482,7 +481,7 @@ CONFIG_FS_ENCRYPTION=m
 CONFIG_FANOTIFY=y
 CONFIG_QUOTA_NETLINK_INTERFACE=y
 # CONFIG_PRINT_QUOTA_WARNING is not set
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
 CONFIG_FUSE_FS=m
 CONFIG_CUSE=m
 CONFIG_OVERLAY_FS=m
@@ -583,6 +582,7 @@ CONFIG_TEST_KSTRTOX=m
 CONFIG_TEST_PRINTF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
+CONFIG_TEST_OVERFLOW=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_HASH=m
 CONFIG_TEST_USER_COPY=m
@@ -605,6 +605,11 @@ CONFIG_CRYPTO_CRYPTD=m
 CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
+CONFIG_CRYPTO_AEGIS128=m
+CONFIG_CRYPTO_AEGIS128L=m
+CONFIG_CRYPTO_AEGIS256=m
+CONFIG_CRYPTO_MORUS640=m
+CONFIG_CRYPTO_MORUS1280=m
 CONFIG_CRYPTO_CFB=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
@@ -640,6 +645,7 @@ CONFIG_CRYPTO_LZO=m
 CONFIG_CRYPTO_842=m
 CONFIG_CRYPTO_LZ4=m
 CONFIG_CRYPTO_LZ4HC=m
+CONFIG_CRYPTO_ZSTD=m
 CONFIG_CRYPTO_ANSI_CPRNG=m
 CONFIG_CRYPTO_DRBG_HASH=y
 CONFIG_CRYPTO_DRBG_CTR=y
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
index 2a84eeec5b02..e7dd25300127 100644
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -59,6 +59,7 @@ CONFIG_UNIX_DIAG=m
 CONFIG_TLS=m
 CONFIG_XFRM_MIGRATE=y
 CONFIG_NET_KEY=y
+CONFIG_XDP_SOCKETS=y
 CONFIG_INET=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
@@ -105,18 +106,14 @@ CONFIG_NF_CONNTRACK_SANE=m
 CONFIG_NF_CONNTRACK_SIP=m
 CONFIG_NF_CONNTRACK_TFTP=m
 CONFIG_NF_TABLES=m
+CONFIG_NF_TABLES_SET=m
 CONFIG_NF_TABLES_INET=y
 CONFIG_NF_TABLES_NETDEV=y
-CONFIG_NFT_EXTHDR=m
-CONFIG_NFT_META=m
-CONFIG_NFT_RT=m
 CONFIG_NFT_NUMGEN=m
 CONFIG_NFT_CT=m
 CONFIG_NFT_FLOW_OFFLOAD=m
-CONFIG_NFT_SET_RBTREE=m
-CONFIG_NFT_SET_HASH=m
-CONFIG_NFT_SET_BITMAP=m
 CONFIG_NFT_COUNTER=m
+CONFIG_NFT_CONNLIMIT=m
 CONFIG_NFT_LOG=m
 CONFIG_NFT_LIMIT=m
 CONFIG_NFT_MASQ=m
@@ -129,6 +126,7 @@ CONFIG_NFT_REJECT=m
 CONFIG_NFT_COMPAT=m
 CONFIG_NFT_HASH=m
 CONFIG_NFT_FIB_INET=m
+CONFIG_NFT_SOCKET=m
 CONFIG_NFT_DUP_NETDEV=m
 CONFIG_NFT_FWD_NETDEV=m
 CONFIG_NFT_FIB_NETDEV=m
@@ -207,7 +205,6 @@ CONFIG_IP_SET_HASH_NETPORT=m
 CONFIG_IP_SET_HASH_NETIFACE=m
 CONFIG_IP_SET_LIST_SET=m
 CONFIG_NF_CONNTRACK_IPV4=m
-CONFIG_NF_SOCKET_IPV4=m
 CONFIG_NFT_CHAIN_ROUTE_IPV4=m
 CONFIG_NFT_DUP_IPV4=m
 CONFIG_NFT_FIB_IPV4=m
@@ -238,7 +235,6 @@ CONFIG_IP_NF_ARPTABLES=m
 CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NF_CONNTRACK_IPV6=m
-CONFIG_NF_SOCKET_IPV6=m
 CONFIG_NFT_CHAIN_ROUTE_IPV6=m
 CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_NFT_MASQ_IPV6=m
@@ -267,7 +263,6 @@ CONFIG_IP6_NF_NAT=m
 CONFIG_IP6_NF_TARGET_MASQUERADE=m
 CONFIG_IP6_NF_TARGET_NPT=m
 CONFIG_NF_TABLES_BRIDGE=y
-CONFIG_NFT_BRIDGE_META=m
 CONFIG_NFT_BRIDGE_REJECT=m
 CONFIG_NF_LOG_BRIDGE=m
 CONFIG_BRIDGE_NF_EBTABLES=m
@@ -311,6 +306,7 @@ CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m
 CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m
 CONFIG_DNS_RESOLVER=y
 CONFIG_BATMAN_ADV=m
+# CONFIG_BATMAN_ADV_BATMAN_V is not set
 CONFIG_BATMAN_ADV_DAT=y
 CONFIG_BATMAN_ADV_NC=y
 CONFIG_BATMAN_ADV_MCAST=y
@@ -373,6 +369,7 @@ CONFIG_A2091_SCSI=y
 CONFIG_GVP11_SCSI=y
 CONFIG_SCSI_A4000T=y
 CONFIG_SCSI_ZORRO7XX=y
+CONFIG_SCSI_ZORRO_ESP=y
 CONFIG_ATARI_SCSI=y
 CONFIG_MAC_SCSI=y
 CONFIG_SCSI_MAC_ESP=y
@@ -387,6 +384,7 @@ CONFIG_DM_UNSTRIPED=m
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
 CONFIG_DM_THIN_PROVISIONING=m
+CONFIG_DM_WRITECACHE=m
 CONFIG_DM_ERA=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_RAID=m
@@ -438,8 +436,8 @@ CONFIG_SUN3LANCE=y
 CONFIG_MACMACE=y
 # CONFIG_NET_VENDOR_AQUANTIA is not set
 # CONFIG_NET_VENDOR_ARC is not set
-# CONFIG_NET_CADENCE is not set
 # CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_CADENCE is not set
 CONFIG_MAC89x0=y
 # CONFIG_NET_VENDOR_CORTINA is not set
 # CONFIG_NET_VENDOR_EZCHIP is not set
@@ -449,9 +447,11 @@ CONFIG_BVME6000_NET=y
 CONFIG_MVME16x_NET=y
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MICREL is not set
+# CONFIG_NET_VENDOR_MICROSEMI is not set
 CONFIG_MACSONIC=y
 # CONFIG_NET_VENDOR_NETRONOME is not set
 # CONFIG_NET_VENDOR_NI is not set
+CONFIG_XSURF100=y
 CONFIG_HYDRA=y
 CONFIG_MAC8390=y
 CONFIG_NE2000=y
@@ -466,9 +466,9 @@ CONFIG_ZORRO8390=y
 CONFIG_SMC91X=y
 # CONFIG_NET_VENDOR_SOCIONEXT is not set
 # CONFIG_NET_VENDOR_STMICRO is not set
+# CONFIG_NET_VENDOR_SYNOPSYS is not set
 # CONFIG_NET_VENDOR_VIA is not set
 # CONFIG_NET_VENDOR_WIZNET is not set
-# CONFIG_NET_VENDOR_SYNOPSYS is not set
 CONFIG_PLIP=m
 CONFIG_PPP=m
 CONFIG_PPP_BSDCOMP=m
@@ -533,6 +533,7 @@ CONFIG_HIDRAW=y
 CONFIG_UHID=m
 # CONFIG_HID_GENERIC is not set
 # CONFIG_HID_ITE is not set
+# CONFIG_HID_REDRAGON is not set
 # CONFIG_USB_SUPPORT is not set
 CONFIG_RTC_CLASS=y
 # CONFIG_RTC_NVMEM is not set
@@ -562,7 +563,7 @@ CONFIG_FS_ENCRYPTION=m
 CONFIG_FANOTIFY=y
 CONFIG_QUOTA_NETLINK_INTERFACE=y
 # CONFIG_PRINT_QUOTA_WARNING is not set
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
 CONFIG_FUSE_FS=m
 CONFIG_CUSE=m
 CONFIG_OVERLAY_FS=m
@@ -663,6 +664,7 @@ CONFIG_TEST_KSTRTOX=m
 CONFIG_TEST_PRINTF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
+CONFIG_TEST_OVERFLOW=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_HASH=m
 CONFIG_TEST_USER_COPY=m
@@ -685,6 +687,11 @@ CONFIG_CRYPTO_CRYPTD=m
 CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
+CONFIG_CRYPTO_AEGIS128=m
+CONFIG_CRYPTO_AEGIS128L=m
+CONFIG_CRYPTO_AEGIS256=m
+CONFIG_CRYPTO_MORUS640=m
+CONFIG_CRYPTO_MORUS1280=m
 CONFIG_CRYPTO_CFB=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
@@ -720,6 +727,7 @@ CONFIG_CRYPTO_LZO=m
 CONFIG_CRYPTO_842=m
 CONFIG_CRYPTO_LZ4=m
 CONFIG_CRYPTO_LZ4HC=m
+CONFIG_CRYPTO_ZSTD=m
 CONFIG_CRYPTO_ANSI_CPRNG=m
 CONFIG_CRYPTO_DRBG_HASH=y
 CONFIG_CRYPTO_DRBG_CTR=y
diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig
index 476e69994340..b383327fd77a 100644
--- a/arch/m68k/configs/mvme147_defconfig
+++ b/arch/m68k/configs/mvme147_defconfig
@@ -47,6 +47,7 @@ CONFIG_UNIX_DIAG=m
 CONFIG_TLS=m
 CONFIG_XFRM_MIGRATE=y
 CONFIG_NET_KEY=y
+CONFIG_XDP_SOCKETS=y
 CONFIG_INET=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
@@ -93,18 +94,14 @@ CONFIG_NF_CONNTRACK_SANE=m
 CONFIG_NF_CONNTRACK_SIP=m
 CONFIG_NF_CONNTRACK_TFTP=m
 CONFIG_NF_TABLES=m
+CONFIG_NF_TABLES_SET=m
 CONFIG_NF_TABLES_INET=y
 CONFIG_NF_TABLES_NETDEV=y
-CONFIG_NFT_EXTHDR=m
-CONFIG_NFT_META=m
-CONFIG_NFT_RT=m
 CONFIG_NFT_NUMGEN=m
 CONFIG_NFT_CT=m
 CONFIG_NFT_FLOW_OFFLOAD=m
-CONFIG_NFT_SET_RBTREE=m
-CONFIG_NFT_SET_HASH=m
-CONFIG_NFT_SET_BITMAP=m
 CONFIG_NFT_COUNTER=m
+CONFIG_NFT_CONNLIMIT=m
 CONFIG_NFT_LOG=m
 CONFIG_NFT_LIMIT=m
 CONFIG_NFT_MASQ=m
@@ -117,6 +114,7 @@ CONFIG_NFT_REJECT=m
 CONFIG_NFT_COMPAT=m
 CONFIG_NFT_HASH=m
 CONFIG_NFT_FIB_INET=m
+CONFIG_NFT_SOCKET=m
 CONFIG_NFT_DUP_NETDEV=m
 CONFIG_NFT_FWD_NETDEV=m
 CONFIG_NFT_FIB_NETDEV=m
@@ -195,7 +193,6 @@ CONFIG_IP_SET_HASH_NETPORT=m
 CONFIG_IP_SET_HASH_NETIFACE=m
 CONFIG_IP_SET_LIST_SET=m
 CONFIG_NF_CONNTRACK_IPV4=m
-CONFIG_NF_SOCKET_IPV4=m
 CONFIG_NFT_CHAIN_ROUTE_IPV4=m
 CONFIG_NFT_DUP_IPV4=m
 CONFIG_NFT_FIB_IPV4=m
@@ -226,7 +223,6 @@ CONFIG_IP_NF_ARPTABLES=m
 CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NF_CONNTRACK_IPV6=m
-CONFIG_NF_SOCKET_IPV6=m
 CONFIG_NFT_CHAIN_ROUTE_IPV6=m
 CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_NFT_MASQ_IPV6=m
@@ -255,7 +251,6 @@ CONFIG_IP6_NF_NAT=m
 CONFIG_IP6_NF_TARGET_MASQUERADE=m
 CONFIG_IP6_NF_TARGET_NPT=m
 CONFIG_NF_TABLES_BRIDGE=y
-CONFIG_NFT_BRIDGE_META=m
 CONFIG_NFT_BRIDGE_REJECT=m
 CONFIG_NF_LOG_BRIDGE=m
 CONFIG_BRIDGE_NF_EBTABLES=m
@@ -296,6 +291,7 @@ CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m
 CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m
 CONFIG_DNS_RESOLVER=y
 CONFIG_BATMAN_ADV=m
+# CONFIG_BATMAN_ADV_BATMAN_V is not set
 CONFIG_BATMAN_ADV_DAT=y
 CONFIG_BATMAN_ADV_NC=y
 CONFIG_BATMAN_ADV_MCAST=y
@@ -343,6 +339,7 @@ CONFIG_DM_UNSTRIPED=m
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
 CONFIG_DM_THIN_PROVISIONING=m
+CONFIG_DM_WRITECACHE=m
 CONFIG_DM_ERA=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_RAID=m
@@ -380,14 +377,15 @@ CONFIG_VETH=m
 CONFIG_MVME147_NET=y
 # CONFIG_NET_VENDOR_AQUANTIA is not set
 # CONFIG_NET_VENDOR_ARC is not set
-# CONFIG_NET_CADENCE is not set
 # CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_CADENCE is not set
 # CONFIG_NET_VENDOR_CORTINA is not set
 # CONFIG_NET_VENDOR_EZCHIP is not set
 # CONFIG_NET_VENDOR_HUAWEI is not set
 # CONFIG_NET_VENDOR_INTEL is not set
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MICREL is not set
+# CONFIG_NET_VENDOR_MICROSEMI is not set
 # CONFIG_NET_VENDOR_NATSEMI is not set
 # CONFIG_NET_VENDOR_NETRONOME is not set
 # CONFIG_NET_VENDOR_NI is not set
@@ -399,9 +397,9 @@ CONFIG_MVME147_NET=y
 # CONFIG_NET_VENDOR_SOLARFLARE is not set
 # CONFIG_NET_VENDOR_SOCIONEXT is not set
 # CONFIG_NET_VENDOR_STMICRO is not set
+# CONFIG_NET_VENDOR_SYNOPSYS is not set
 # CONFIG_NET_VENDOR_VIA is not set
 # CONFIG_NET_VENDOR_WIZNET is not set
-# CONFIG_NET_VENDOR_SYNOPSYS is not set
 CONFIG_PPP=m
 CONFIG_PPP_BSDCOMP=m
 CONFIG_PPP_DEFLATE=m
@@ -433,6 +431,7 @@ CONFIG_HIDRAW=y
 CONFIG_UHID=m
 # CONFIG_HID_GENERIC is not set
 # CONFIG_HID_ITE is not set
+# CONFIG_HID_REDRAGON is not set
 # CONFIG_USB_SUPPORT is not set
 CONFIG_RTC_CLASS=y
 # CONFIG_RTC_NVMEM is not set
@@ -450,7 +449,7 @@ CONFIG_FS_ENCRYPTION=m
 CONFIG_FANOTIFY=y
 CONFIG_QUOTA_NETLINK_INTERFACE=y
 # CONFIG_PRINT_QUOTA_WARNING is not set
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
 CONFIG_FUSE_FS=m
 CONFIG_CUSE=m
 CONFIG_OVERLAY_FS=m
@@ -551,6 +550,7 @@ CONFIG_TEST_KSTRTOX=m
 CONFIG_TEST_PRINTF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
+CONFIG_TEST_OVERFLOW=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_HASH=m
 CONFIG_TEST_USER_COPY=m
@@ -573,6 +573,11 @@ CONFIG_CRYPTO_CRYPTD=m
 CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
+CONFIG_CRYPTO_AEGIS128=m
+CONFIG_CRYPTO_AEGIS128L=m
+CONFIG_CRYPTO_AEGIS256=m
+CONFIG_CRYPTO_MORUS640=m
+CONFIG_CRYPTO_MORUS1280=m
 CONFIG_CRYPTO_CFB=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
@@ -608,6 +613,7 @@ CONFIG_CRYPTO_LZO=m
 CONFIG_CRYPTO_842=m
 CONFIG_CRYPTO_LZ4=m
 CONFIG_CRYPTO_LZ4HC=m
+CONFIG_CRYPTO_ZSTD=m
 CONFIG_CRYPTO_ANSI_CPRNG=m
 CONFIG_CRYPTO_DRBG_HASH=y
 CONFIG_CRYPTO_DRBG_CTR=y
diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig
index 1477cda9146e..9783d3deb9e9 100644
--- a/arch/m68k/configs/mvme16x_defconfig
+++ b/arch/m68k/configs/mvme16x_defconfig
@@ -48,6 +48,7 @@ CONFIG_UNIX_DIAG=m
48CONFIG_TLS=m 48CONFIG_TLS=m
49CONFIG_XFRM_MIGRATE=y 49CONFIG_XFRM_MIGRATE=y
50CONFIG_NET_KEY=y 50CONFIG_NET_KEY=y
51CONFIG_XDP_SOCKETS=y
51CONFIG_INET=y 52CONFIG_INET=y
52CONFIG_IP_PNP=y 53CONFIG_IP_PNP=y
53CONFIG_IP_PNP_DHCP=y 54CONFIG_IP_PNP_DHCP=y
@@ -94,18 +95,14 @@ CONFIG_NF_CONNTRACK_SANE=m
94CONFIG_NF_CONNTRACK_SIP=m 95CONFIG_NF_CONNTRACK_SIP=m
95CONFIG_NF_CONNTRACK_TFTP=m 96CONFIG_NF_CONNTRACK_TFTP=m
96CONFIG_NF_TABLES=m 97CONFIG_NF_TABLES=m
98CONFIG_NF_TABLES_SET=m
97CONFIG_NF_TABLES_INET=y 99CONFIG_NF_TABLES_INET=y
98CONFIG_NF_TABLES_NETDEV=y 100CONFIG_NF_TABLES_NETDEV=y
99CONFIG_NFT_EXTHDR=m
100CONFIG_NFT_META=m
101CONFIG_NFT_RT=m
102CONFIG_NFT_NUMGEN=m 101CONFIG_NFT_NUMGEN=m
103CONFIG_NFT_CT=m 102CONFIG_NFT_CT=m
104CONFIG_NFT_FLOW_OFFLOAD=m 103CONFIG_NFT_FLOW_OFFLOAD=m
105CONFIG_NFT_SET_RBTREE=m
106CONFIG_NFT_SET_HASH=m
107CONFIG_NFT_SET_BITMAP=m
108CONFIG_NFT_COUNTER=m 104CONFIG_NFT_COUNTER=m
105CONFIG_NFT_CONNLIMIT=m
109CONFIG_NFT_LOG=m 106CONFIG_NFT_LOG=m
110CONFIG_NFT_LIMIT=m 107CONFIG_NFT_LIMIT=m
111CONFIG_NFT_MASQ=m 108CONFIG_NFT_MASQ=m
@@ -118,6 +115,7 @@ CONFIG_NFT_REJECT=m
118CONFIG_NFT_COMPAT=m 115CONFIG_NFT_COMPAT=m
119CONFIG_NFT_HASH=m 116CONFIG_NFT_HASH=m
120CONFIG_NFT_FIB_INET=m 117CONFIG_NFT_FIB_INET=m
118CONFIG_NFT_SOCKET=m
121CONFIG_NFT_DUP_NETDEV=m 119CONFIG_NFT_DUP_NETDEV=m
122CONFIG_NFT_FWD_NETDEV=m 120CONFIG_NFT_FWD_NETDEV=m
123CONFIG_NFT_FIB_NETDEV=m 121CONFIG_NFT_FIB_NETDEV=m
@@ -196,7 +194,6 @@ CONFIG_IP_SET_HASH_NETPORT=m
196CONFIG_IP_SET_HASH_NETIFACE=m 194CONFIG_IP_SET_HASH_NETIFACE=m
197CONFIG_IP_SET_LIST_SET=m 195CONFIG_IP_SET_LIST_SET=m
198CONFIG_NF_CONNTRACK_IPV4=m 196CONFIG_NF_CONNTRACK_IPV4=m
199CONFIG_NF_SOCKET_IPV4=m
200CONFIG_NFT_CHAIN_ROUTE_IPV4=m 197CONFIG_NFT_CHAIN_ROUTE_IPV4=m
201CONFIG_NFT_DUP_IPV4=m 198CONFIG_NFT_DUP_IPV4=m
202CONFIG_NFT_FIB_IPV4=m 199CONFIG_NFT_FIB_IPV4=m
@@ -227,7 +224,6 @@ CONFIG_IP_NF_ARPTABLES=m
227CONFIG_IP_NF_ARPFILTER=m 224CONFIG_IP_NF_ARPFILTER=m
228CONFIG_IP_NF_ARP_MANGLE=m 225CONFIG_IP_NF_ARP_MANGLE=m
229CONFIG_NF_CONNTRACK_IPV6=m 226CONFIG_NF_CONNTRACK_IPV6=m
230CONFIG_NF_SOCKET_IPV6=m
231CONFIG_NFT_CHAIN_ROUTE_IPV6=m 227CONFIG_NFT_CHAIN_ROUTE_IPV6=m
232CONFIG_NFT_CHAIN_NAT_IPV6=m 228CONFIG_NFT_CHAIN_NAT_IPV6=m
233CONFIG_NFT_MASQ_IPV6=m 229CONFIG_NFT_MASQ_IPV6=m
@@ -256,7 +252,6 @@ CONFIG_IP6_NF_NAT=m
256CONFIG_IP6_NF_TARGET_MASQUERADE=m 252CONFIG_IP6_NF_TARGET_MASQUERADE=m
257CONFIG_IP6_NF_TARGET_NPT=m 253CONFIG_IP6_NF_TARGET_NPT=m
258CONFIG_NF_TABLES_BRIDGE=y 254CONFIG_NF_TABLES_BRIDGE=y
259CONFIG_NFT_BRIDGE_META=m
260CONFIG_NFT_BRIDGE_REJECT=m 255CONFIG_NFT_BRIDGE_REJECT=m
261CONFIG_NF_LOG_BRIDGE=m 256CONFIG_NF_LOG_BRIDGE=m
262CONFIG_BRIDGE_NF_EBTABLES=m 257CONFIG_BRIDGE_NF_EBTABLES=m
@@ -297,6 +292,7 @@ CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m
297CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m 292CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m
298CONFIG_DNS_RESOLVER=y 293CONFIG_DNS_RESOLVER=y
299CONFIG_BATMAN_ADV=m 294CONFIG_BATMAN_ADV=m
295# CONFIG_BATMAN_ADV_BATMAN_V is not set
300CONFIG_BATMAN_ADV_DAT=y 296CONFIG_BATMAN_ADV_DAT=y
301CONFIG_BATMAN_ADV_NC=y 297CONFIG_BATMAN_ADV_NC=y
302CONFIG_BATMAN_ADV_MCAST=y 298CONFIG_BATMAN_ADV_MCAST=y
@@ -344,6 +340,7 @@ CONFIG_DM_UNSTRIPED=m
344CONFIG_DM_CRYPT=m 340CONFIG_DM_CRYPT=m
345CONFIG_DM_SNAPSHOT=m 341CONFIG_DM_SNAPSHOT=m
346CONFIG_DM_THIN_PROVISIONING=m 342CONFIG_DM_THIN_PROVISIONING=m
343CONFIG_DM_WRITECACHE=m
347CONFIG_DM_ERA=m 344CONFIG_DM_ERA=m
348CONFIG_DM_MIRROR=m 345CONFIG_DM_MIRROR=m
349CONFIG_DM_RAID=m 346CONFIG_DM_RAID=m
@@ -380,14 +377,15 @@ CONFIG_VETH=m
380# CONFIG_NET_VENDOR_AMAZON is not set 377# CONFIG_NET_VENDOR_AMAZON is not set
381# CONFIG_NET_VENDOR_AQUANTIA is not set 378# CONFIG_NET_VENDOR_AQUANTIA is not set
382# CONFIG_NET_VENDOR_ARC is not set 379# CONFIG_NET_VENDOR_ARC is not set
383# CONFIG_NET_CADENCE is not set
384# CONFIG_NET_VENDOR_BROADCOM is not set 380# CONFIG_NET_VENDOR_BROADCOM is not set
381# CONFIG_NET_CADENCE is not set
385# CONFIG_NET_VENDOR_CORTINA is not set 382# CONFIG_NET_VENDOR_CORTINA is not set
386# CONFIG_NET_VENDOR_EZCHIP is not set 383# CONFIG_NET_VENDOR_EZCHIP is not set
387# CONFIG_NET_VENDOR_HUAWEI is not set 384# CONFIG_NET_VENDOR_HUAWEI is not set
388CONFIG_MVME16x_NET=y 385CONFIG_MVME16x_NET=y
389# CONFIG_NET_VENDOR_MARVELL is not set 386# CONFIG_NET_VENDOR_MARVELL is not set
390# CONFIG_NET_VENDOR_MICREL is not set 387# CONFIG_NET_VENDOR_MICREL is not set
388# CONFIG_NET_VENDOR_MICROSEMI is not set
391# CONFIG_NET_VENDOR_NATSEMI is not set 389# CONFIG_NET_VENDOR_NATSEMI is not set
392# CONFIG_NET_VENDOR_NETRONOME is not set 390# CONFIG_NET_VENDOR_NETRONOME is not set
393# CONFIG_NET_VENDOR_NI is not set 391# CONFIG_NET_VENDOR_NI is not set
@@ -399,9 +397,9 @@ CONFIG_MVME16x_NET=y
399# CONFIG_NET_VENDOR_SOLARFLARE is not set 397# CONFIG_NET_VENDOR_SOLARFLARE is not set
400# CONFIG_NET_VENDOR_SOCIONEXT is not set 398# CONFIG_NET_VENDOR_SOCIONEXT is not set
401# CONFIG_NET_VENDOR_STMICRO is not set 399# CONFIG_NET_VENDOR_STMICRO is not set
400# CONFIG_NET_VENDOR_SYNOPSYS is not set
402# CONFIG_NET_VENDOR_VIA is not set 401# CONFIG_NET_VENDOR_VIA is not set
403# CONFIG_NET_VENDOR_WIZNET is not set 402# CONFIG_NET_VENDOR_WIZNET is not set
404# CONFIG_NET_VENDOR_SYNOPSYS is not set
405CONFIG_PPP=m 403CONFIG_PPP=m
406CONFIG_PPP_BSDCOMP=m 404CONFIG_PPP_BSDCOMP=m
407CONFIG_PPP_DEFLATE=m 405CONFIG_PPP_DEFLATE=m
@@ -433,6 +431,7 @@ CONFIG_HIDRAW=y
433CONFIG_UHID=m 431CONFIG_UHID=m
434# CONFIG_HID_GENERIC is not set 432# CONFIG_HID_GENERIC is not set
435# CONFIG_HID_ITE is not set 433# CONFIG_HID_ITE is not set
434# CONFIG_HID_REDRAGON is not set
436# CONFIG_USB_SUPPORT is not set 435# CONFIG_USB_SUPPORT is not set
437CONFIG_RTC_CLASS=y 436CONFIG_RTC_CLASS=y
438# CONFIG_RTC_NVMEM is not set 437# CONFIG_RTC_NVMEM is not set
@@ -450,7 +449,7 @@ CONFIG_FS_ENCRYPTION=m
450CONFIG_FANOTIFY=y 449CONFIG_FANOTIFY=y
451CONFIG_QUOTA_NETLINK_INTERFACE=y 450CONFIG_QUOTA_NETLINK_INTERFACE=y
452# CONFIG_PRINT_QUOTA_WARNING is not set 451# CONFIG_PRINT_QUOTA_WARNING is not set
453CONFIG_AUTOFS4_FS=m 452CONFIG_AUTOFS_FS=m
454CONFIG_FUSE_FS=m 453CONFIG_FUSE_FS=m
455CONFIG_CUSE=m 454CONFIG_CUSE=m
456CONFIG_OVERLAY_FS=m 455CONFIG_OVERLAY_FS=m
@@ -551,6 +550,7 @@ CONFIG_TEST_KSTRTOX=m
551CONFIG_TEST_PRINTF=m 550CONFIG_TEST_PRINTF=m
552CONFIG_TEST_BITMAP=m 551CONFIG_TEST_BITMAP=m
553CONFIG_TEST_UUID=m 552CONFIG_TEST_UUID=m
553CONFIG_TEST_OVERFLOW=m
554CONFIG_TEST_RHASHTABLE=m 554CONFIG_TEST_RHASHTABLE=m
555CONFIG_TEST_HASH=m 555CONFIG_TEST_HASH=m
556CONFIG_TEST_USER_COPY=m 556CONFIG_TEST_USER_COPY=m
@@ -573,6 +573,11 @@ CONFIG_CRYPTO_CRYPTD=m
573CONFIG_CRYPTO_MCRYPTD=m 573CONFIG_CRYPTO_MCRYPTD=m
574CONFIG_CRYPTO_TEST=m 574CONFIG_CRYPTO_TEST=m
575CONFIG_CRYPTO_CHACHA20POLY1305=m 575CONFIG_CRYPTO_CHACHA20POLY1305=m
576CONFIG_CRYPTO_AEGIS128=m
577CONFIG_CRYPTO_AEGIS128L=m
578CONFIG_CRYPTO_AEGIS256=m
579CONFIG_CRYPTO_MORUS640=m
580CONFIG_CRYPTO_MORUS1280=m
576CONFIG_CRYPTO_CFB=m 581CONFIG_CRYPTO_CFB=m
577CONFIG_CRYPTO_LRW=m 582CONFIG_CRYPTO_LRW=m
578CONFIG_CRYPTO_PCBC=m 583CONFIG_CRYPTO_PCBC=m
@@ -608,6 +613,7 @@ CONFIG_CRYPTO_LZO=m
608CONFIG_CRYPTO_842=m 613CONFIG_CRYPTO_842=m
609CONFIG_CRYPTO_LZ4=m 614CONFIG_CRYPTO_LZ4=m
610CONFIG_CRYPTO_LZ4HC=m 615CONFIG_CRYPTO_LZ4HC=m
616CONFIG_CRYPTO_ZSTD=m
611CONFIG_CRYPTO_ANSI_CPRNG=m 617CONFIG_CRYPTO_ANSI_CPRNG=m
612CONFIG_CRYPTO_DRBG_HASH=y 618CONFIG_CRYPTO_DRBG_HASH=y
613CONFIG_CRYPTO_DRBG_CTR=y 619CONFIG_CRYPTO_DRBG_CTR=y
diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig
index b3a543dc48a0..a35d10ee10cb 100644
--- a/arch/m68k/configs/q40_defconfig
+++ b/arch/m68k/configs/q40_defconfig
@@ -48,6 +48,7 @@ CONFIG_UNIX_DIAG=m
48CONFIG_TLS=m 48CONFIG_TLS=m
49CONFIG_XFRM_MIGRATE=y 49CONFIG_XFRM_MIGRATE=y
50CONFIG_NET_KEY=y 50CONFIG_NET_KEY=y
51CONFIG_XDP_SOCKETS=y
51CONFIG_INET=y 52CONFIG_INET=y
52CONFIG_IP_PNP=y 53CONFIG_IP_PNP=y
53CONFIG_IP_PNP_DHCP=y 54CONFIG_IP_PNP_DHCP=y
@@ -94,18 +95,14 @@ CONFIG_NF_CONNTRACK_SANE=m
94CONFIG_NF_CONNTRACK_SIP=m 95CONFIG_NF_CONNTRACK_SIP=m
95CONFIG_NF_CONNTRACK_TFTP=m 96CONFIG_NF_CONNTRACK_TFTP=m
96CONFIG_NF_TABLES=m 97CONFIG_NF_TABLES=m
98CONFIG_NF_TABLES_SET=m
97CONFIG_NF_TABLES_INET=y 99CONFIG_NF_TABLES_INET=y
98CONFIG_NF_TABLES_NETDEV=y 100CONFIG_NF_TABLES_NETDEV=y
99CONFIG_NFT_EXTHDR=m
100CONFIG_NFT_META=m
101CONFIG_NFT_RT=m
102CONFIG_NFT_NUMGEN=m 101CONFIG_NFT_NUMGEN=m
103CONFIG_NFT_CT=m 102CONFIG_NFT_CT=m
104CONFIG_NFT_FLOW_OFFLOAD=m 103CONFIG_NFT_FLOW_OFFLOAD=m
105CONFIG_NFT_SET_RBTREE=m
106CONFIG_NFT_SET_HASH=m
107CONFIG_NFT_SET_BITMAP=m
108CONFIG_NFT_COUNTER=m 104CONFIG_NFT_COUNTER=m
105CONFIG_NFT_CONNLIMIT=m
109CONFIG_NFT_LOG=m 106CONFIG_NFT_LOG=m
110CONFIG_NFT_LIMIT=m 107CONFIG_NFT_LIMIT=m
111CONFIG_NFT_MASQ=m 108CONFIG_NFT_MASQ=m
@@ -118,6 +115,7 @@ CONFIG_NFT_REJECT=m
118CONFIG_NFT_COMPAT=m 115CONFIG_NFT_COMPAT=m
119CONFIG_NFT_HASH=m 116CONFIG_NFT_HASH=m
120CONFIG_NFT_FIB_INET=m 117CONFIG_NFT_FIB_INET=m
118CONFIG_NFT_SOCKET=m
121CONFIG_NFT_DUP_NETDEV=m 119CONFIG_NFT_DUP_NETDEV=m
122CONFIG_NFT_FWD_NETDEV=m 120CONFIG_NFT_FWD_NETDEV=m
123CONFIG_NFT_FIB_NETDEV=m 121CONFIG_NFT_FIB_NETDEV=m
@@ -196,7 +194,6 @@ CONFIG_IP_SET_HASH_NETPORT=m
196CONFIG_IP_SET_HASH_NETIFACE=m 194CONFIG_IP_SET_HASH_NETIFACE=m
197CONFIG_IP_SET_LIST_SET=m 195CONFIG_IP_SET_LIST_SET=m
198CONFIG_NF_CONNTRACK_IPV4=m 196CONFIG_NF_CONNTRACK_IPV4=m
199CONFIG_NF_SOCKET_IPV4=m
200CONFIG_NFT_CHAIN_ROUTE_IPV4=m 197CONFIG_NFT_CHAIN_ROUTE_IPV4=m
201CONFIG_NFT_DUP_IPV4=m 198CONFIG_NFT_DUP_IPV4=m
202CONFIG_NFT_FIB_IPV4=m 199CONFIG_NFT_FIB_IPV4=m
@@ -227,7 +224,6 @@ CONFIG_IP_NF_ARPTABLES=m
227CONFIG_IP_NF_ARPFILTER=m 224CONFIG_IP_NF_ARPFILTER=m
228CONFIG_IP_NF_ARP_MANGLE=m 225CONFIG_IP_NF_ARP_MANGLE=m
229CONFIG_NF_CONNTRACK_IPV6=m 226CONFIG_NF_CONNTRACK_IPV6=m
230CONFIG_NF_SOCKET_IPV6=m
231CONFIG_NFT_CHAIN_ROUTE_IPV6=m 227CONFIG_NFT_CHAIN_ROUTE_IPV6=m
232CONFIG_NFT_CHAIN_NAT_IPV6=m 228CONFIG_NFT_CHAIN_NAT_IPV6=m
233CONFIG_NFT_MASQ_IPV6=m 229CONFIG_NFT_MASQ_IPV6=m
@@ -256,7 +252,6 @@ CONFIG_IP6_NF_NAT=m
256CONFIG_IP6_NF_TARGET_MASQUERADE=m 252CONFIG_IP6_NF_TARGET_MASQUERADE=m
257CONFIG_IP6_NF_TARGET_NPT=m 253CONFIG_IP6_NF_TARGET_NPT=m
258CONFIG_NF_TABLES_BRIDGE=y 254CONFIG_NF_TABLES_BRIDGE=y
259CONFIG_NFT_BRIDGE_META=m
260CONFIG_NFT_BRIDGE_REJECT=m 255CONFIG_NFT_BRIDGE_REJECT=m
261CONFIG_NF_LOG_BRIDGE=m 256CONFIG_NF_LOG_BRIDGE=m
262CONFIG_BRIDGE_NF_EBTABLES=m 257CONFIG_BRIDGE_NF_EBTABLES=m
@@ -297,6 +292,7 @@ CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m
297CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m 292CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m
298CONFIG_DNS_RESOLVER=y 293CONFIG_DNS_RESOLVER=y
299CONFIG_BATMAN_ADV=m 294CONFIG_BATMAN_ADV=m
295# CONFIG_BATMAN_ADV_BATMAN_V is not set
300CONFIG_BATMAN_ADV_DAT=y 296CONFIG_BATMAN_ADV_DAT=y
301CONFIG_BATMAN_ADV_NC=y 297CONFIG_BATMAN_ADV_NC=y
302CONFIG_BATMAN_ADV_MCAST=y 298CONFIG_BATMAN_ADV_MCAST=y
@@ -350,6 +346,7 @@ CONFIG_DM_UNSTRIPED=m
350CONFIG_DM_CRYPT=m 346CONFIG_DM_CRYPT=m
351CONFIG_DM_SNAPSHOT=m 347CONFIG_DM_SNAPSHOT=m
352CONFIG_DM_THIN_PROVISIONING=m 348CONFIG_DM_THIN_PROVISIONING=m
349CONFIG_DM_WRITECACHE=m
353CONFIG_DM_ERA=m 350CONFIG_DM_ERA=m
354CONFIG_DM_MIRROR=m 351CONFIG_DM_MIRROR=m
355CONFIG_DM_RAID=m 352CONFIG_DM_RAID=m
@@ -388,8 +385,8 @@ CONFIG_VETH=m
388# CONFIG_NET_VENDOR_AMD is not set 385# CONFIG_NET_VENDOR_AMD is not set
389# CONFIG_NET_VENDOR_AQUANTIA is not set 386# CONFIG_NET_VENDOR_AQUANTIA is not set
390# CONFIG_NET_VENDOR_ARC is not set 387# CONFIG_NET_VENDOR_ARC is not set
391# CONFIG_NET_CADENCE is not set
392# CONFIG_NET_VENDOR_BROADCOM is not set 388# CONFIG_NET_VENDOR_BROADCOM is not set
389# CONFIG_NET_CADENCE is not set
393# CONFIG_NET_VENDOR_CIRRUS is not set 390# CONFIG_NET_VENDOR_CIRRUS is not set
394# CONFIG_NET_VENDOR_CORTINA is not set 391# CONFIG_NET_VENDOR_CORTINA is not set
395# CONFIG_NET_VENDOR_EZCHIP is not set 392# CONFIG_NET_VENDOR_EZCHIP is not set
@@ -398,6 +395,7 @@ CONFIG_VETH=m
398# CONFIG_NET_VENDOR_INTEL is not set 395# CONFIG_NET_VENDOR_INTEL is not set
399# CONFIG_NET_VENDOR_MARVELL is not set 396# CONFIG_NET_VENDOR_MARVELL is not set
400# CONFIG_NET_VENDOR_MICREL is not set 397# CONFIG_NET_VENDOR_MICREL is not set
398# CONFIG_NET_VENDOR_MICROSEMI is not set
401# CONFIG_NET_VENDOR_NETRONOME is not set 399# CONFIG_NET_VENDOR_NETRONOME is not set
402# CONFIG_NET_VENDOR_NI is not set 400# CONFIG_NET_VENDOR_NI is not set
403CONFIG_NE2000=y 401CONFIG_NE2000=y
@@ -410,9 +408,9 @@ CONFIG_NE2000=y
410# CONFIG_NET_VENDOR_SMSC is not set 408# CONFIG_NET_VENDOR_SMSC is not set
411# CONFIG_NET_VENDOR_SOCIONEXT is not set 409# CONFIG_NET_VENDOR_SOCIONEXT is not set
412# CONFIG_NET_VENDOR_STMICRO is not set 410# CONFIG_NET_VENDOR_STMICRO is not set
411# CONFIG_NET_VENDOR_SYNOPSYS is not set
413# CONFIG_NET_VENDOR_VIA is not set 412# CONFIG_NET_VENDOR_VIA is not set
414# CONFIG_NET_VENDOR_WIZNET is not set 413# CONFIG_NET_VENDOR_WIZNET is not set
415# CONFIG_NET_VENDOR_SYNOPSYS is not set
416CONFIG_PLIP=m 414CONFIG_PLIP=m
417CONFIG_PPP=m 415CONFIG_PPP=m
418CONFIG_PPP_BSDCOMP=m 416CONFIG_PPP_BSDCOMP=m
@@ -455,6 +453,7 @@ CONFIG_HIDRAW=y
455CONFIG_UHID=m 453CONFIG_UHID=m
456# CONFIG_HID_GENERIC is not set 454# CONFIG_HID_GENERIC is not set
457# CONFIG_HID_ITE is not set 455# CONFIG_HID_ITE is not set
456# CONFIG_HID_REDRAGON is not set
458# CONFIG_USB_SUPPORT is not set 457# CONFIG_USB_SUPPORT is not set
459CONFIG_RTC_CLASS=y 458CONFIG_RTC_CLASS=y
460# CONFIG_RTC_NVMEM is not set 459# CONFIG_RTC_NVMEM is not set
@@ -473,7 +472,7 @@ CONFIG_FS_ENCRYPTION=m
473CONFIG_FANOTIFY=y 472CONFIG_FANOTIFY=y
474CONFIG_QUOTA_NETLINK_INTERFACE=y 473CONFIG_QUOTA_NETLINK_INTERFACE=y
475# CONFIG_PRINT_QUOTA_WARNING is not set 474# CONFIG_PRINT_QUOTA_WARNING is not set
476CONFIG_AUTOFS4_FS=m 475CONFIG_AUTOFS_FS=m
477CONFIG_FUSE_FS=m 476CONFIG_FUSE_FS=m
478CONFIG_CUSE=m 477CONFIG_CUSE=m
479CONFIG_OVERLAY_FS=m 478CONFIG_OVERLAY_FS=m
@@ -574,6 +573,7 @@ CONFIG_TEST_KSTRTOX=m
574CONFIG_TEST_PRINTF=m 573CONFIG_TEST_PRINTF=m
575CONFIG_TEST_BITMAP=m 574CONFIG_TEST_BITMAP=m
576CONFIG_TEST_UUID=m 575CONFIG_TEST_UUID=m
576CONFIG_TEST_OVERFLOW=m
577CONFIG_TEST_RHASHTABLE=m 577CONFIG_TEST_RHASHTABLE=m
578CONFIG_TEST_HASH=m 578CONFIG_TEST_HASH=m
579CONFIG_TEST_USER_COPY=m 579CONFIG_TEST_USER_COPY=m
@@ -596,6 +596,11 @@ CONFIG_CRYPTO_CRYPTD=m
596CONFIG_CRYPTO_MCRYPTD=m 596CONFIG_CRYPTO_MCRYPTD=m
597CONFIG_CRYPTO_TEST=m 597CONFIG_CRYPTO_TEST=m
598CONFIG_CRYPTO_CHACHA20POLY1305=m 598CONFIG_CRYPTO_CHACHA20POLY1305=m
599CONFIG_CRYPTO_AEGIS128=m
600CONFIG_CRYPTO_AEGIS128L=m
601CONFIG_CRYPTO_AEGIS256=m
602CONFIG_CRYPTO_MORUS640=m
603CONFIG_CRYPTO_MORUS1280=m
599CONFIG_CRYPTO_CFB=m 604CONFIG_CRYPTO_CFB=m
600CONFIG_CRYPTO_LRW=m 605CONFIG_CRYPTO_LRW=m
601CONFIG_CRYPTO_PCBC=m 606CONFIG_CRYPTO_PCBC=m
@@ -631,6 +636,7 @@ CONFIG_CRYPTO_LZO=m
631CONFIG_CRYPTO_842=m 636CONFIG_CRYPTO_842=m
632CONFIG_CRYPTO_LZ4=m 637CONFIG_CRYPTO_LZ4=m
633CONFIG_CRYPTO_LZ4HC=m 638CONFIG_CRYPTO_LZ4HC=m
639CONFIG_CRYPTO_ZSTD=m
634CONFIG_CRYPTO_ANSI_CPRNG=m 640CONFIG_CRYPTO_ANSI_CPRNG=m
635CONFIG_CRYPTO_DRBG_HASH=y 641CONFIG_CRYPTO_DRBG_HASH=y
636CONFIG_CRYPTO_DRBG_CTR=y 642CONFIG_CRYPTO_DRBG_CTR=y
diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig
index d543ed5dfa96..573bf922d448 100644
--- a/arch/m68k/configs/sun3_defconfig
+++ b/arch/m68k/configs/sun3_defconfig
@@ -45,6 +45,7 @@ CONFIG_UNIX_DIAG=m
45CONFIG_TLS=m 45CONFIG_TLS=m
46CONFIG_XFRM_MIGRATE=y 46CONFIG_XFRM_MIGRATE=y
47CONFIG_NET_KEY=y 47CONFIG_NET_KEY=y
48CONFIG_XDP_SOCKETS=y
48CONFIG_INET=y 49CONFIG_INET=y
49CONFIG_IP_PNP=y 50CONFIG_IP_PNP=y
50CONFIG_IP_PNP_DHCP=y 51CONFIG_IP_PNP_DHCP=y
@@ -91,18 +92,14 @@ CONFIG_NF_CONNTRACK_SANE=m
91CONFIG_NF_CONNTRACK_SIP=m 92CONFIG_NF_CONNTRACK_SIP=m
92CONFIG_NF_CONNTRACK_TFTP=m 93CONFIG_NF_CONNTRACK_TFTP=m
93CONFIG_NF_TABLES=m 94CONFIG_NF_TABLES=m
95CONFIG_NF_TABLES_SET=m
94CONFIG_NF_TABLES_INET=y 96CONFIG_NF_TABLES_INET=y
95CONFIG_NF_TABLES_NETDEV=y 97CONFIG_NF_TABLES_NETDEV=y
96CONFIG_NFT_EXTHDR=m
97CONFIG_NFT_META=m
98CONFIG_NFT_RT=m
99CONFIG_NFT_NUMGEN=m 98CONFIG_NFT_NUMGEN=m
100CONFIG_NFT_CT=m 99CONFIG_NFT_CT=m
101CONFIG_NFT_FLOW_OFFLOAD=m 100CONFIG_NFT_FLOW_OFFLOAD=m
102CONFIG_NFT_SET_RBTREE=m
103CONFIG_NFT_SET_HASH=m
104CONFIG_NFT_SET_BITMAP=m
105CONFIG_NFT_COUNTER=m 101CONFIG_NFT_COUNTER=m
102CONFIG_NFT_CONNLIMIT=m
106CONFIG_NFT_LOG=m 103CONFIG_NFT_LOG=m
107CONFIG_NFT_LIMIT=m 104CONFIG_NFT_LIMIT=m
108CONFIG_NFT_MASQ=m 105CONFIG_NFT_MASQ=m
@@ -115,6 +112,7 @@ CONFIG_NFT_REJECT=m
115CONFIG_NFT_COMPAT=m 112CONFIG_NFT_COMPAT=m
116CONFIG_NFT_HASH=m 113CONFIG_NFT_HASH=m
117CONFIG_NFT_FIB_INET=m 114CONFIG_NFT_FIB_INET=m
115CONFIG_NFT_SOCKET=m
118CONFIG_NFT_DUP_NETDEV=m 116CONFIG_NFT_DUP_NETDEV=m
119CONFIG_NFT_FWD_NETDEV=m 117CONFIG_NFT_FWD_NETDEV=m
120CONFIG_NFT_FIB_NETDEV=m 118CONFIG_NFT_FIB_NETDEV=m
@@ -193,7 +191,6 @@ CONFIG_IP_SET_HASH_NETPORT=m
193CONFIG_IP_SET_HASH_NETIFACE=m 191CONFIG_IP_SET_HASH_NETIFACE=m
194CONFIG_IP_SET_LIST_SET=m 192CONFIG_IP_SET_LIST_SET=m
195CONFIG_NF_CONNTRACK_IPV4=m 193CONFIG_NF_CONNTRACK_IPV4=m
196CONFIG_NF_SOCKET_IPV4=m
197CONFIG_NFT_CHAIN_ROUTE_IPV4=m 194CONFIG_NFT_CHAIN_ROUTE_IPV4=m
198CONFIG_NFT_DUP_IPV4=m 195CONFIG_NFT_DUP_IPV4=m
199CONFIG_NFT_FIB_IPV4=m 196CONFIG_NFT_FIB_IPV4=m
@@ -224,7 +221,6 @@ CONFIG_IP_NF_ARPTABLES=m
224CONFIG_IP_NF_ARPFILTER=m 221CONFIG_IP_NF_ARPFILTER=m
225CONFIG_IP_NF_ARP_MANGLE=m 222CONFIG_IP_NF_ARP_MANGLE=m
226CONFIG_NF_CONNTRACK_IPV6=m 223CONFIG_NF_CONNTRACK_IPV6=m
227CONFIG_NF_SOCKET_IPV6=m
228CONFIG_NFT_CHAIN_ROUTE_IPV6=m 224CONFIG_NFT_CHAIN_ROUTE_IPV6=m
229CONFIG_NFT_CHAIN_NAT_IPV6=m 225CONFIG_NFT_CHAIN_NAT_IPV6=m
230CONFIG_NFT_MASQ_IPV6=m 226CONFIG_NFT_MASQ_IPV6=m
@@ -253,7 +249,6 @@ CONFIG_IP6_NF_NAT=m
253CONFIG_IP6_NF_TARGET_MASQUERADE=m 249CONFIG_IP6_NF_TARGET_MASQUERADE=m
254CONFIG_IP6_NF_TARGET_NPT=m 250CONFIG_IP6_NF_TARGET_NPT=m
255CONFIG_NF_TABLES_BRIDGE=y 251CONFIG_NF_TABLES_BRIDGE=y
256CONFIG_NFT_BRIDGE_META=m
257CONFIG_NFT_BRIDGE_REJECT=m 252CONFIG_NFT_BRIDGE_REJECT=m
258CONFIG_NF_LOG_BRIDGE=m 253CONFIG_NF_LOG_BRIDGE=m
259CONFIG_BRIDGE_NF_EBTABLES=m 254CONFIG_BRIDGE_NF_EBTABLES=m
@@ -294,6 +289,7 @@ CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m
294CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m 289CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m
295CONFIG_DNS_RESOLVER=y 290CONFIG_DNS_RESOLVER=y
296CONFIG_BATMAN_ADV=m 291CONFIG_BATMAN_ADV=m
292# CONFIG_BATMAN_ADV_BATMAN_V is not set
297CONFIG_BATMAN_ADV_DAT=y 293CONFIG_BATMAN_ADV_DAT=y
298CONFIG_BATMAN_ADV_NC=y 294CONFIG_BATMAN_ADV_NC=y
299CONFIG_BATMAN_ADV_MCAST=y 295CONFIG_BATMAN_ADV_MCAST=y
@@ -341,6 +337,7 @@ CONFIG_DM_UNSTRIPED=m
341CONFIG_DM_CRYPT=m 337CONFIG_DM_CRYPT=m
342CONFIG_DM_SNAPSHOT=m 338CONFIG_DM_SNAPSHOT=m
343CONFIG_DM_THIN_PROVISIONING=m 339CONFIG_DM_THIN_PROVISIONING=m
340CONFIG_DM_WRITECACHE=m
344CONFIG_DM_ERA=m 341CONFIG_DM_ERA=m
345CONFIG_DM_MIRROR=m 342CONFIG_DM_MIRROR=m
346CONFIG_DM_RAID=m 343CONFIG_DM_RAID=m
@@ -385,6 +382,7 @@ CONFIG_SUN3LANCE=y
385CONFIG_SUN3_82586=y 382CONFIG_SUN3_82586=y
386# CONFIG_NET_VENDOR_MARVELL is not set 383# CONFIG_NET_VENDOR_MARVELL is not set
387# CONFIG_NET_VENDOR_MICREL is not set 384# CONFIG_NET_VENDOR_MICREL is not set
385# CONFIG_NET_VENDOR_MICROSEMI is not set
388# CONFIG_NET_VENDOR_NATSEMI is not set 386# CONFIG_NET_VENDOR_NATSEMI is not set
389# CONFIG_NET_VENDOR_NETRONOME is not set 387# CONFIG_NET_VENDOR_NETRONOME is not set
390# CONFIG_NET_VENDOR_NI is not set 388# CONFIG_NET_VENDOR_NI is not set
@@ -397,9 +395,9 @@ CONFIG_SUN3_82586=y
397# CONFIG_NET_VENDOR_SOCIONEXT is not set 395# CONFIG_NET_VENDOR_SOCIONEXT is not set
398# CONFIG_NET_VENDOR_STMICRO is not set 396# CONFIG_NET_VENDOR_STMICRO is not set
399# CONFIG_NET_VENDOR_SUN is not set 397# CONFIG_NET_VENDOR_SUN is not set
398# CONFIG_NET_VENDOR_SYNOPSYS is not set
400# CONFIG_NET_VENDOR_VIA is not set 399# CONFIG_NET_VENDOR_VIA is not set
401# CONFIG_NET_VENDOR_WIZNET is not set 400# CONFIG_NET_VENDOR_WIZNET is not set
402# CONFIG_NET_VENDOR_SYNOPSYS is not set
403CONFIG_PPP=m 401CONFIG_PPP=m
404CONFIG_PPP_BSDCOMP=m 402CONFIG_PPP_BSDCOMP=m
405CONFIG_PPP_DEFLATE=m 403CONFIG_PPP_DEFLATE=m
@@ -435,6 +433,7 @@ CONFIG_HIDRAW=y
435CONFIG_UHID=m 433CONFIG_UHID=m
436# CONFIG_HID_GENERIC is not set 434# CONFIG_HID_GENERIC is not set
437# CONFIG_HID_ITE is not set 435# CONFIG_HID_ITE is not set
436# CONFIG_HID_REDRAGON is not set
438# CONFIG_USB_SUPPORT is not set 437# CONFIG_USB_SUPPORT is not set
439CONFIG_RTC_CLASS=y 438CONFIG_RTC_CLASS=y
440# CONFIG_RTC_NVMEM is not set 439# CONFIG_RTC_NVMEM is not set
@@ -452,7 +451,7 @@ CONFIG_FS_ENCRYPTION=m
452CONFIG_FANOTIFY=y 451CONFIG_FANOTIFY=y
453CONFIG_QUOTA_NETLINK_INTERFACE=y 452CONFIG_QUOTA_NETLINK_INTERFACE=y
454# CONFIG_PRINT_QUOTA_WARNING is not set 453# CONFIG_PRINT_QUOTA_WARNING is not set
455CONFIG_AUTOFS4_FS=m 454CONFIG_AUTOFS_FS=m
456CONFIG_FUSE_FS=m 455CONFIG_FUSE_FS=m
457CONFIG_CUSE=m 456CONFIG_CUSE=m
458CONFIG_OVERLAY_FS=m 457CONFIG_OVERLAY_FS=m
@@ -553,6 +552,7 @@ CONFIG_TEST_KSTRTOX=m
553CONFIG_TEST_PRINTF=m 552CONFIG_TEST_PRINTF=m
554CONFIG_TEST_BITMAP=m 553CONFIG_TEST_BITMAP=m
555CONFIG_TEST_UUID=m 554CONFIG_TEST_UUID=m
555CONFIG_TEST_OVERFLOW=m
556CONFIG_TEST_RHASHTABLE=m 556CONFIG_TEST_RHASHTABLE=m
557CONFIG_TEST_HASH=m 557CONFIG_TEST_HASH=m
558CONFIG_TEST_USER_COPY=m 558CONFIG_TEST_USER_COPY=m
@@ -574,6 +574,11 @@ CONFIG_CRYPTO_CRYPTD=m
574CONFIG_CRYPTO_MCRYPTD=m 574CONFIG_CRYPTO_MCRYPTD=m
575CONFIG_CRYPTO_TEST=m 575CONFIG_CRYPTO_TEST=m
576CONFIG_CRYPTO_CHACHA20POLY1305=m 576CONFIG_CRYPTO_CHACHA20POLY1305=m
577CONFIG_CRYPTO_AEGIS128=m
578CONFIG_CRYPTO_AEGIS128L=m
579CONFIG_CRYPTO_AEGIS256=m
580CONFIG_CRYPTO_MORUS640=m
581CONFIG_CRYPTO_MORUS1280=m
577CONFIG_CRYPTO_CFB=m 582CONFIG_CRYPTO_CFB=m
578CONFIG_CRYPTO_LRW=m 583CONFIG_CRYPTO_LRW=m
579CONFIG_CRYPTO_PCBC=m 584CONFIG_CRYPTO_PCBC=m
@@ -609,6 +614,7 @@ CONFIG_CRYPTO_LZO=m
609CONFIG_CRYPTO_842=m 614CONFIG_CRYPTO_842=m
610CONFIG_CRYPTO_LZ4=m 615CONFIG_CRYPTO_LZ4=m
611CONFIG_CRYPTO_LZ4HC=m 616CONFIG_CRYPTO_LZ4HC=m
617CONFIG_CRYPTO_ZSTD=m
612CONFIG_CRYPTO_ANSI_CPRNG=m 618CONFIG_CRYPTO_ANSI_CPRNG=m
613CONFIG_CRYPTO_DRBG_HASH=y 619CONFIG_CRYPTO_DRBG_HASH=y
614CONFIG_CRYPTO_DRBG_CTR=y 620CONFIG_CRYPTO_DRBG_CTR=y
diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig
index a67e54246023..efb27a7fcc55 100644
--- a/arch/m68k/configs/sun3x_defconfig
+++ b/arch/m68k/configs/sun3x_defconfig
@@ -45,6 +45,7 @@ CONFIG_UNIX_DIAG=m
45CONFIG_TLS=m 45CONFIG_TLS=m
46CONFIG_XFRM_MIGRATE=y 46CONFIG_XFRM_MIGRATE=y
47CONFIG_NET_KEY=y 47CONFIG_NET_KEY=y
48CONFIG_XDP_SOCKETS=y
48CONFIG_INET=y 49CONFIG_INET=y
49CONFIG_IP_PNP=y 50CONFIG_IP_PNP=y
50CONFIG_IP_PNP_DHCP=y 51CONFIG_IP_PNP_DHCP=y
@@ -91,18 +92,14 @@ CONFIG_NF_CONNTRACK_SANE=m
91CONFIG_NF_CONNTRACK_SIP=m 92CONFIG_NF_CONNTRACK_SIP=m
92CONFIG_NF_CONNTRACK_TFTP=m 93CONFIG_NF_CONNTRACK_TFTP=m
93CONFIG_NF_TABLES=m 94CONFIG_NF_TABLES=m
95CONFIG_NF_TABLES_SET=m
94CONFIG_NF_TABLES_INET=y 96CONFIG_NF_TABLES_INET=y
95CONFIG_NF_TABLES_NETDEV=y 97CONFIG_NF_TABLES_NETDEV=y
96CONFIG_NFT_EXTHDR=m
97CONFIG_NFT_META=m
98CONFIG_NFT_RT=m
99CONFIG_NFT_NUMGEN=m 98CONFIG_NFT_NUMGEN=m
100CONFIG_NFT_CT=m 99CONFIG_NFT_CT=m
101CONFIG_NFT_FLOW_OFFLOAD=m 100CONFIG_NFT_FLOW_OFFLOAD=m
102CONFIG_NFT_SET_RBTREE=m
103CONFIG_NFT_SET_HASH=m
104CONFIG_NFT_SET_BITMAP=m
105CONFIG_NFT_COUNTER=m 101CONFIG_NFT_COUNTER=m
102CONFIG_NFT_CONNLIMIT=m
106CONFIG_NFT_LOG=m 103CONFIG_NFT_LOG=m
107CONFIG_NFT_LIMIT=m 104CONFIG_NFT_LIMIT=m
108CONFIG_NFT_MASQ=m 105CONFIG_NFT_MASQ=m
@@ -115,6 +112,7 @@ CONFIG_NFT_REJECT=m
115CONFIG_NFT_COMPAT=m 112CONFIG_NFT_COMPAT=m
116CONFIG_NFT_HASH=m 113CONFIG_NFT_HASH=m
117CONFIG_NFT_FIB_INET=m 114CONFIG_NFT_FIB_INET=m
115CONFIG_NFT_SOCKET=m
118CONFIG_NFT_DUP_NETDEV=m 116CONFIG_NFT_DUP_NETDEV=m
119CONFIG_NFT_FWD_NETDEV=m 117CONFIG_NFT_FWD_NETDEV=m
120CONFIG_NFT_FIB_NETDEV=m 118CONFIG_NFT_FIB_NETDEV=m
@@ -193,7 +191,6 @@ CONFIG_IP_SET_HASH_NETPORT=m
193CONFIG_IP_SET_HASH_NETIFACE=m 191CONFIG_IP_SET_HASH_NETIFACE=m
194CONFIG_IP_SET_LIST_SET=m 192CONFIG_IP_SET_LIST_SET=m
195CONFIG_NF_CONNTRACK_IPV4=m 193CONFIG_NF_CONNTRACK_IPV4=m
196CONFIG_NF_SOCKET_IPV4=m
197CONFIG_NFT_CHAIN_ROUTE_IPV4=m 194CONFIG_NFT_CHAIN_ROUTE_IPV4=m
198CONFIG_NFT_DUP_IPV4=m 195CONFIG_NFT_DUP_IPV4=m
199CONFIG_NFT_FIB_IPV4=m 196CONFIG_NFT_FIB_IPV4=m
@@ -224,7 +221,6 @@ CONFIG_IP_NF_ARPTABLES=m
224CONFIG_IP_NF_ARPFILTER=m 221CONFIG_IP_NF_ARPFILTER=m
225CONFIG_IP_NF_ARP_MANGLE=m 222CONFIG_IP_NF_ARP_MANGLE=m
226CONFIG_NF_CONNTRACK_IPV6=m 223CONFIG_NF_CONNTRACK_IPV6=m
227CONFIG_NF_SOCKET_IPV6=m
228CONFIG_NFT_CHAIN_ROUTE_IPV6=m 224CONFIG_NFT_CHAIN_ROUTE_IPV6=m
229CONFIG_NFT_CHAIN_NAT_IPV6=m 225CONFIG_NFT_CHAIN_NAT_IPV6=m
230CONFIG_NFT_MASQ_IPV6=m 226CONFIG_NFT_MASQ_IPV6=m
@@ -253,7 +249,6 @@ CONFIG_IP6_NF_NAT=m
253CONFIG_IP6_NF_TARGET_MASQUERADE=m 249CONFIG_IP6_NF_TARGET_MASQUERADE=m
254CONFIG_IP6_NF_TARGET_NPT=m 250CONFIG_IP6_NF_TARGET_NPT=m
255CONFIG_NF_TABLES_BRIDGE=y 251CONFIG_NF_TABLES_BRIDGE=y
256CONFIG_NFT_BRIDGE_META=m
257CONFIG_NFT_BRIDGE_REJECT=m 252CONFIG_NFT_BRIDGE_REJECT=m
258CONFIG_NF_LOG_BRIDGE=m 253CONFIG_NF_LOG_BRIDGE=m
259CONFIG_BRIDGE_NF_EBTABLES=m 254CONFIG_BRIDGE_NF_EBTABLES=m
@@ -294,6 +289,7 @@ CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m
294CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m 289CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m
295CONFIG_DNS_RESOLVER=y 290CONFIG_DNS_RESOLVER=y
296CONFIG_BATMAN_ADV=m 291CONFIG_BATMAN_ADV=m
292# CONFIG_BATMAN_ADV_BATMAN_V is not set
297CONFIG_BATMAN_ADV_DAT=y 293CONFIG_BATMAN_ADV_DAT=y
298CONFIG_BATMAN_ADV_NC=y 294CONFIG_BATMAN_ADV_NC=y
299CONFIG_BATMAN_ADV_MCAST=y 295CONFIG_BATMAN_ADV_MCAST=y
@@ -341,6 +337,7 @@ CONFIG_DM_UNSTRIPED=m
341CONFIG_DM_CRYPT=m 337CONFIG_DM_CRYPT=m
342CONFIG_DM_SNAPSHOT=m 338CONFIG_DM_SNAPSHOT=m
343CONFIG_DM_THIN_PROVISIONING=m 339CONFIG_DM_THIN_PROVISIONING=m
340CONFIG_DM_WRITECACHE=m
344CONFIG_DM_ERA=m 341CONFIG_DM_ERA=m
345CONFIG_DM_MIRROR=m 342CONFIG_DM_MIRROR=m
346CONFIG_DM_RAID=m 343CONFIG_DM_RAID=m
@@ -378,14 +375,15 @@ CONFIG_VETH=m
378CONFIG_SUN3LANCE=y 375CONFIG_SUN3LANCE=y
379# CONFIG_NET_VENDOR_AQUANTIA is not set 376# CONFIG_NET_VENDOR_AQUANTIA is not set
380# CONFIG_NET_VENDOR_ARC is not set 377# CONFIG_NET_VENDOR_ARC is not set
381# CONFIG_NET_CADENCE is not set
382# CONFIG_NET_VENDOR_BROADCOM is not set 378# CONFIG_NET_VENDOR_BROADCOM is not set
379# CONFIG_NET_CADENCE is not set
383# CONFIG_NET_VENDOR_CORTINA is not set 380# CONFIG_NET_VENDOR_CORTINA is not set
384# CONFIG_NET_VENDOR_EZCHIP is not set 381# CONFIG_NET_VENDOR_EZCHIP is not set
385# CONFIG_NET_VENDOR_HUAWEI is not set 382# CONFIG_NET_VENDOR_HUAWEI is not set
386# CONFIG_NET_VENDOR_INTEL is not set 383# CONFIG_NET_VENDOR_INTEL is not set
387# CONFIG_NET_VENDOR_MARVELL is not set 384# CONFIG_NET_VENDOR_MARVELL is not set
388# CONFIG_NET_VENDOR_MICREL is not set 385# CONFIG_NET_VENDOR_MICREL is not set
386# CONFIG_NET_VENDOR_MICROSEMI is not set
389# CONFIG_NET_VENDOR_NATSEMI is not set 387# CONFIG_NET_VENDOR_NATSEMI is not set
390# CONFIG_NET_VENDOR_NETRONOME is not set 388# CONFIG_NET_VENDOR_NETRONOME is not set
391# CONFIG_NET_VENDOR_NI is not set 389# CONFIG_NET_VENDOR_NI is not set
@@ -397,9 +395,9 @@ CONFIG_SUN3LANCE=y
397# CONFIG_NET_VENDOR_SOLARFLARE is not set 395# CONFIG_NET_VENDOR_SOLARFLARE is not set
398# CONFIG_NET_VENDOR_SOCIONEXT is not set 396# CONFIG_NET_VENDOR_SOCIONEXT is not set
399# CONFIG_NET_VENDOR_STMICRO is not set 397# CONFIG_NET_VENDOR_STMICRO is not set
398# CONFIG_NET_VENDOR_SYNOPSYS is not set
400# CONFIG_NET_VENDOR_VIA is not set 399# CONFIG_NET_VENDOR_VIA is not set
401# CONFIG_NET_VENDOR_WIZNET is not set 400# CONFIG_NET_VENDOR_WIZNET is not set
402# CONFIG_NET_VENDOR_SYNOPSYS is not set
403CONFIG_PPP=m 401CONFIG_PPP=m
404CONFIG_PPP_BSDCOMP=m 402CONFIG_PPP_BSDCOMP=m
405CONFIG_PPP_DEFLATE=m 403CONFIG_PPP_DEFLATE=m
@@ -435,6 +433,7 @@ CONFIG_HIDRAW=y
435CONFIG_UHID=m 433CONFIG_UHID=m
436# CONFIG_HID_GENERIC is not set 434# CONFIG_HID_GENERIC is not set
437# CONFIG_HID_ITE is not set 435# CONFIG_HID_ITE is not set
436# CONFIG_HID_REDRAGON is not set
438# CONFIG_USB_SUPPORT is not set 437# CONFIG_USB_SUPPORT is not set
439CONFIG_RTC_CLASS=y 438CONFIG_RTC_CLASS=y
440# CONFIG_RTC_NVMEM is not set 439# CONFIG_RTC_NVMEM is not set
@@ -452,7 +451,7 @@ CONFIG_FS_ENCRYPTION=m
452CONFIG_FANOTIFY=y 451CONFIG_FANOTIFY=y
453CONFIG_QUOTA_NETLINK_INTERFACE=y 452CONFIG_QUOTA_NETLINK_INTERFACE=y
454# CONFIG_PRINT_QUOTA_WARNING is not set 453# CONFIG_PRINT_QUOTA_WARNING is not set
455CONFIG_AUTOFS4_FS=m 454CONFIG_AUTOFS_FS=m
456CONFIG_FUSE_FS=m 455CONFIG_FUSE_FS=m
457CONFIG_CUSE=m 456CONFIG_CUSE=m
458CONFIG_OVERLAY_FS=m 457CONFIG_OVERLAY_FS=m
@@ -553,6 +552,7 @@ CONFIG_TEST_KSTRTOX=m
553CONFIG_TEST_PRINTF=m 552CONFIG_TEST_PRINTF=m
554CONFIG_TEST_BITMAP=m 553CONFIG_TEST_BITMAP=m
555CONFIG_TEST_UUID=m 554CONFIG_TEST_UUID=m
555CONFIG_TEST_OVERFLOW=m
556CONFIG_TEST_RHASHTABLE=m 556CONFIG_TEST_RHASHTABLE=m
557CONFIG_TEST_HASH=m 557CONFIG_TEST_HASH=m
558CONFIG_TEST_USER_COPY=m 558CONFIG_TEST_USER_COPY=m
@@ -575,6 +575,11 @@ CONFIG_CRYPTO_CRYPTD=m
575CONFIG_CRYPTO_MCRYPTD=m 575CONFIG_CRYPTO_MCRYPTD=m
576CONFIG_CRYPTO_TEST=m 576CONFIG_CRYPTO_TEST=m
577CONFIG_CRYPTO_CHACHA20POLY1305=m 577CONFIG_CRYPTO_CHACHA20POLY1305=m
578CONFIG_CRYPTO_AEGIS128=m
579CONFIG_CRYPTO_AEGIS128L=m
580CONFIG_CRYPTO_AEGIS256=m
581CONFIG_CRYPTO_MORUS640=m
582CONFIG_CRYPTO_MORUS1280=m
578CONFIG_CRYPTO_CFB=m 583CONFIG_CRYPTO_CFB=m
579CONFIG_CRYPTO_LRW=m 584CONFIG_CRYPTO_LRW=m
580CONFIG_CRYPTO_PCBC=m 585CONFIG_CRYPTO_PCBC=m
@@ -610,6 +615,7 @@ CONFIG_CRYPTO_LZO=m
610CONFIG_CRYPTO_842=m 615CONFIG_CRYPTO_842=m
611CONFIG_CRYPTO_LZ4=m 616CONFIG_CRYPTO_LZ4=m
612CONFIG_CRYPTO_LZ4HC=m 617CONFIG_CRYPTO_LZ4HC=m
618CONFIG_CRYPTO_ZSTD=m
613CONFIG_CRYPTO_ANSI_CPRNG=m 619CONFIG_CRYPTO_ANSI_CPRNG=m
614CONFIG_CRYPTO_DRBG_HASH=y 620CONFIG_CRYPTO_DRBG_HASH=y
615CONFIG_CRYPTO_DRBG_CTR=y 621CONFIG_CRYPTO_DRBG_CTR=y
diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild
index 4d8d68c4e3dd..a4b8d3331a9e 100644
--- a/arch/m68k/include/asm/Kbuild
+++ b/arch/m68k/include/asm/Kbuild
@@ -1,6 +1,7 @@
1generic-y += barrier.h 1generic-y += barrier.h
2generic-y += compat.h 2generic-y += compat.h
3generic-y += device.h 3generic-y += device.h
4generic-y += dma-mapping.h
4generic-y += emergency-restart.h 5generic-y += emergency-restart.h
5generic-y += exec.h 6generic-y += exec.h
6generic-y += extable.h 7generic-y += extable.h
diff --git a/arch/m68k/include/asm/atomic.h b/arch/m68k/include/asm/atomic.h
index e993e2860ee1..47228b0d4163 100644
--- a/arch/m68k/include/asm/atomic.h
+++ b/arch/m68k/include/asm/atomic.h
@@ -126,11 +126,13 @@ static inline void atomic_inc(atomic_t *v)
126{ 126{
127 __asm__ __volatile__("addql #1,%0" : "+m" (*v)); 127 __asm__ __volatile__("addql #1,%0" : "+m" (*v));
128} 128}
129#define atomic_inc atomic_inc
129 130
130static inline void atomic_dec(atomic_t *v) 131static inline void atomic_dec(atomic_t *v)
131{ 132{
132 __asm__ __volatile__("subql #1,%0" : "+m" (*v)); 133 __asm__ __volatile__("subql #1,%0" : "+m" (*v));
133} 134}
135#define atomic_dec atomic_dec
134 136
135static inline int atomic_dec_and_test(atomic_t *v) 137static inline int atomic_dec_and_test(atomic_t *v)
136{ 138{
@@ -138,6 +140,7 @@ static inline int atomic_dec_and_test(atomic_t *v)
138 __asm__ __volatile__("subql #1,%1; seq %0" : "=d" (c), "+m" (*v)); 140 __asm__ __volatile__("subql #1,%1; seq %0" : "=d" (c), "+m" (*v));
139 return c != 0; 141 return c != 0;
140} 142}
143#define atomic_dec_and_test atomic_dec_and_test
141 144
142static inline int atomic_dec_and_test_lt(atomic_t *v) 145static inline int atomic_dec_and_test_lt(atomic_t *v)
143{ 146{
@@ -155,6 +158,7 @@ static inline int atomic_inc_and_test(atomic_t *v)
155 __asm__ __volatile__("addql #1,%1; seq %0" : "=d" (c), "+m" (*v)); 158 __asm__ __volatile__("addql #1,%1; seq %0" : "=d" (c), "+m" (*v));
156 return c != 0; 159 return c != 0;
157} 160}
161#define atomic_inc_and_test atomic_inc_and_test
158 162
159#ifdef CONFIG_RMW_INSNS 163#ifdef CONFIG_RMW_INSNS
160 164
@@ -190,9 +194,6 @@ static inline int atomic_xchg(atomic_t *v, int new)
190 194
191#endif /* !CONFIG_RMW_INSNS */ 195#endif /* !CONFIG_RMW_INSNS */
192 196
193#define atomic_dec_return(v) atomic_sub_return(1, (v))
194#define atomic_inc_return(v) atomic_add_return(1, (v))
195
196static inline int atomic_sub_and_test(int i, atomic_t *v) 197static inline int atomic_sub_and_test(int i, atomic_t *v)
197{ 198{
198 char c; 199 char c;
@@ -201,6 +202,7 @@ static inline int atomic_sub_and_test(int i, atomic_t *v)
201 : ASM_DI (i)); 202 : ASM_DI (i));
202 return c != 0; 203 return c != 0;
203} 204}
205#define atomic_sub_and_test atomic_sub_and_test
204 206
205static inline int atomic_add_negative(int i, atomic_t *v) 207static inline int atomic_add_negative(int i, atomic_t *v)
206{ 208{
@@ -210,20 +212,6 @@ static inline int atomic_add_negative(int i, atomic_t *v)
210 : ASM_DI (i)); 212 : ASM_DI (i));
211 return c != 0; 213 return c != 0;
212} 214}
213 215#define atomic_add_negative atomic_add_negative
214static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
215{
216 int c, old;
217 c = atomic_read(v);
218 for (;;) {
219 if (unlikely(c == (u)))
220 break;
221 old = atomic_cmpxchg((v), c, c + (a));
222 if (likely(old == c))
223 break;
224 c = old;
225 }
226 return c;
227}
228 216
229#endif /* __ARCH_M68K_ATOMIC __ */ 217#endif /* __ARCH_M68K_ATOMIC __ */
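
The atomic.h hunks above follow the generic atomics rework: the open-coded __atomic_add_unless() loop can go away because architectures that supply atomic_cmpxchg() now inherit atomic_fetch_add_unless() from the common fallback code, and the new "#define atomic_inc atomic_inc" style markers are how that code detects which operations the architecture implements itself. A minimal userspace sketch of the cmpxchg loop such a fallback relies on, using GCC/Clang __atomic builtins rather than kernel API:

/*
 * Userspace sketch of the cmpxchg loop behind a generic
 * fetch_add_unless(): add @a to *@v unless *@v == @u, return the old
 * value.  Illustrative only; not the kernel implementation.
 */
#include <stdio.h>

static int fetch_add_unless(int *v, int a, int u)
{
        int c = __atomic_load_n(v, __ATOMIC_RELAXED);

        do {
                if (c == u)     /* hit the excluded value: do nothing */
                        break;
                /* try to replace c with c + a; on failure c is reloaded */
        } while (!__atomic_compare_exchange_n(v, &c, c + a, 0,
                                              __ATOMIC_SEQ_CST,
                                              __ATOMIC_RELAXED));

        return c;               /* old value, like the removed helper */
}

int main(void)
{
        int v = 3;
        int old;

        old = fetch_add_unless(&v, 2, 5);
        printf("old=%d new=%d\n", old, v);      /* old=3 new=5 */
        old = fetch_add_unless(&v, 2, 5);
        printf("old=%d new=%d\n", old, v);      /* old=5 new=5, no add */
        return 0;
}
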
diff --git a/arch/m68k/include/asm/bitops.h b/arch/m68k/include/asm/bitops.h
index 93b47b1f6fb4..d979f38af751 100644
--- a/arch/m68k/include/asm/bitops.h
+++ b/arch/m68k/include/asm/bitops.h
@@ -454,7 +454,7 @@ static inline unsigned long ffz(unsigned long word)
454 */ 454 */
455#if (defined(__mcfisaaplus__) || defined(__mcfisac__)) && \ 455#if (defined(__mcfisaaplus__) || defined(__mcfisac__)) && \
456 !defined(CONFIG_M68000) && !defined(CONFIG_MCPU32) 456 !defined(CONFIG_M68000) && !defined(CONFIG_MCPU32)
457static inline int __ffs(int x) 457static inline unsigned long __ffs(unsigned long x)
458{ 458{
459 __asm__ __volatile__ ("bitrev %0; ff1 %0" 459 __asm__ __volatile__ ("bitrev %0; ff1 %0"
460 : "=d" (x) 460 : "=d" (x)
@@ -493,7 +493,11 @@ static inline int ffs(int x)
493 : "dm" (x & -x)); 493 : "dm" (x & -x));
494 return 32 - cnt; 494 return 32 - cnt;
495} 495}
496#define __ffs(x) (ffs(x) - 1) 496
497static inline unsigned long __ffs(unsigned long x)
498{
499 return ffs(x) - 1;
500}
497 501
498/* 502/*
499 * fls: find last bit set. 503 * fls: find last bit set.
@@ -515,12 +519,16 @@ static inline int __fls(int x)
515 519
516#endif 520#endif
517 521
522/* Simple test-and-set bit locks */
523#define test_and_set_bit_lock test_and_set_bit
524#define clear_bit_unlock clear_bit
525#define __clear_bit_unlock clear_bit_unlock
526
518#include <asm-generic/bitops/ext2-atomic.h> 527#include <asm-generic/bitops/ext2-atomic.h>
519#include <asm-generic/bitops/le.h> 528#include <asm-generic/bitops/le.h>
520#include <asm-generic/bitops/fls64.h> 529#include <asm-generic/bitops/fls64.h>
521#include <asm-generic/bitops/sched.h> 530#include <asm-generic/bitops/sched.h>
522#include <asm-generic/bitops/hweight.h> 531#include <asm-generic/bitops/hweight.h>
523#include <asm-generic/bitops/lock.h>
524#endif /* __KERNEL__ */ 532#endif /* __KERNEL__ */
525 533
526#endif /* _M68K_BITOPS_H */ 534#endif /* _M68K_BITOPS_H */
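
In the bitops.h change, __ffs() becomes a real inline taking and returning unsigned long, which matches the prototype the generic bitmap and find_bit code expects. The contract itself is unchanged: ffs() is 1-based and returns 0 for a zero argument, while __ffs() is 0-based and only defined for a non-zero word. A quick userspace check of that relationship, using a reference implementation rather than kernel code:

/*
 * Reference check of the ffs()/__ffs() contract: for every non-zero
 * word, __ffs(x) == ffs(x) - 1.  Uses __builtin_ffsl() as the 1-based
 * reference; purely illustrative.
 */
#include <assert.h>
#include <stdio.h>

static unsigned long my___ffs(unsigned long x)  /* 0-based, x must be != 0 */
{
        unsigned long bit = 0;

        while (!(x & 1)) {
                x >>= 1;
                bit++;
        }
        return bit;
}

int main(void)
{
        unsigned long x;

        for (x = 1; x; x <<= 1)
                assert(my___ffs(x) == (unsigned long)__builtin_ffsl((long)x) - 1);

        printf("__ffs(0x50) = %lu\n", my___ffs(0x50));  /* prints 4 */
        return 0;
}
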
diff --git a/arch/m68k/include/asm/dma-mapping.h b/arch/m68k/include/asm/dma-mapping.h
deleted file mode 100644
index e3722ed04fbb..000000000000
--- a/arch/m68k/include/asm/dma-mapping.h
+++ /dev/null
@@ -1,12 +0,0 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2#ifndef _M68K_DMA_MAPPING_H
3#define _M68K_DMA_MAPPING_H
4
5extern const struct dma_map_ops m68k_dma_ops;
6
7static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
8{
9 return &m68k_dma_ops;
10}
11
12#endif /* _M68K_DMA_MAPPING_H */
diff --git a/arch/m68k/include/asm/io.h b/arch/m68k/include/asm/io.h
index ca2849afb087..aabe6420ead2 100644
--- a/arch/m68k/include/asm/io.h
+++ b/arch/m68k/include/asm/io.h
@@ -1,6 +1,13 @@
1/* SPDX-License-Identifier: GPL-2.0 */ 1/* SPDX-License-Identifier: GPL-2.0 */
2#ifndef _M68K_IO_H
3#define _M68K_IO_H
4
2#if defined(__uClinux__) || defined(CONFIG_COLDFIRE) 5#if defined(__uClinux__) || defined(CONFIG_COLDFIRE)
3#include <asm/io_no.h> 6#include <asm/io_no.h>
4#else 7#else
5#include <asm/io_mm.h> 8#include <asm/io_mm.h>
6#endif 9#endif
10
11#include <asm-generic/io.h>
12
13#endif /* _M68K_IO_H */
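
Pulling in <asm-generic/io.h> at the end of io.h is safe because that header only supplies a definition for an accessor the architecture has not already claimed, and the claim is made with the "#define name name" idiom that appears throughout these headers (iounmap, memset_io, memcpy_fromio, ...). A contrived illustration of the idiom; all names here are made up:

/*
 * The "#define name name" override idiom: a generic header provides a
 * fallback only when the name is still free.  Stand-in names, not any
 * real kernel accessor.
 */
#include <stdio.h>

/* --- arch header: provides its own fast_op() and claims the name --- */
static int fast_op(int x) { return x * 2; }
#define fast_op fast_op

/* --- generic header: defines a fallback only if the name is free --- */
#ifndef fast_op
static int fast_op(int x) { return x + 1; }     /* would be the fallback */
#endif

#ifndef slow_op                                 /* arch did not claim this one */
static int slow_op(int x) { return x - 1; }     /* generic fallback used */
#endif

int main(void)
{
        printf("%d %d\n", fast_op(10), slow_op(10));    /* prints 20 9 */
        return 0;
}
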
diff --git a/arch/m68k/include/asm/io_mm.h b/arch/m68k/include/asm/io_mm.h
index fe485f4f5fac..782b78f8a048 100644
--- a/arch/m68k/include/asm/io_mm.h
+++ b/arch/m68k/include/asm/io_mm.h
@@ -16,13 +16,11 @@
16 * isa_readX(),isa_writeX() are for ISA memory 16 * isa_readX(),isa_writeX() are for ISA memory
17 */ 17 */
18 18
19#ifndef _IO_H 19#ifndef _M68K_IO_MM_H
20#define _IO_H 20#define _M68K_IO_MM_H
21 21
22#ifdef __KERNEL__ 22#ifdef __KERNEL__
23 23
24#define ARCH_HAS_IOREMAP_WT
25
26#include <linux/compiler.h> 24#include <linux/compiler.h>
27#include <asm/raw_io.h> 25#include <asm/raw_io.h>
28#include <asm/virtconvert.h> 26#include <asm/virtconvert.h>
@@ -369,40 +367,6 @@ static inline void isa_delay(void)
369#define writew(val, addr) out_le16((addr), (val)) 367#define writew(val, addr) out_le16((addr), (val))
370#endif /* CONFIG_ATARI_ROM_ISA */ 368#endif /* CONFIG_ATARI_ROM_ISA */
371 369
372#if !defined(CONFIG_ISA) && !defined(CONFIG_ATARI_ROM_ISA)
373/*
374 * We need to define dummy functions for GENERIC_IOMAP support.
375 */
376#define inb(port) 0xff
377#define inb_p(port) 0xff
378#define outb(val,port) ((void)0)
379#define outb_p(val,port) ((void)0)
380#define inw(port) 0xffff
381#define inw_p(port) 0xffff
382#define outw(val,port) ((void)0)
383#define outw_p(val,port) ((void)0)
384#define inl(port) 0xffffffffUL
385#define inl_p(port) 0xffffffffUL
386#define outl(val,port) ((void)0)
387#define outl_p(val,port) ((void)0)
388
389#define insb(port,buf,nr) ((void)0)
390#define outsb(port,buf,nr) ((void)0)
391#define insw(port,buf,nr) ((void)0)
392#define outsw(port,buf,nr) ((void)0)
393#define insl(port,buf,nr) ((void)0)
394#define outsl(port,buf,nr) ((void)0)
395
396/*
397 * These should be valid on any ioremap()ed region
398 */
399#define readb(addr) in_8(addr)
400#define writeb(val,addr) out_8((addr),(val))
401#define readw(addr) in_le16(addr)
402#define writew(val,addr) out_le16((addr),(val))
403
404#endif /* !CONFIG_ISA && !CONFIG_ATARI_ROM_ISA */
405
406#define readl(addr) in_le32(addr) 370#define readl(addr) in_le32(addr)
407#define writel(val,addr) out_le32((addr),(val)) 371#define writel(val,addr) out_le32((addr),(val))
408 372
@@ -444,4 +408,4 @@ static inline void isa_delay(void)
444#define writew_relaxed(b, addr) writew(b, addr) 408#define writew_relaxed(b, addr) writew(b, addr)
445#define writel_relaxed(b, addr) writel(b, addr) 409#define writel_relaxed(b, addr) writel(b, addr)
446 410
447#endif /* _IO_H */ 411#endif /* _M68K_IO_MM_H */
diff --git a/arch/m68k/include/asm/io_no.h b/arch/m68k/include/asm/io_no.h
index 83a0a6d449f4..0498192e1d98 100644
--- a/arch/m68k/include/asm/io_no.h
+++ b/arch/m68k/include/asm/io_no.h
@@ -131,19 +131,7 @@ static inline void writel(u32 value, volatile void __iomem *addr)
131#define PCI_SPACE_LIMIT PCI_IO_MASK 131#define PCI_SPACE_LIMIT PCI_IO_MASK
132#endif /* CONFIG_PCI */ 132#endif /* CONFIG_PCI */
133 133
134/*
135 * These are defined in kmap.h as static inline functions. To maintain
136 * previous behavior we put these define guards here so io_mm.h doesn't
137 * see them.
138 */
139#ifdef CONFIG_MMU
140#define memset_io memset_io
141#define memcpy_fromio memcpy_fromio
142#define memcpy_toio memcpy_toio
143#endif
144
145#include <asm/kmap.h> 134#include <asm/kmap.h>
146#include <asm/virtconvert.h> 135#include <asm/virtconvert.h>
147#include <asm-generic/io.h>
148 136
149#endif /* _M68KNOMMU_IO_H */ 137#endif /* _M68KNOMMU_IO_H */
diff --git a/arch/m68k/include/asm/kmap.h b/arch/m68k/include/asm/kmap.h
index 84b8333db8ad..aac7f045f7f0 100644
--- a/arch/m68k/include/asm/kmap.h
+++ b/arch/m68k/include/asm/kmap.h
@@ -4,6 +4,8 @@
4 4
5#ifdef CONFIG_MMU 5#ifdef CONFIG_MMU
6 6
7#define ARCH_HAS_IOREMAP_WT
8
7/* Values for nocacheflag and cmode */ 9/* Values for nocacheflag and cmode */
8#define IOMAP_FULL_CACHING 0 10#define IOMAP_FULL_CACHING 0
9#define IOMAP_NOCACHE_SER 1 11#define IOMAP_NOCACHE_SER 1
@@ -16,6 +18,7 @@
16 */ 18 */
17extern void __iomem *__ioremap(unsigned long physaddr, unsigned long size, 19extern void __iomem *__ioremap(unsigned long physaddr, unsigned long size,
18 int cacheflag); 20 int cacheflag);
21#define iounmap iounmap
19extern void iounmap(void __iomem *addr); 22extern void iounmap(void __iomem *addr);
20extern void __iounmap(void *addr, unsigned long size); 23extern void __iounmap(void *addr, unsigned long size);
21 24
@@ -33,31 +36,35 @@ static inline void __iomem *ioremap_nocache(unsigned long physaddr,
33} 36}
34 37
35#define ioremap_uc ioremap_nocache 38#define ioremap_uc ioremap_nocache
39#define ioremap_wt ioremap_wt
36static inline void __iomem *ioremap_wt(unsigned long physaddr, 40static inline void __iomem *ioremap_wt(unsigned long physaddr,
37 unsigned long size) 41 unsigned long size)
38{ 42{
39 return __ioremap(physaddr, size, IOMAP_WRITETHROUGH); 43 return __ioremap(physaddr, size, IOMAP_WRITETHROUGH);
40} 44}
41 45
42#define ioremap_fillcache ioremap_fullcache 46#define ioremap_fullcache ioremap_fullcache
43static inline void __iomem *ioremap_fullcache(unsigned long physaddr, 47static inline void __iomem *ioremap_fullcache(unsigned long physaddr,
44 unsigned long size) 48 unsigned long size)
45{ 49{
46 return __ioremap(physaddr, size, IOMAP_FULL_CACHING); 50 return __ioremap(physaddr, size, IOMAP_FULL_CACHING);
47} 51}
48 52
53#define memset_io memset_io
49static inline void memset_io(volatile void __iomem *addr, unsigned char val, 54static inline void memset_io(volatile void __iomem *addr, unsigned char val,
50 int count) 55 int count)
51{ 56{
52 __builtin_memset((void __force *) addr, val, count); 57 __builtin_memset((void __force *) addr, val, count);
53} 58}
54 59
60#define memcpy_fromio memcpy_fromio
55static inline void memcpy_fromio(void *dst, const volatile void __iomem *src, 61static inline void memcpy_fromio(void *dst, const volatile void __iomem *src,
56 int count) 62 int count)
57{ 63{
58 __builtin_memcpy(dst, (void __force *) src, count); 64 __builtin_memcpy(dst, (void __force *) src, count);
59} 65}
60 66
67#define memcpy_toio memcpy_toio
61static inline void memcpy_toio(volatile void __iomem *dst, const void *src, 68static inline void memcpy_toio(volatile void __iomem *dst, const void *src,
62 int count) 69 int count)
63{ 70{
diff --git a/arch/m68k/include/asm/machdep.h b/arch/m68k/include/asm/machdep.h
index 1605da48ebf2..49bd3266b4b1 100644
--- a/arch/m68k/include/asm/machdep.h
+++ b/arch/m68k/include/asm/machdep.h
@@ -22,7 +22,6 @@ extern int (*mach_hwclk)(int, struct rtc_time*);
22extern unsigned int (*mach_get_ss)(void); 22extern unsigned int (*mach_get_ss)(void);
23extern int (*mach_get_rtc_pll)(struct rtc_pll_info *); 23extern int (*mach_get_rtc_pll)(struct rtc_pll_info *);
24extern int (*mach_set_rtc_pll)(struct rtc_pll_info *); 24extern int (*mach_set_rtc_pll)(struct rtc_pll_info *);
25extern int (*mach_set_clock_mmss)(unsigned long);
26extern void (*mach_reset)( void ); 25extern void (*mach_reset)( void );
27extern void (*mach_halt)( void ); 26extern void (*mach_halt)( void );
28extern void (*mach_power_off)( void ); 27extern void (*mach_power_off)( void );
diff --git a/arch/m68k/include/asm/macintosh.h b/arch/m68k/include/asm/macintosh.h
index 9b840c03ebb7..08cee11180e6 100644
--- a/arch/m68k/include/asm/macintosh.h
+++ b/arch/m68k/include/asm/macintosh.h
@@ -57,7 +57,6 @@ struct mac_model
57#define MAC_SCSI_IIFX 5 57#define MAC_SCSI_IIFX 5
58#define MAC_SCSI_DUO 6 58#define MAC_SCSI_DUO 6
59#define MAC_SCSI_LC 7 59#define MAC_SCSI_LC 7
60#define MAC_SCSI_LATE 8
61 60
62#define MAC_IDE_NONE 0 61#define MAC_IDE_NONE 0
63#define MAC_IDE_QUADRA 1 62#define MAC_IDE_QUADRA 1
diff --git a/arch/m68k/include/asm/page_no.h b/arch/m68k/include/asm/page_no.h
index e644c4daf540..6bbe52025de3 100644
--- a/arch/m68k/include/asm/page_no.h
+++ b/arch/m68k/include/asm/page_no.h
@@ -18,7 +18,7 @@ extern unsigned long memory_end;
18#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE 18#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
19 19
20#define __pa(vaddr) ((unsigned long)(vaddr)) 20#define __pa(vaddr) ((unsigned long)(vaddr))
21#define __va(paddr) ((void *)(paddr)) 21#define __va(paddr) ((void *)((unsigned long)(paddr)))
22 22
23#define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT) 23#define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT)
24#define pfn_to_virt(pfn) __va((pfn) << PAGE_SHIFT) 24#define pfn_to_virt(pfn) __va((pfn) << PAGE_SHIFT)
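
Whatever the exact warning being silenced in this tree, the practical effect of the extra cast in __va() is that the argument is narrowed to unsigned long before it becomes a pointer, so handing the macro a 64-bit value no longer converts an integer of a different size straight to a 32-bit pointer. A hedged illustration of the difference in plain C:

/*
 * On a 32-bit target, casting a 64-bit integer directly to a pointer
 * triggers GCC's -Wint-to-pointer-cast; narrowing through unsigned
 * long first keeps the conversion explicit and quiet.  Illustrative
 * macros only, not the kernel's __va().
 */
#include <stdint.h>
#include <stdio.h>

#define VA_OLD(paddr) ((void *)(paddr))
#define VA_NEW(paddr) ((void *)((unsigned long)(paddr)))

int main(void)
{
        uint64_t paddr = 0x1000;        /* e.g. a phys_addr_t-sized value */

        /* On a 32-bit build, VA_OLD(paddr) would warn; VA_NEW() does not. */
        printf("%p\n", VA_NEW(paddr));
        return 0;
}
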
diff --git a/arch/m68k/kernel/dma.c b/arch/m68k/kernel/dma.c
index 463572c4943f..e99993c57d6b 100644
--- a/arch/m68k/kernel/dma.c
+++ b/arch/m68k/kernel/dma.c
@@ -6,7 +6,7 @@
6 6
7#undef DEBUG 7#undef DEBUG
8 8
9#include <linux/dma-mapping.h> 9#include <linux/dma-noncoherent.h>
10#include <linux/device.h> 10#include <linux/device.h>
11#include <linux/kernel.h> 11#include <linux/kernel.h>
12#include <linux/platform_device.h> 12#include <linux/platform_device.h>
@@ -19,7 +19,7 @@
19 19
20#if defined(CONFIG_MMU) && !defined(CONFIG_COLDFIRE) 20#if defined(CONFIG_MMU) && !defined(CONFIG_COLDFIRE)
21 21
22static void *m68k_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, 22void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
23 gfp_t flag, unsigned long attrs) 23 gfp_t flag, unsigned long attrs)
24{ 24{
25 struct page *page, **map; 25 struct page *page, **map;
@@ -62,7 +62,7 @@ static void *m68k_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
62 return addr; 62 return addr;
63} 63}
64 64
65static void m68k_dma_free(struct device *dev, size_t size, void *addr, 65void arch_dma_free(struct device *dev, size_t size, void *addr,
66 dma_addr_t handle, unsigned long attrs) 66 dma_addr_t handle, unsigned long attrs)
67{ 67{
68 pr_debug("dma_free_coherent: %p, %x\n", addr, handle); 68 pr_debug("dma_free_coherent: %p, %x\n", addr, handle);
@@ -73,8 +73,8 @@ static void m68k_dma_free(struct device *dev, size_t size, void *addr,
73 73
74#include <asm/cacheflush.h> 74#include <asm/cacheflush.h>
75 75
76static void *m68k_dma_alloc(struct device *dev, size_t size, 76void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
77 dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) 77 gfp_t gfp, unsigned long attrs)
78{ 78{
79 void *ret; 79 void *ret;
80 80
@@ -89,7 +89,7 @@ static void *m68k_dma_alloc(struct device *dev, size_t size,
89 return ret; 89 return ret;
90} 90}
91 91
92static void m68k_dma_free(struct device *dev, size_t size, void *vaddr, 92void arch_dma_free(struct device *dev, size_t size, void *vaddr,
93 dma_addr_t dma_handle, unsigned long attrs) 93 dma_addr_t dma_handle, unsigned long attrs)
94{ 94{
95 free_pages((unsigned long)vaddr, get_order(size)); 95 free_pages((unsigned long)vaddr, get_order(size));
@@ -97,8 +97,8 @@ static void m68k_dma_free(struct device *dev, size_t size, void *vaddr,
97 97
98#endif /* CONFIG_MMU && !CONFIG_COLDFIRE */ 98#endif /* CONFIG_MMU && !CONFIG_COLDFIRE */
99 99
100static void m68k_dma_sync_single_for_device(struct device *dev, 100void arch_sync_dma_for_device(struct device *dev, phys_addr_t handle,
101 dma_addr_t handle, size_t size, enum dma_data_direction dir) 101 size_t size, enum dma_data_direction dir)
102{ 102{
103 switch (dir) { 103 switch (dir) {
104 case DMA_BIDIRECTIONAL: 104 case DMA_BIDIRECTIONAL:
@@ -115,58 +115,6 @@ static void m68k_dma_sync_single_for_device(struct device *dev,
115 } 115 }
116} 116}
117 117
118static void m68k_dma_sync_sg_for_device(struct device *dev,
119 struct scatterlist *sglist, int nents, enum dma_data_direction dir)
120{
121 int i;
122 struct scatterlist *sg;
123
124 for_each_sg(sglist, sg, nents, i) {
125 dma_sync_single_for_device(dev, sg->dma_address, sg->length,
126 dir);
127 }
128}
129
130static dma_addr_t m68k_dma_map_page(struct device *dev, struct page *page,
131 unsigned long offset, size_t size, enum dma_data_direction dir,
132 unsigned long attrs)
133{
134 dma_addr_t handle = page_to_phys(page) + offset;
135
136 if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
137 dma_sync_single_for_device(dev, handle, size, dir);
138
139 return handle;
140}
141
142static int m68k_dma_map_sg(struct device *dev, struct scatterlist *sglist,
143 int nents, enum dma_data_direction dir, unsigned long attrs)
144{
145 int i;
146 struct scatterlist *sg;
147
148 for_each_sg(sglist, sg, nents, i) {
149 sg->dma_address = sg_phys(sg);
150
151 if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
152 continue;
153
154 dma_sync_single_for_device(dev, sg->dma_address, sg->length,
155 dir);
156 }
157 return nents;
158}
159
160const struct dma_map_ops m68k_dma_ops = {
161 .alloc = m68k_dma_alloc,
162 .free = m68k_dma_free,
163 .map_page = m68k_dma_map_page,
164 .map_sg = m68k_dma_map_sg,
165 .sync_single_for_device = m68k_dma_sync_single_for_device,
166 .sync_sg_for_device = m68k_dma_sync_sg_for_device,
167};
168EXPORT_SYMBOL(m68k_dma_ops);
169
170void arch_setup_pdev_archdata(struct platform_device *pdev) 118void arch_setup_pdev_archdata(struct platform_device *pdev)
171{ 119{
172 if (pdev->dev.coherent_dma_mask == DMA_MASK_NONE && 120 if (pdev->dev.coherent_dma_mask == DMA_MASK_NONE &&
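
After the dma.c conversion the architecture no longer publishes a dma_map_ops table; it keeps only arch_dma_alloc(), arch_dma_free() and arch_sync_dma_for_device(), and the generic noncoherent/direct-mapping code performs page and scatterlist mapping itself, calling the sync hook where the removed m68k_dma_map_page()/m68k_dma_map_sg() used to. A compressed sketch of that division of labour, with simplified stand-in types and signatures rather than the kernel's:

/*
 * Sketch: the arch supplies only cache maintenance; generic "direct"
 * DMA code builds the mapping on top of it.  Simulation for
 * illustration, not kernel code.
 */
#include <stdio.h>
#include <stddef.h>

typedef unsigned long phys_addr_t;
typedef unsigned long dma_addr_t;
enum dma_data_direction { DMA_TO_DEVICE, DMA_FROM_DEVICE, DMA_BIDIRECTIONAL };

/* --- what the arch still provides (m68k: push/clear cache lines) --- */
static void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
                                     enum dma_data_direction dir)
{
        printf("  cache maintenance on %#lx..%#lx, dir %d\n",
               paddr, paddr + size - 1, dir);
}

/* --- what generic code now does (was m68k_dma_map_page()) --- */
static dma_addr_t dma_direct_map(phys_addr_t paddr, size_t size,
                                 enum dma_data_direction dir)
{
        arch_sync_dma_for_device(paddr, size, dir);
        return (dma_addr_t)paddr;       /* identity mapping, no IOMMU */
}

int main(void)
{
        printf("mapping one buffer:\n");
        (void)dma_direct_map(0x80000, 4096, DMA_TO_DEVICE);
        return 0;
}
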
diff --git a/arch/m68k/kernel/setup_mm.c b/arch/m68k/kernel/setup_mm.c
index f35e3ebd6331..5d3596c180f9 100644
--- a/arch/m68k/kernel/setup_mm.c
+++ b/arch/m68k/kernel/setup_mm.c
@@ -21,6 +21,7 @@
21#include <linux/string.h> 21#include <linux/string.h>
22#include <linux/init.h> 22#include <linux/init.h>
23#include <linux/bootmem.h> 23#include <linux/bootmem.h>
24#include <linux/memblock.h>
24#include <linux/proc_fs.h> 25#include <linux/proc_fs.h>
25#include <linux/seq_file.h> 26#include <linux/seq_file.h>
26#include <linux/module.h> 27#include <linux/module.h>
@@ -88,7 +89,6 @@ void (*mach_get_hardware_list) (struct seq_file *m);
88/* machine dependent timer functions */ 89/* machine dependent timer functions */
89int (*mach_hwclk) (int, struct rtc_time*); 90int (*mach_hwclk) (int, struct rtc_time*);
90EXPORT_SYMBOL(mach_hwclk); 91EXPORT_SYMBOL(mach_hwclk);
91int (*mach_set_clock_mmss) (unsigned long);
92unsigned int (*mach_get_ss)(void); 92unsigned int (*mach_get_ss)(void);
93int (*mach_get_rtc_pll)(struct rtc_pll_info *); 93int (*mach_get_rtc_pll)(struct rtc_pll_info *);
94int (*mach_set_rtc_pll)(struct rtc_pll_info *); 94int (*mach_set_rtc_pll)(struct rtc_pll_info *);
@@ -165,6 +165,8 @@ static void __init m68k_parse_bootinfo(const struct bi_record *record)
165 be32_to_cpu(m->addr); 165 be32_to_cpu(m->addr);
166 m68k_memory[m68k_num_memory].size = 166 m68k_memory[m68k_num_memory].size =
167 be32_to_cpu(m->size); 167 be32_to_cpu(m->size);
168 memblock_add(m68k_memory[m68k_num_memory].addr,
169 m68k_memory[m68k_num_memory].size);
168 m68k_num_memory++; 170 m68k_num_memory++;
169 } else 171 } else
170 pr_warn("%s: too many memory chunks\n", 172 pr_warn("%s: too many memory chunks\n",
@@ -224,10 +226,6 @@ static void __init m68k_parse_bootinfo(const struct bi_record *record)
224 226
225void __init setup_arch(char **cmdline_p) 227void __init setup_arch(char **cmdline_p)
226{ 228{
227#ifndef CONFIG_SUN3
228 int i;
229#endif
230
231 /* The bootinfo is located right after the kernel */ 229 /* The bootinfo is located right after the kernel */
232 if (!CPU_IS_COLDFIRE) 230 if (!CPU_IS_COLDFIRE)
233 m68k_parse_bootinfo((const struct bi_record *)_end); 231 m68k_parse_bootinfo((const struct bi_record *)_end);
@@ -356,14 +354,9 @@ void __init setup_arch(char **cmdline_p)
356#endif 354#endif
357 355
358#ifndef CONFIG_SUN3 356#ifndef CONFIG_SUN3
359 for (i = 1; i < m68k_num_memory; i++)
360 free_bootmem_node(NODE_DATA(i), m68k_memory[i].addr,
361 m68k_memory[i].size);
362#ifdef CONFIG_BLK_DEV_INITRD 357#ifdef CONFIG_BLK_DEV_INITRD
363 if (m68k_ramdisk.size) { 358 if (m68k_ramdisk.size) {
364 reserve_bootmem_node(__virt_to_node(phys_to_virt(m68k_ramdisk.addr)), 359 memblock_reserve(m68k_ramdisk.addr, m68k_ramdisk.size);
365 m68k_ramdisk.addr, m68k_ramdisk.size,
366 BOOTMEM_DEFAULT);
367 initrd_start = (unsigned long)phys_to_virt(m68k_ramdisk.addr); 360 initrd_start = (unsigned long)phys_to_virt(m68k_ramdisk.addr);
368 initrd_end = initrd_start + m68k_ramdisk.size; 361 initrd_end = initrd_start + m68k_ramdisk.size;
369 pr_info("initrd: %08lx - %08lx\n", initrd_start, initrd_end); 362 pr_info("initrd: %08lx - %08lx\n", initrd_start, initrd_end);
diff --git a/arch/m68k/kernel/setup_no.c b/arch/m68k/kernel/setup_no.c
index a98af1018201..cfd5475bfc31 100644
--- a/arch/m68k/kernel/setup_no.c
+++ b/arch/m68k/kernel/setup_no.c
@@ -28,6 +28,7 @@
28#include <linux/errno.h> 28#include <linux/errno.h>
29#include <linux/string.h> 29#include <linux/string.h>
30#include <linux/bootmem.h> 30#include <linux/bootmem.h>
31#include <linux/memblock.h>
31#include <linux/seq_file.h> 32#include <linux/seq_file.h>
32#include <linux/init.h> 33#include <linux/init.h>
33#include <linux/initrd.h> 34#include <linux/initrd.h>
@@ -51,7 +52,6 @@ char __initdata command_line[COMMAND_LINE_SIZE];
51 52
52/* machine dependent timer functions */ 53/* machine dependent timer functions */
53void (*mach_sched_init)(irq_handler_t handler) __initdata = NULL; 54void (*mach_sched_init)(irq_handler_t handler) __initdata = NULL;
54int (*mach_set_clock_mmss)(unsigned long);
55int (*mach_hwclk) (int, struct rtc_time*); 55int (*mach_hwclk) (int, struct rtc_time*);
56 56
57/* machine dependent reboot functions */ 57/* machine dependent reboot functions */
@@ -86,8 +86,6 @@ void (*mach_power_off)(void);
86 86
87void __init setup_arch(char **cmdline_p) 87void __init setup_arch(char **cmdline_p)
88{ 88{
89 int bootmap_size;
90
91 memory_start = PAGE_ALIGN(_ramstart); 89 memory_start = PAGE_ALIGN(_ramstart);
92 memory_end = _ramend; 90 memory_end = _ramend;
93 91
@@ -142,6 +140,8 @@ void __init setup_arch(char **cmdline_p)
142 pr_debug("MEMORY -> ROMFS=0x%p-0x%06lx MEM=0x%06lx-0x%06lx\n ", 140 pr_debug("MEMORY -> ROMFS=0x%p-0x%06lx MEM=0x%06lx-0x%06lx\n ",
143 __bss_stop, memory_start, memory_start, memory_end); 141 __bss_stop, memory_start, memory_start, memory_end);
144 142
143 memblock_add(memory_start, memory_end - memory_start);
144
145 /* Keep a copy of command line */ 145 /* Keep a copy of command line */
146 *cmdline_p = &command_line[0]; 146 *cmdline_p = &command_line[0];
147 memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE); 147 memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
@@ -158,23 +158,10 @@ void __init setup_arch(char **cmdline_p)
158 min_low_pfn = PFN_DOWN(memory_start); 158 min_low_pfn = PFN_DOWN(memory_start);
159 max_pfn = max_low_pfn = PFN_DOWN(memory_end); 159 max_pfn = max_low_pfn = PFN_DOWN(memory_end);
160 160
161 bootmap_size = init_bootmem_node(
162 NODE_DATA(0),
163 min_low_pfn, /* map goes here */
164 PFN_DOWN(PAGE_OFFSET),
165 max_pfn);
166 /*
167 * Free the usable memory, we have to make sure we do not free
168 * the bootmem bitmap so we then reserve it after freeing it :-)
169 */
170 free_bootmem(memory_start, memory_end - memory_start);
171 reserve_bootmem(memory_start, bootmap_size, BOOTMEM_DEFAULT);
172
173#if defined(CONFIG_UBOOT) && defined(CONFIG_BLK_DEV_INITRD) 161#if defined(CONFIG_UBOOT) && defined(CONFIG_BLK_DEV_INITRD)
174 if ((initrd_start > 0) && (initrd_start < initrd_end) && 162 if ((initrd_start > 0) && (initrd_start < initrd_end) &&
175 (initrd_end < memory_end)) 163 (initrd_end < memory_end))
176 reserve_bootmem(initrd_start, initrd_end - initrd_start, 164 memblock_reserve(initrd_start, initrd_end - initrd_start);
177 BOOTMEM_DEFAULT);
178#endif /* if defined(CONFIG_BLK_DEV_INITRD) */ 165#endif /* if defined(CONFIG_BLK_DEV_INITRD) */
179 166
180 /* 167 /*
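
Both setup paths stop maintaining a bootmem bitmap by hand: usable RAM is declared with memblock_add(), the ramdisk is carved out with memblock_reserve(), and later boot-time allocations come from whatever remains. A toy model of that bookkeeping, purely for illustration and not the kernel's allocator:

/*
 * Toy model of the memblock calls that replace the bootmem code above:
 * memblock_add() declares usable RAM, memblock_reserve() carves chunks
 * out of it.  Simulation only.
 */
#include <stdio.h>

#define MAX_REGIONS 8

struct region { unsigned long base, size; };

static struct region mem[MAX_REGIONS], reserved[MAX_REGIONS];
static int nr_mem, nr_reserved;

static void memblock_add(unsigned long base, unsigned long size)
{
        mem[nr_mem++] = (struct region){ base, size };
}

static void memblock_reserve(unsigned long base, unsigned long size)
{
        reserved[nr_reserved++] = (struct region){ base, size };
}

int main(void)
{
        /* mirrors setup_arch(): one RAM bank, then the initrd carved out */
        memblock_add(0x00001000, 0x00fff000);           /* memory_start..end */
        memblock_reserve(0x00800000, 0x00100000);       /* initrd image */

        for (int i = 0; i < nr_mem; i++)
                printf("memory   %#010lx + %#lx\n", mem[i].base, mem[i].size);
        for (int i = 0; i < nr_reserved; i++)
                printf("reserved %#010lx + %#lx\n",
                       reserved[i].base, reserved[i].size);
        return 0;
}
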
diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c
index e522307db47c..b02d7254b73a 100644
--- a/arch/m68k/mac/config.c
+++ b/arch/m68k/mac/config.c
@@ -57,7 +57,6 @@ static unsigned long mac_orig_videoaddr;
57/* Mac specific timer functions */ 57/* Mac specific timer functions */
58extern u32 mac_gettimeoffset(void); 58extern u32 mac_gettimeoffset(void);
59extern int mac_hwclk(int, struct rtc_time *); 59extern int mac_hwclk(int, struct rtc_time *);
60extern int mac_set_clock_mmss(unsigned long);
61extern void iop_preinit(void); 60extern void iop_preinit(void);
62extern void iop_init(void); 61extern void iop_init(void);
63extern void via_init(void); 62extern void via_init(void);
@@ -158,7 +157,6 @@ void __init config_mac(void)
158 mach_get_model = mac_get_model; 157 mach_get_model = mac_get_model;
159 arch_gettimeoffset = mac_gettimeoffset; 158 arch_gettimeoffset = mac_gettimeoffset;
160 mach_hwclk = mac_hwclk; 159 mach_hwclk = mac_hwclk;
161 mach_set_clock_mmss = mac_set_clock_mmss;
162 mach_reset = mac_reset; 160 mach_reset = mac_reset;
163 mach_halt = mac_poweroff; 161 mach_halt = mac_poweroff;
164 mach_power_off = mac_poweroff; 162 mach_power_off = mac_poweroff;
@@ -709,7 +707,7 @@ static struct mac_model mac_data_table[] = {
709 .name = "PowerBook 520", 707 .name = "PowerBook 520",
710 .adb_type = MAC_ADB_PB2, 708 .adb_type = MAC_ADB_PB2,
711 .via_type = MAC_VIA_QUADRA, 709 .via_type = MAC_VIA_QUADRA,
712 .scsi_type = MAC_SCSI_LATE, 710 .scsi_type = MAC_SCSI_OLD,
713 .scc_type = MAC_SCC_QUADRA, 711 .scc_type = MAC_SCC_QUADRA,
714 .ether_type = MAC_ETHER_SONIC, 712 .ether_type = MAC_ETHER_SONIC,
715 .floppy_type = MAC_FLOPPY_SWIM_ADDR2, 713 .floppy_type = MAC_FLOPPY_SWIM_ADDR2,
@@ -943,18 +941,6 @@ static const struct resource mac_scsi_old_rsrc[] __initconst = {
943 }, 941 },
944}; 942};
945 943
946static const struct resource mac_scsi_late_rsrc[] __initconst = {
947 {
948 .flags = IORESOURCE_IRQ,
949 .start = IRQ_MAC_SCSI,
950 .end = IRQ_MAC_SCSI,
951 }, {
952 .flags = IORESOURCE_MEM,
953 .start = 0x50010000,
954 .end = 0x50011FFF,
955 },
956};
957
958static const struct resource mac_scsi_ccl_rsrc[] __initconst = { 944static const struct resource mac_scsi_ccl_rsrc[] __initconst = {
959 { 945 {
960 .flags = IORESOURCE_IRQ, 946 .flags = IORESOURCE_IRQ,
@@ -1064,11 +1050,6 @@ int __init mac_platform_init(void)
1064 platform_device_register_simple("mac_scsi", 0, 1050 platform_device_register_simple("mac_scsi", 0,
1065 mac_scsi_old_rsrc, ARRAY_SIZE(mac_scsi_old_rsrc)); 1051 mac_scsi_old_rsrc, ARRAY_SIZE(mac_scsi_old_rsrc));
1066 break; 1052 break;
1067 case MAC_SCSI_LATE:
1068 /* XXX PDMA support for PowerBook 500 series needs testing */
1069 platform_device_register_simple("mac_scsi", 0,
1070 mac_scsi_late_rsrc, ARRAY_SIZE(mac_scsi_late_rsrc));
1071 break;
1072 case MAC_SCSI_LC: 1053 case MAC_SCSI_LC:
1073 /* Addresses from Mac LC data in Designing Cards & Drivers 3ed. 1054 /* Addresses from Mac LC data in Designing Cards & Drivers 3ed.
1074 * Also from the Developer Notes for Classic II, LC III, 1055 * Also from the Developer Notes for Classic II, LC III,
diff --git a/arch/m68k/mac/misc.c b/arch/m68k/mac/misc.c
index c68054361615..19e9d8eef1f2 100644
--- a/arch/m68k/mac/misc.c
+++ b/arch/m68k/mac/misc.c
@@ -26,33 +26,38 @@
26 26
27#include <asm/machdep.h> 27#include <asm/machdep.h>
28 28
29/* Offset between Unix time (1970-based) and Mac time (1904-based) */ 29/*
30 * Offset between Unix time (1970-based) and Mac time (1904-based). Cuda and PMU
31 * times wrap in 2040. If we need to handle later times, the read_time functions
32 * need to be changed to interpret wrapped times as post-2040.
33 */
30 34
31#define RTC_OFFSET 2082844800 35#define RTC_OFFSET 2082844800
32 36
33static void (*rom_reset)(void); 37static void (*rom_reset)(void);
34 38
35#ifdef CONFIG_ADB_CUDA 39#ifdef CONFIG_ADB_CUDA
36static long cuda_read_time(void) 40static time64_t cuda_read_time(void)
37{ 41{
38 struct adb_request req; 42 struct adb_request req;
39 long time; 43 time64_t time;
40 44
41 if (cuda_request(&req, NULL, 2, CUDA_PACKET, CUDA_GET_TIME) < 0) 45 if (cuda_request(&req, NULL, 2, CUDA_PACKET, CUDA_GET_TIME) < 0)
42 return 0; 46 return 0;
43 while (!req.complete) 47 while (!req.complete)
44 cuda_poll(); 48 cuda_poll();
45 49
46 time = (req.reply[3] << 24) | (req.reply[4] << 16) | 50 time = (u32)((req.reply[3] << 24) | (req.reply[4] << 16) |
47 (req.reply[5] << 8) | req.reply[6]; 51 (req.reply[5] << 8) | req.reply[6]);
52
48 return time - RTC_OFFSET; 53 return time - RTC_OFFSET;
49} 54}
50 55
51static void cuda_write_time(long data) 56static void cuda_write_time(time64_t time)
52{ 57{
53 struct adb_request req; 58 struct adb_request req;
59 u32 data = lower_32_bits(time + RTC_OFFSET);
54 60
55 data += RTC_OFFSET;
56 if (cuda_request(&req, NULL, 6, CUDA_PACKET, CUDA_SET_TIME, 61 if (cuda_request(&req, NULL, 6, CUDA_PACKET, CUDA_SET_TIME,
57 (data >> 24) & 0xFF, (data >> 16) & 0xFF, 62 (data >> 24) & 0xFF, (data >> 16) & 0xFF,
58 (data >> 8) & 0xFF, data & 0xFF) < 0) 63 (data >> 8) & 0xFF, data & 0xFF) < 0)
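[Editor's note] As the new comment explains, the Cuda/PMU clock is a 32-bit seconds counter based at 1904, so it wraps in 2040; assembling the bytes into a u32 and widening to time64_t keeps the arithmetic well defined. A hedged sketch of the conversion in both directions (RTC_OFFSET as in the file, the helper names are illustrative):

    #include <linux/kernel.h>
    #include <linux/time64.h>

    #define RTC_OFFSET 2082844800   /* seconds between 1904 and 1970 */

    /* 32-bit Mac RTC value -> Unix time64_t */
    static time64_t example_mac_to_unix(u32 mac_secs)
    {
            return (time64_t)mac_secs - RTC_OFFSET;
    }

    /* Unix time64_t -> 32-bit Mac RTC value (truncates after 2040) */
    static u32 example_unix_to_mac(time64_t t)
    {
            return lower_32_bits(t + RTC_OFFSET);
    }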
@@ -86,26 +91,27 @@ static void cuda_write_pram(int offset, __u8 data)
86#endif /* CONFIG_ADB_CUDA */ 91#endif /* CONFIG_ADB_CUDA */
87 92
88#ifdef CONFIG_ADB_PMU68K 93#ifdef CONFIG_ADB_PMU68K
89static long pmu_read_time(void) 94static time64_t pmu_read_time(void)
90{ 95{
91 struct adb_request req; 96 struct adb_request req;
92 long time; 97 time64_t time;
93 98
94 if (pmu_request(&req, NULL, 1, PMU_READ_RTC) < 0) 99 if (pmu_request(&req, NULL, 1, PMU_READ_RTC) < 0)
95 return 0; 100 return 0;
96 while (!req.complete) 101 while (!req.complete)
97 pmu_poll(); 102 pmu_poll();
98 103
99 time = (req.reply[1] << 24) | (req.reply[2] << 16) | 104 time = (u32)((req.reply[1] << 24) | (req.reply[2] << 16) |
100 (req.reply[3] << 8) | req.reply[4]; 105 (req.reply[3] << 8) | req.reply[4]);
106
101 return time - RTC_OFFSET; 107 return time - RTC_OFFSET;
102} 108}
103 109
104static void pmu_write_time(long data) 110static void pmu_write_time(time64_t time)
105{ 111{
106 struct adb_request req; 112 struct adb_request req;
113 u32 data = lower_32_bits(time + RTC_OFFSET);
107 114
108 data += RTC_OFFSET;
109 if (pmu_request(&req, NULL, 5, PMU_SET_RTC, 115 if (pmu_request(&req, NULL, 5, PMU_SET_RTC,
110 (data >> 24) & 0xFF, (data >> 16) & 0xFF, 116 (data >> 24) & 0xFF, (data >> 16) & 0xFF,
111 (data >> 8) & 0xFF, data & 0xFF) < 0) 117 (data >> 8) & 0xFF, data & 0xFF) < 0)
@@ -245,11 +251,11 @@ static void via_write_pram(int offset, __u8 data)
245 * is basically any machine with Mac II-style ADB. 251 * is basically any machine with Mac II-style ADB.
246 */ 252 */
247 253
248static long via_read_time(void) 254static time64_t via_read_time(void)
249{ 255{
250 union { 256 union {
251 __u8 cdata[4]; 257 __u8 cdata[4];
252 long idata; 258 __u32 idata;
253 } result, last_result; 259 } result, last_result;
254 int count = 1; 260 int count = 1;
255 261
@@ -270,7 +276,7 @@ static long via_read_time(void)
270 via_pram_command(0x8D, &result.cdata[0]); 276 via_pram_command(0x8D, &result.cdata[0]);
271 277
272 if (result.idata == last_result.idata) 278 if (result.idata == last_result.idata)
273 return result.idata - RTC_OFFSET; 279 return (time64_t)result.idata - RTC_OFFSET;
274 280
275 if (++count > 10) 281 if (++count > 10)
276 break; 282 break;
@@ -278,8 +284,8 @@ static long via_read_time(void)
278 last_result.idata = result.idata; 284 last_result.idata = result.idata;
279 } 285 }
280 286
281 pr_err("via_read_time: failed to read a stable value; got 0x%08lx then 0x%08lx\n", 287 pr_err("%s: failed to read a stable value; got 0x%08x then 0x%08x\n",
282 last_result.idata, result.idata); 288 __func__, last_result.idata, result.idata);
283 289
284 return 0; 290 return 0;
285} 291}
@@ -291,11 +297,11 @@ static long via_read_time(void)
291 * is basically any machine with Mac II-style ADB. 297 * is basically any machine with Mac II-style ADB.
292 */ 298 */
293 299
294static void via_write_time(long time) 300static void via_write_time(time64_t time)
295{ 301{
296 union { 302 union {
297 __u8 cdata[4]; 303 __u8 cdata[4];
298 long idata; 304 __u32 idata;
299 } data; 305 } data;
300 __u8 temp; 306 __u8 temp;
301 307
@@ -304,7 +310,7 @@ static void via_write_time(long time)
304 temp = 0x55; 310 temp = 0x55;
305 via_pram_command(0x35, &temp); 311 via_pram_command(0x35, &temp);
306 312
307 data.idata = time + RTC_OFFSET; 313 data.idata = lower_32_bits(time + RTC_OFFSET);
308 via_pram_command(0x01, &data.cdata[3]); 314 via_pram_command(0x01, &data.cdata[3]);
309 via_pram_command(0x05, &data.cdata[2]); 315 via_pram_command(0x05, &data.cdata[2]);
310 via_pram_command(0x09, &data.cdata[1]); 316 via_pram_command(0x09, &data.cdata[1]);
@@ -585,12 +591,15 @@ void mac_reset(void)
585 * This function translates seconds since 1970 into a proper date. 591 * This function translates seconds since 1970 into a proper date.
586 * 592 *
587 * Algorithm cribbed from glibc2.1, __offtime(). 593 * Algorithm cribbed from glibc2.1, __offtime().
594 *
595 * This is roughly same as rtc_time64_to_tm(), which we should probably
596 * use here, but it's only available when CONFIG_RTC_LIB is enabled.
588 */ 597 */
589#define SECS_PER_MINUTE (60) 598#define SECS_PER_MINUTE (60)
590#define SECS_PER_HOUR (SECS_PER_MINUTE * 60) 599#define SECS_PER_HOUR (SECS_PER_MINUTE * 60)
591#define SECS_PER_DAY (SECS_PER_HOUR * 24) 600#define SECS_PER_DAY (SECS_PER_HOUR * 24)
592 601
593static void unmktime(unsigned long time, long offset, 602static void unmktime(time64_t time, long offset,
594 int *yearp, int *monp, int *dayp, 603 int *yearp, int *monp, int *dayp,
595 int *hourp, int *minp, int *secp) 604 int *hourp, int *minp, int *secp)
596{ 605{
@@ -602,11 +611,10 @@ static void unmktime(unsigned long time, long offset,
602 /* Leap years. */ 611 /* Leap years. */
603 { 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 } 612 { 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 }
604 }; 613 };
605 long int days, rem, y, wday, yday; 614 int days, rem, y, wday, yday;
606 const unsigned short int *ip; 615 const unsigned short int *ip;
607 616
608 days = time / SECS_PER_DAY; 617 days = div_u64_rem(time, SECS_PER_DAY, &rem);
609 rem = time % SECS_PER_DAY;
610 rem += offset; 618 rem += offset;
611 while (rem < 0) { 619 while (rem < 0) {
612 rem += SECS_PER_DAY; 620 rem += SECS_PER_DAY;
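[Editor's note] div_u64_rem() replaces the plain '/' and '%' above because time is now a 64-bit quantity and 32-bit m68k has no native 64-by-32 division; the helper does the division once and hands back the remainder through a pointer. A small usage sketch with arbitrary values:

    #include <linux/math64.h>
    #include <linux/printk.h>

    static void example_split_days(time64_t now)
    {
            u32 rem;        /* seconds into the current day */
            u64 days;

            /* one call yields both quotient and remainder */
            days = div_u64_rem(now, 86400, &rem);

            pr_info("day %llu, second-of-day %u\n",
                    (unsigned long long)days, rem);
    }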
@@ -657,7 +665,7 @@ static void unmktime(unsigned long time, long offset,
657 665
658int mac_hwclk(int op, struct rtc_time *t) 666int mac_hwclk(int op, struct rtc_time *t)
659{ 667{
660 unsigned long now; 668 time64_t now;
661 669
662 if (!op) { /* read */ 670 if (!op) { /* read */
663 switch (macintosh_config->adb_type) { 671 switch (macintosh_config->adb_type) {
@@ -693,8 +701,8 @@ int mac_hwclk(int op, struct rtc_time *t)
693 __func__, t->tm_year + 1900, t->tm_mon + 1, t->tm_mday, 701 __func__, t->tm_year + 1900, t->tm_mon + 1, t->tm_mday,
694 t->tm_hour, t->tm_min, t->tm_sec); 702 t->tm_hour, t->tm_min, t->tm_sec);
695 703
696 now = mktime(t->tm_year + 1900, t->tm_mon + 1, t->tm_mday, 704 now = mktime64(t->tm_year + 1900, t->tm_mon + 1, t->tm_mday,
697 t->tm_hour, t->tm_min, t->tm_sec); 705 t->tm_hour, t->tm_min, t->tm_sec);
698 706
699 switch (macintosh_config->adb_type) { 707 switch (macintosh_config->adb_type) {
700 case MAC_ADB_IOP: 708 case MAC_ADB_IOP:
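[Editor's note] mktime64() is the 64-bit-safe counterpart of the old mktime(): it takes broken-down calendar fields and returns a time64_t, so the write path no longer truncates dates past 2038 on 32-bit. A minimal sketch, assuming an already-filled struct rtc_time:

    #include <linux/rtc.h>
    #include <linux/time64.h>

    static time64_t example_rtc_to_time64(const struct rtc_time *t)
    {
            /* rtc_time stores years since 1900 and months 0..11 */
            return mktime64(t->tm_year + 1900, t->tm_mon + 1, t->tm_mday,
                            t->tm_hour, t->tm_min, t->tm_sec);
    }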
@@ -719,19 +727,3 @@ int mac_hwclk(int op, struct rtc_time *t)
719 } 727 }
720 return 0; 728 return 0;
721} 729}
722
723/*
724 * Set minutes/seconds in the hardware clock
725 */
726
727int mac_set_clock_mmss (unsigned long nowtime)
728{
729 struct rtc_time now;
730
731 mac_hwclk(0, &now);
732 now.tm_sec = nowtime % 60;
733 now.tm_min = (nowtime / 60) % 60;
734 mac_hwclk(1, &now);
735
736 return 0;
737}
diff --git a/arch/m68k/mm/init.c b/arch/m68k/mm/init.c
index 8827b7f91402..38e2b272c220 100644
--- a/arch/m68k/mm/init.c
+++ b/arch/m68k/mm/init.c
@@ -71,7 +71,6 @@ void __init m68k_setup_node(int node)
71 pg_data_table[i] = pg_data_map + node; 71 pg_data_table[i] = pg_data_map + node;
72 } 72 }
73#endif 73#endif
74 pg_data_map[node].bdata = bootmem_node_data + node;
75 node_set_online(node); 74 node_set_online(node);
76} 75}
77 76
diff --git a/arch/m68k/mm/mcfmmu.c b/arch/m68k/mm/mcfmmu.c
index 2925d795d71a..70dde040779b 100644
--- a/arch/m68k/mm/mcfmmu.c
+++ b/arch/m68k/mm/mcfmmu.c
@@ -14,6 +14,7 @@
14#include <linux/init.h> 14#include <linux/init.h>
15#include <linux/string.h> 15#include <linux/string.h>
16#include <linux/bootmem.h> 16#include <linux/bootmem.h>
17#include <linux/memblock.h>
17 18
18#include <asm/setup.h> 19#include <asm/setup.h>
19#include <asm/page.h> 20#include <asm/page.h>
@@ -153,31 +154,31 @@ int cf_tlb_miss(struct pt_regs *regs, int write, int dtlb, int extension_word)
153 154
154void __init cf_bootmem_alloc(void) 155void __init cf_bootmem_alloc(void)
155{ 156{
156 unsigned long start_pfn;
157 unsigned long memstart; 157 unsigned long memstart;
158 158
159 /* _rambase and _ramend will be naturally page aligned */ 159 /* _rambase and _ramend will be naturally page aligned */
160 m68k_memory[0].addr = _rambase; 160 m68k_memory[0].addr = _rambase;
161 m68k_memory[0].size = _ramend - _rambase; 161 m68k_memory[0].size = _ramend - _rambase;
162 162
163 memblock_add(m68k_memory[0].addr, m68k_memory[0].size);
164
163 /* compute total pages in system */ 165 /* compute total pages in system */
164 num_pages = PFN_DOWN(_ramend - _rambase); 166 num_pages = PFN_DOWN(_ramend - _rambase);
165 167
166 /* page numbers */ 168 /* page numbers */
167 memstart = PAGE_ALIGN(_ramstart); 169 memstart = PAGE_ALIGN(_ramstart);
168 min_low_pfn = PFN_DOWN(_rambase); 170 min_low_pfn = PFN_DOWN(_rambase);
169 start_pfn = PFN_DOWN(memstart);
170 max_pfn = max_low_pfn = PFN_DOWN(_ramend); 171 max_pfn = max_low_pfn = PFN_DOWN(_ramend);
171 high_memory = (void *)_ramend; 172 high_memory = (void *)_ramend;
172 173
174 /* Reserve kernel text/data/bss */
175 memblock_reserve(memstart, memstart - _rambase);
176
173 m68k_virt_to_node_shift = fls(_ramend - 1) - 6; 177 m68k_virt_to_node_shift = fls(_ramend - 1) - 6;
174 module_fixup(NULL, __start_fixup, __stop_fixup); 178 module_fixup(NULL, __start_fixup, __stop_fixup);
175 179
176 /* setup bootmem data */ 180 /* setup node data */
177 m68k_setup_node(0); 181 m68k_setup_node(0);
178 memstart += init_bootmem_node(NODE_DATA(0), start_pfn,
179 min_low_pfn, max_low_pfn);
180 free_bootmem_node(NODE_DATA(0), memstart, _ramend - memstart);
181} 182}
182 183
183/* 184/*
diff --git a/arch/m68k/mm/motorola.c b/arch/m68k/mm/motorola.c
index e490ecc7842c..4e17ecb5928a 100644
--- a/arch/m68k/mm/motorola.c
+++ b/arch/m68k/mm/motorola.c
@@ -19,6 +19,7 @@
19#include <linux/types.h> 19#include <linux/types.h>
20#include <linux/init.h> 20#include <linux/init.h>
21#include <linux/bootmem.h> 21#include <linux/bootmem.h>
22#include <linux/memblock.h>
22#include <linux/gfp.h> 23#include <linux/gfp.h>
23 24
24#include <asm/setup.h> 25#include <asm/setup.h>
@@ -208,7 +209,7 @@ void __init paging_init(void)
208{ 209{
209 unsigned long zones_size[MAX_NR_ZONES] = { 0, }; 210 unsigned long zones_size[MAX_NR_ZONES] = { 0, };
210 unsigned long min_addr, max_addr; 211 unsigned long min_addr, max_addr;
211 unsigned long addr, size, end; 212 unsigned long addr;
212 int i; 213 int i;
213 214
214#ifdef DEBUG 215#ifdef DEBUG
@@ -253,34 +254,20 @@ void __init paging_init(void)
253 min_low_pfn = availmem >> PAGE_SHIFT; 254 min_low_pfn = availmem >> PAGE_SHIFT;
254 max_pfn = max_low_pfn = max_addr >> PAGE_SHIFT; 255 max_pfn = max_low_pfn = max_addr >> PAGE_SHIFT;
255 256
256 for (i = 0; i < m68k_num_memory; i++) { 257 /* Reserve kernel text/data/bss and the memory allocated in head.S */
257 addr = m68k_memory[i].addr; 258 memblock_reserve(m68k_memory[0].addr, availmem - m68k_memory[0].addr);
258 end = addr + m68k_memory[i].size;
259 m68k_setup_node(i);
260 availmem = PAGE_ALIGN(availmem);
261 availmem += init_bootmem_node(NODE_DATA(i),
262 availmem >> PAGE_SHIFT,
263 addr >> PAGE_SHIFT,
264 end >> PAGE_SHIFT);
265 }
266 259
267 /* 260 /*
268 * Map the physical memory available into the kernel virtual 261 * Map the physical memory available into the kernel virtual
269 * address space. First initialize the bootmem allocator with 262 * address space. Make sure memblock will not try to allocate
270 * the memory we already mapped, so map_node() has something 263 * pages beyond the memory we already mapped in head.S
271 * to allocate.
272 */ 264 */
273 addr = m68k_memory[0].addr; 265 memblock_set_bottom_up(true);
274 size = m68k_memory[0].size; 266
275 free_bootmem_node(NODE_DATA(0), availmem, 267 for (i = 0; i < m68k_num_memory; i++) {
276 min(m68k_init_mapped_size, size) - (availmem - addr)); 268 m68k_setup_node(i);
277 map_node(0);
278 if (size > m68k_init_mapped_size)
279 free_bootmem_node(NODE_DATA(0), addr + m68k_init_mapped_size,
280 size - m68k_init_mapped_size);
281
282 for (i = 1; i < m68k_num_memory; i++)
283 map_node(i); 269 map_node(i);
270 }
284 271
285 flush_tlb_all(); 272 flush_tlb_all();
286 273
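[Editor's note] memblock_set_bottom_up(true) makes subsequent memblock allocations grow upward from the lowest free address, which here keeps early allocations inside the region already mapped by head.S. A hedged sketch of the idea; the allocator call shown is the physical-address variant of that era and the size is arbitrary, so treat the exact helper as an assumption:

    #include <linux/memblock.h>

    static void __init example_bottom_up_alloc(void)
    {
            phys_addr_t pa;

            /* prefer low addresses for everything allocated from now on */
            memblock_set_bottom_up(true);

            /* this now comes from the lowest suitable free range */
            pa = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
            if (!pa)
                    panic("early allocation failed");
    }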
diff --git a/arch/m68k/mvme147/config.c b/arch/m68k/mvme147/config.c
index f8a710fd84cd..adea549d240e 100644
--- a/arch/m68k/mvme147/config.c
+++ b/arch/m68k/mvme147/config.c
@@ -40,7 +40,6 @@ static void mvme147_get_model(char *model);
40extern void mvme147_sched_init(irq_handler_t handler); 40extern void mvme147_sched_init(irq_handler_t handler);
41extern u32 mvme147_gettimeoffset(void); 41extern u32 mvme147_gettimeoffset(void);
42extern int mvme147_hwclk (int, struct rtc_time *); 42extern int mvme147_hwclk (int, struct rtc_time *);
43extern int mvme147_set_clock_mmss (unsigned long);
44extern void mvme147_reset (void); 43extern void mvme147_reset (void);
45 44
46 45
@@ -92,7 +91,6 @@ void __init config_mvme147(void)
92 mach_init_IRQ = mvme147_init_IRQ; 91 mach_init_IRQ = mvme147_init_IRQ;
93 arch_gettimeoffset = mvme147_gettimeoffset; 92 arch_gettimeoffset = mvme147_gettimeoffset;
94 mach_hwclk = mvme147_hwclk; 93 mach_hwclk = mvme147_hwclk;
95 mach_set_clock_mmss = mvme147_set_clock_mmss;
96 mach_reset = mvme147_reset; 94 mach_reset = mvme147_reset;
97 mach_get_model = mvme147_get_model; 95 mach_get_model = mvme147_get_model;
98 96
@@ -164,8 +162,3 @@ int mvme147_hwclk(int op, struct rtc_time *t)
164 } 162 }
165 return 0; 163 return 0;
166} 164}
167
168int mvme147_set_clock_mmss (unsigned long nowtime)
169{
170 return 0;
171}
diff --git a/arch/m68k/mvme16x/config.c b/arch/m68k/mvme16x/config.c
index 4ffd9ef98de4..6ee36a5b528d 100644
--- a/arch/m68k/mvme16x/config.c
+++ b/arch/m68k/mvme16x/config.c
@@ -46,7 +46,6 @@ static void mvme16x_get_model(char *model);
46extern void mvme16x_sched_init(irq_handler_t handler); 46extern void mvme16x_sched_init(irq_handler_t handler);
47extern u32 mvme16x_gettimeoffset(void); 47extern u32 mvme16x_gettimeoffset(void);
48extern int mvme16x_hwclk (int, struct rtc_time *); 48extern int mvme16x_hwclk (int, struct rtc_time *);
49extern int mvme16x_set_clock_mmss (unsigned long);
50extern void mvme16x_reset (void); 49extern void mvme16x_reset (void);
51 50
52int bcd2int (unsigned char b); 51int bcd2int (unsigned char b);
@@ -280,7 +279,6 @@ void __init config_mvme16x(void)
280 mach_init_IRQ = mvme16x_init_IRQ; 279 mach_init_IRQ = mvme16x_init_IRQ;
281 arch_gettimeoffset = mvme16x_gettimeoffset; 280 arch_gettimeoffset = mvme16x_gettimeoffset;
282 mach_hwclk = mvme16x_hwclk; 281 mach_hwclk = mvme16x_hwclk;
283 mach_set_clock_mmss = mvme16x_set_clock_mmss;
284 mach_reset = mvme16x_reset; 282 mach_reset = mvme16x_reset;
285 mach_get_model = mvme16x_get_model; 283 mach_get_model = mvme16x_get_model;
286 mach_get_hardware_list = mvme16x_get_hardware_list; 284 mach_get_hardware_list = mvme16x_get_hardware_list;
@@ -411,9 +409,3 @@ int mvme16x_hwclk(int op, struct rtc_time *t)
411 } 409 }
412 return 0; 410 return 0;
413} 411}
414
415int mvme16x_set_clock_mmss (unsigned long nowtime)
416{
417 return 0;
418}
419
diff --git a/arch/m68k/q40/config.c b/arch/m68k/q40/config.c
index 71c0867ecf20..96810d91da2b 100644
--- a/arch/m68k/q40/config.c
+++ b/arch/m68k/q40/config.c
@@ -43,7 +43,6 @@ extern void q40_sched_init(irq_handler_t handler);
43static u32 q40_gettimeoffset(void); 43static u32 q40_gettimeoffset(void);
44static int q40_hwclk(int, struct rtc_time *); 44static int q40_hwclk(int, struct rtc_time *);
45static unsigned int q40_get_ss(void); 45static unsigned int q40_get_ss(void);
46static int q40_set_clock_mmss(unsigned long);
47static int q40_get_rtc_pll(struct rtc_pll_info *pll); 46static int q40_get_rtc_pll(struct rtc_pll_info *pll);
48static int q40_set_rtc_pll(struct rtc_pll_info *pll); 47static int q40_set_rtc_pll(struct rtc_pll_info *pll);
49 48
@@ -175,7 +174,6 @@ void __init config_q40(void)
175 mach_get_ss = q40_get_ss; 174 mach_get_ss = q40_get_ss;
176 mach_get_rtc_pll = q40_get_rtc_pll; 175 mach_get_rtc_pll = q40_get_rtc_pll;
177 mach_set_rtc_pll = q40_set_rtc_pll; 176 mach_set_rtc_pll = q40_set_rtc_pll;
178 mach_set_clock_mmss = q40_set_clock_mmss;
179 177
180 mach_reset = q40_reset; 178 mach_reset = q40_reset;
181 mach_get_model = q40_get_model; 179 mach_get_model = q40_get_model;
@@ -267,34 +265,6 @@ static unsigned int q40_get_ss(void)
267 return bcd2bin(Q40_RTC_SECS); 265 return bcd2bin(Q40_RTC_SECS);
268} 266}
269 267
270/*
271 * Set the minutes and seconds from seconds value 'nowtime'. Fail if
272 * clock is out by > 30 minutes. Logic lifted from atari code.
273 */
274
275static int q40_set_clock_mmss(unsigned long nowtime)
276{
277 int retval = 0;
278 short real_seconds = nowtime % 60, real_minutes = (nowtime / 60) % 60;
279
280 int rtc_minutes;
281
282 rtc_minutes = bcd2bin(Q40_RTC_MINS);
283
284 if ((rtc_minutes < real_minutes ?
285 real_minutes - rtc_minutes :
286 rtc_minutes - real_minutes) < 30) {
287 Q40_RTC_CTRL |= Q40_RTC_WRITE;
288 Q40_RTC_MINS = bin2bcd(real_minutes);
289 Q40_RTC_SECS = bin2bcd(real_seconds);
290 Q40_RTC_CTRL &= ~(Q40_RTC_WRITE);
291 } else
292 retval = -1;
293
294 return retval;
295}
296
297
298/* get and set PLL calibration of RTC clock */ 268/* get and set PLL calibration of RTC clock */
299#define Q40_RTC_PLL_MASK ((1<<5)-1) 269#define Q40_RTC_PLL_MASK ((1<<5)-1)
300#define Q40_RTC_PLL_SIGN (1<<5) 270#define Q40_RTC_PLL_SIGN (1<<5)
diff --git a/arch/m68k/sun3/config.c b/arch/m68k/sun3/config.c
index 1d28d380e8cc..79a2bb857906 100644
--- a/arch/m68k/sun3/config.c
+++ b/arch/m68k/sun3/config.c
@@ -123,10 +123,6 @@ static void __init sun3_bootmem_alloc(unsigned long memory_start,
123 availmem = memory_start; 123 availmem = memory_start;
124 124
125 m68k_setup_node(0); 125 m68k_setup_node(0);
126 availmem += init_bootmem(start_page, num_pages);
127 availmem = (availmem + (PAGE_SIZE-1)) & PAGE_MASK;
128
129 free_bootmem(__pa(availmem), memory_end - (availmem));
130} 126}
131 127
132 128
diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h
index 0ab176bdb8e8..79be687de4ab 100644
--- a/arch/mips/include/asm/atomic.h
+++ b/arch/mips/include/asm/atomic.h
@@ -274,97 +274,12 @@ static __inline__ int atomic_sub_if_positive(int i, atomic_t * v)
274#define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n))) 274#define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n)))
275#define atomic_xchg(v, new) (xchg(&((v)->counter), (new))) 275#define atomic_xchg(v, new) (xchg(&((v)->counter), (new)))
276 276
277/**
278 * __atomic_add_unless - add unless the number is a given value
279 * @v: pointer of type atomic_t
280 * @a: the amount to add to v...
281 * @u: ...unless v is equal to u.
282 *
283 * Atomically adds @a to @v, so long as it was not @u.
284 * Returns the old value of @v.
285 */
286static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
287{
288 int c, old;
289 c = atomic_read(v);
290 for (;;) {
291 if (unlikely(c == (u)))
292 break;
293 old = atomic_cmpxchg((v), c, c + (a));
294 if (likely(old == c))
295 break;
296 c = old;
297 }
298 return c;
299}
300
301#define atomic_dec_return(v) atomic_sub_return(1, (v))
302#define atomic_inc_return(v) atomic_add_return(1, (v))
303
304/*
305 * atomic_sub_and_test - subtract value from variable and test result
306 * @i: integer value to subtract
307 * @v: pointer of type atomic_t
308 *
309 * Atomically subtracts @i from @v and returns
310 * true if the result is zero, or false for all
311 * other cases.
312 */
313#define atomic_sub_and_test(i, v) (atomic_sub_return((i), (v)) == 0)
314
315/*
316 * atomic_inc_and_test - increment and test
317 * @v: pointer of type atomic_t
318 *
319 * Atomically increments @v by 1
320 * and returns true if the result is zero, or false for all
321 * other cases.
322 */
323#define atomic_inc_and_test(v) (atomic_inc_return(v) == 0)
324
325/*
326 * atomic_dec_and_test - decrement by 1 and test
327 * @v: pointer of type atomic_t
328 *
329 * Atomically decrements @v by 1 and
330 * returns true if the result is 0, or false for all other
331 * cases.
332 */
333#define atomic_dec_and_test(v) (atomic_sub_return(1, (v)) == 0)
334
335/* 277/*
336 * atomic_dec_if_positive - decrement by 1 if old value positive 278 * atomic_dec_if_positive - decrement by 1 if old value positive
337 * @v: pointer of type atomic_t 279 * @v: pointer of type atomic_t
338 */ 280 */
339#define atomic_dec_if_positive(v) atomic_sub_if_positive(1, v) 281#define atomic_dec_if_positive(v) atomic_sub_if_positive(1, v)
340 282
341/*
342 * atomic_inc - increment atomic variable
343 * @v: pointer of type atomic_t
344 *
345 * Atomically increments @v by 1.
346 */
347#define atomic_inc(v) atomic_add(1, (v))
348
349/*
350 * atomic_dec - decrement and test
351 * @v: pointer of type atomic_t
352 *
353 * Atomically decrements @v by 1.
354 */
355#define atomic_dec(v) atomic_sub(1, (v))
356
357/*
358 * atomic_add_negative - add and test if negative
359 * @v: pointer of type atomic_t
360 * @i: integer value to add
361 *
362 * Atomically adds @i to @v and returns true
363 * if the result is negative, or false when
364 * result is greater than or equal to zero.
365 */
366#define atomic_add_negative(i, v) (atomic_add_return(i, (v)) < 0)
367
368#ifdef CONFIG_64BIT 283#ifdef CONFIG_64BIT
369 284
370#define ATOMIC64_INIT(i) { (i) } 285#define ATOMIC64_INIT(i) { (i) }
@@ -620,99 +535,12 @@ static __inline__ long atomic64_sub_if_positive(long i, atomic64_t * v)
620 ((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n))) 535 ((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n)))
621#define atomic64_xchg(v, new) (xchg(&((v)->counter), (new))) 536#define atomic64_xchg(v, new) (xchg(&((v)->counter), (new)))
622 537
623/**
624 * atomic64_add_unless - add unless the number is a given value
625 * @v: pointer of type atomic64_t
626 * @a: the amount to add to v...
627 * @u: ...unless v is equal to u.
628 *
629 * Atomically adds @a to @v, so long as it was not @u.
630 * Returns true iff @v was not @u.
631 */
632static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u)
633{
634 long c, old;
635 c = atomic64_read(v);
636 for (;;) {
637 if (unlikely(c == (u)))
638 break;
639 old = atomic64_cmpxchg((v), c, c + (a));
640 if (likely(old == c))
641 break;
642 c = old;
643 }
644 return c != (u);
645}
646
647#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
648
649#define atomic64_dec_return(v) atomic64_sub_return(1, (v))
650#define atomic64_inc_return(v) atomic64_add_return(1, (v))
651
652/*
653 * atomic64_sub_and_test - subtract value from variable and test result
654 * @i: integer value to subtract
655 * @v: pointer of type atomic64_t
656 *
657 * Atomically subtracts @i from @v and returns
658 * true if the result is zero, or false for all
659 * other cases.
660 */
661#define atomic64_sub_and_test(i, v) (atomic64_sub_return((i), (v)) == 0)
662
663/*
664 * atomic64_inc_and_test - increment and test
665 * @v: pointer of type atomic64_t
666 *
667 * Atomically increments @v by 1
668 * and returns true if the result is zero, or false for all
669 * other cases.
670 */
671#define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0)
672
673/*
674 * atomic64_dec_and_test - decrement by 1 and test
675 * @v: pointer of type atomic64_t
676 *
677 * Atomically decrements @v by 1 and
678 * returns true if the result is 0, or false for all other
679 * cases.
680 */
681#define atomic64_dec_and_test(v) (atomic64_sub_return(1, (v)) == 0)
682
683/* 538/*
684 * atomic64_dec_if_positive - decrement by 1 if old value positive 539 * atomic64_dec_if_positive - decrement by 1 if old value positive
685 * @v: pointer of type atomic64_t 540 * @v: pointer of type atomic64_t
686 */ 541 */
687#define atomic64_dec_if_positive(v) atomic64_sub_if_positive(1, v) 542#define atomic64_dec_if_positive(v) atomic64_sub_if_positive(1, v)
688 543
689/*
690 * atomic64_inc - increment atomic variable
691 * @v: pointer of type atomic64_t
692 *
693 * Atomically increments @v by 1.
694 */
695#define atomic64_inc(v) atomic64_add(1, (v))
696
697/*
698 * atomic64_dec - decrement and test
699 * @v: pointer of type atomic64_t
700 *
701 * Atomically decrements @v by 1.
702 */
703#define atomic64_dec(v) atomic64_sub(1, (v))
704
705/*
706 * atomic64_add_negative - add and test if negative
707 * @v: pointer of type atomic64_t
708 * @i: integer value to add
709 *
710 * Atomically adds @i to @v and returns true
711 * if the result is negative, or false when
712 * result is greater than or equal to zero.
713 */
714#define atomic64_add_negative(i, v) (atomic64_add_return(i, (v)) < 0)
715
716#endif /* CONFIG_64BIT */ 544#endif /* CONFIG_64BIT */
717 545
718#endif /* _ASM_ATOMIC_H */ 546#endif /* _ASM_ATOMIC_H */
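[Editor's note] The large deletions above are possible because <linux/atomic.h> now supplies generic fallbacks: an architecture only has to provide the core operations and the derived helpers are generated for it. Roughly what those fallbacks look like (a simplified sketch, not the exact kernel text):

    /* Simplified versions of the generic fallbacks in <linux/atomic.h>. */

    #ifndef atomic_inc
    #define atomic_inc(v)                   atomic_add(1, (v))
    #endif

    #ifndef atomic_dec
    #define atomic_dec(v)                   atomic_sub(1, (v))
    #endif

    #ifndef atomic_dec_and_test
    #define atomic_dec_and_test(v)          (atomic_dec_return(v) == 0)
    #endif

    #ifndef atomic_add_negative
    #define atomic_add_negative(i, v)       (atomic_add_return(i, (v)) < 0)
    #endif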
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index 7cd76f93a438..f7ea8e21656b 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -515,7 +515,7 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
515 dvcpu->arch.wait = 0; 515 dvcpu->arch.wait = 0;
516 516
517 if (swq_has_sleeper(&dvcpu->wq)) 517 if (swq_has_sleeper(&dvcpu->wq))
518 swake_up(&dvcpu->wq); 518 swake_up_one(&dvcpu->wq);
519 519
520 return 0; 520 return 0;
521} 521}
@@ -1204,7 +1204,7 @@ static void kvm_mips_comparecount_func(unsigned long data)
1204 1204
1205 vcpu->arch.wait = 0; 1205 vcpu->arch.wait = 0;
1206 if (swq_has_sleeper(&vcpu->wq)) 1206 if (swq_has_sleeper(&vcpu->wq))
1207 swake_up(&vcpu->wq); 1207 swake_up_one(&vcpu->wq);
1208} 1208}
1209 1209
1210/* low level hrtimer wake routine */ 1210/* low level hrtimer wake routine */
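[Editor's note] swake_up() was renamed to swake_up_one() to make explicit that simple wait queues only ever wake a single, exclusively-queued waiter. A hedged sketch of the waiter/waker pairing with the renamed API; the condition flag and function names are illustrative:

    #include <linux/sched.h>
    #include <linux/swait.h>

    static DECLARE_SWAIT_QUEUE_HEAD(example_wq);
    static bool example_ready;

    static void example_waiter(void)
    {
            DECLARE_SWAITQUEUE(wait);

            for (;;) {
                    prepare_to_swait_exclusive(&example_wq, &wait,
                                               TASK_INTERRUPTIBLE);
                    if (READ_ONCE(example_ready))
                            break;
                    schedule();
            }
            finish_swait(&example_wq, &wait);
    }

    static void example_waker(void)
    {
            WRITE_ONCE(example_ready, true);
            if (swait_active(&example_wq))
                    swake_up_one(&example_wq);      /* wakes exactly one waiter */
    }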
diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig
index 9ecad05bfc73..dfb6a79ba7ff 100644
--- a/arch/openrisc/Kconfig
+++ b/arch/openrisc/Kconfig
@@ -27,7 +27,6 @@ config OPENRISC
27 select GENERIC_STRNLEN_USER 27 select GENERIC_STRNLEN_USER
28 select GENERIC_SMP_IDLE_THREAD 28 select GENERIC_SMP_IDLE_THREAD
29 select MODULES_USE_ELF_RELA 29 select MODULES_USE_ELF_RELA
30 select MULTI_IRQ_HANDLER
31 select HAVE_DEBUG_STACKOVERFLOW 30 select HAVE_DEBUG_STACKOVERFLOW
32 select OR1K_PIC 31 select OR1K_PIC
33 select CPU_NO_EFFICIENT_FFS if !OPENRISC_HAVE_INST_FF1 32 select CPU_NO_EFFICIENT_FFS if !OPENRISC_HAVE_INST_FF1
@@ -36,6 +35,7 @@ config OPENRISC
36 select ARCH_USE_QUEUED_RWLOCKS 35 select ARCH_USE_QUEUED_RWLOCKS
37 select OMPIC if SMP 36 select OMPIC if SMP
38 select ARCH_WANT_FRAME_POINTERS 37 select ARCH_WANT_FRAME_POINTERS
38 select GENERIC_IRQ_MULTI_HANDLER
39 39
40config CPU_BIG_ENDIAN 40config CPU_BIG_ENDIAN
41 def_bool y 41 def_bool y
@@ -69,9 +69,6 @@ config STACKTRACE_SUPPORT
69config LOCKDEP_SUPPORT 69config LOCKDEP_SUPPORT
70 def_bool y 70 def_bool y
71 71
72config MULTI_IRQ_HANDLER
73 def_bool y
74
75source "init/Kconfig" 72source "init/Kconfig"
76 73
77source "kernel/Kconfig.freezer" 74source "kernel/Kconfig.freezer"
diff --git a/arch/openrisc/include/asm/atomic.h b/arch/openrisc/include/asm/atomic.h
index 146e1660f00e..b589fac39b92 100644
--- a/arch/openrisc/include/asm/atomic.h
+++ b/arch/openrisc/include/asm/atomic.h
@@ -100,7 +100,7 @@ ATOMIC_OP(xor)
100 * 100 *
101 * This is often used through atomic_inc_not_zero() 101 * This is often used through atomic_inc_not_zero()
102 */ 102 */
103static inline int __atomic_add_unless(atomic_t *v, int a, int u) 103static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
104{ 104{
105 int old, tmp; 105 int old, tmp;
106 106
@@ -119,7 +119,7 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
119 119
120 return old; 120 return old;
121} 121}
122#define __atomic_add_unless __atomic_add_unless 122#define atomic_fetch_add_unless atomic_fetch_add_unless
123 123
124#include <asm-generic/atomic.h> 124#include <asm-generic/atomic.h>
125 125
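[Editor's note] With the arch routine renamed to atomic_fetch_add_unless() and advertised through the matching #define, the generic layer can build the remaining "unless" helpers on top of it. Approximately (a sketch, not verbatim kernel code):

    /* Derived helpers, roughly as the generic atomic code builds them. */

    static inline bool atomic_add_unless(atomic_t *v, int a, int u)
    {
            /* true iff the add happened, i.e. the old value was not 'u' */
            return atomic_fetch_add_unless(v, a, u) != u;
    }

    static inline bool atomic_inc_not_zero(atomic_t *v)
    {
            return atomic_add_unless(v, 1, 0);
    }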
diff --git a/arch/openrisc/include/asm/cmpxchg.h b/arch/openrisc/include/asm/cmpxchg.h
index d29f7db53906..f9cd43a39d72 100644
--- a/arch/openrisc/include/asm/cmpxchg.h
+++ b/arch/openrisc/include/asm/cmpxchg.h
@@ -16,8 +16,9 @@
16#ifndef __ASM_OPENRISC_CMPXCHG_H 16#ifndef __ASM_OPENRISC_CMPXCHG_H
17#define __ASM_OPENRISC_CMPXCHG_H 17#define __ASM_OPENRISC_CMPXCHG_H
18 18
19#include <linux/bits.h>
20#include <linux/compiler.h>
19#include <linux/types.h> 21#include <linux/types.h>
20#include <linux/bitops.h>
21 22
22#define __HAVE_ARCH_CMPXCHG 1 23#define __HAVE_ARCH_CMPXCHG 1
23 24
diff --git a/arch/openrisc/include/asm/irq.h b/arch/openrisc/include/asm/irq.h
index d9eee0a2b7b4..eb612b1865d2 100644
--- a/arch/openrisc/include/asm/irq.h
+++ b/arch/openrisc/include/asm/irq.h
@@ -24,6 +24,4 @@
24 24
25#define NO_IRQ (-1) 25#define NO_IRQ (-1)
26 26
27extern void set_handle_irq(void (*handle_irq)(struct pt_regs *));
28
29#endif /* __ASM_OPENRISC_IRQ_H__ */ 27#endif /* __ASM_OPENRISC_IRQ_H__ */
diff --git a/arch/openrisc/kernel/irq.c b/arch/openrisc/kernel/irq.c
index 35e478a93116..5f9445effaf8 100644
--- a/arch/openrisc/kernel/irq.c
+++ b/arch/openrisc/kernel/irq.c
@@ -41,13 +41,6 @@ void __init init_IRQ(void)
41 irqchip_init(); 41 irqchip_init();
42} 42}
43 43
44static void (*handle_arch_irq)(struct pt_regs *);
45
46void __init set_handle_irq(void (*handle_irq)(struct pt_regs *))
47{
48 handle_arch_irq = handle_irq;
49}
50
51void __irq_entry do_IRQ(struct pt_regs *regs) 44void __irq_entry do_IRQ(struct pt_regs *regs)
52{ 45{
53 handle_arch_irq(regs); 46 handle_arch_irq(regs);
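[Editor's note] Selecting GENERIC_IRQ_MULTI_HANDLER lets the architecture drop its private handle_arch_irq plumbing: the generic IRQ core now owns that pointer and exports set_handle_irq() for irqchip drivers. A hedged sketch of a driver registering its top-level handler; the handler body and the NULL domain are placeholders, not code from this patch:

    #include <linux/irq.h>
    #include <linux/irqdesc.h>

    static void __irq_entry example_handle_irq(struct pt_regs *regs)
    {
            unsigned int hwirq = 0; /* placeholder: read pending source from hw */

            /* domain omitted for brevity; real drivers pass their own */
            handle_domain_irq(NULL, hwirq, regs);
    }

    static int __init example_irqchip_init(void)
    {
            /* generic replacement for the removed arch-private helper */
            set_handle_irq(example_handle_irq);
            return 0;
    }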
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 17526bebcbd2..e7705dde953f 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -11,7 +11,6 @@ config PARISC
11 select ARCH_HAS_ELF_RANDOMIZE 11 select ARCH_HAS_ELF_RANDOMIZE
12 select ARCH_HAS_STRICT_KERNEL_RWX 12 select ARCH_HAS_STRICT_KERNEL_RWX
13 select ARCH_HAS_UBSAN_SANITIZE_ALL 13 select ARCH_HAS_UBSAN_SANITIZE_ALL
14 select ARCH_WANTS_UBSAN_NO_NULL
15 select ARCH_SUPPORTS_MEMORY_FAILURE 14 select ARCH_SUPPORTS_MEMORY_FAILURE
16 select RTC_CLASS 15 select RTC_CLASS
17 select RTC_DRV_GENERIC 16 select RTC_DRV_GENERIC
@@ -195,7 +194,7 @@ config PREFETCH
195 194
196config MLONGCALLS 195config MLONGCALLS
197 bool "Enable the -mlong-calls compiler option for big kernels" 196 bool "Enable the -mlong-calls compiler option for big kernels"
198 def_bool y if (!MODULES) 197 default y
199 depends on PA8X00 198 depends on PA8X00
200 help 199 help
201 If you configure the kernel to include many drivers built-in instead 200 If you configure the kernel to include many drivers built-in instead
diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h
index 88bae6676c9b..118953d41763 100644
--- a/arch/parisc/include/asm/atomic.h
+++ b/arch/parisc/include/asm/atomic.h
@@ -77,30 +77,6 @@ static __inline__ int atomic_read(const atomic_t *v)
77#define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n))) 77#define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n)))
78#define atomic_xchg(v, new) (xchg(&((v)->counter), new)) 78#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
79 79
80/**
81 * __atomic_add_unless - add unless the number is a given value
82 * @v: pointer of type atomic_t
83 * @a: the amount to add to v...
84 * @u: ...unless v is equal to u.
85 *
86 * Atomically adds @a to @v, so long as it was not @u.
87 * Returns the old value of @v.
88 */
89static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
90{
91 int c, old;
92 c = atomic_read(v);
93 for (;;) {
94 if (unlikely(c == (u)))
95 break;
96 old = atomic_cmpxchg((v), c, c + (a));
97 if (likely(old == c))
98 break;
99 c = old;
100 }
101 return c;
102}
103
104#define ATOMIC_OP(op, c_op) \ 80#define ATOMIC_OP(op, c_op) \
105static __inline__ void atomic_##op(int i, atomic_t *v) \ 81static __inline__ void atomic_##op(int i, atomic_t *v) \
106{ \ 82{ \
@@ -160,28 +136,6 @@ ATOMIC_OPS(xor, ^=)
160#undef ATOMIC_OP_RETURN 136#undef ATOMIC_OP_RETURN
161#undef ATOMIC_OP 137#undef ATOMIC_OP
162 138
163#define atomic_inc(v) (atomic_add( 1,(v)))
164#define atomic_dec(v) (atomic_add( -1,(v)))
165
166#define atomic_inc_return(v) (atomic_add_return( 1,(v)))
167#define atomic_dec_return(v) (atomic_add_return( -1,(v)))
168
169#define atomic_add_negative(a, v) (atomic_add_return((a), (v)) < 0)
170
171/*
172 * atomic_inc_and_test - increment and test
173 * @v: pointer of type atomic_t
174 *
175 * Atomically increments @v by 1
176 * and returns true if the result is zero, or false for all
177 * other cases.
178 */
179#define atomic_inc_and_test(v) (atomic_inc_return(v) == 0)
180
181#define atomic_dec_and_test(v) (atomic_dec_return(v) == 0)
182
183#define atomic_sub_and_test(i,v) (atomic_sub_return((i),(v)) == 0)
184
185#define ATOMIC_INIT(i) { (i) } 139#define ATOMIC_INIT(i) { (i) }
186 140
187#ifdef CONFIG_64BIT 141#ifdef CONFIG_64BIT
@@ -264,72 +218,11 @@ atomic64_read(const atomic64_t *v)
264 return READ_ONCE((v)->counter); 218 return READ_ONCE((v)->counter);
265} 219}
266 220
267#define atomic64_inc(v) (atomic64_add( 1,(v)))
268#define atomic64_dec(v) (atomic64_add( -1,(v)))
269
270#define atomic64_inc_return(v) (atomic64_add_return( 1,(v)))
271#define atomic64_dec_return(v) (atomic64_add_return( -1,(v)))
272
273#define atomic64_add_negative(a, v) (atomic64_add_return((a), (v)) < 0)
274
275#define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0)
276#define atomic64_dec_and_test(v) (atomic64_dec_return(v) == 0)
277#define atomic64_sub_and_test(i,v) (atomic64_sub_return((i),(v)) == 0)
278
279/* exported interface */ 221/* exported interface */
280#define atomic64_cmpxchg(v, o, n) \ 222#define atomic64_cmpxchg(v, o, n) \
281 ((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n))) 223 ((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n)))
282#define atomic64_xchg(v, new) (xchg(&((v)->counter), new)) 224#define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
283 225
284/**
285 * atomic64_add_unless - add unless the number is a given value
286 * @v: pointer of type atomic64_t
287 * @a: the amount to add to v...
288 * @u: ...unless v is equal to u.
289 *
290 * Atomically adds @a to @v, so long as it was not @u.
291 * Returns the old value of @v.
292 */
293static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u)
294{
295 long c, old;
296 c = atomic64_read(v);
297 for (;;) {
298 if (unlikely(c == (u)))
299 break;
300 old = atomic64_cmpxchg((v), c, c + (a));
301 if (likely(old == c))
302 break;
303 c = old;
304 }
305 return c != (u);
306}
307
308#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
309
310/*
311 * atomic64_dec_if_positive - decrement by 1 if old value positive
312 * @v: pointer of type atomic_t
313 *
314 * The function returns the old value of *v minus 1, even if
315 * the atomic variable, v, was not decremented.
316 */
317static inline long atomic64_dec_if_positive(atomic64_t *v)
318{
319 long c, old, dec;
320 c = atomic64_read(v);
321 for (;;) {
322 dec = c - 1;
323 if (unlikely(dec < 0))
324 break;
325 old = atomic64_cmpxchg((v), c, dec);
326 if (likely(old == c))
327 break;
328 c = old;
329 }
330 return dec;
331}
332
333#endif /* !CONFIG_64BIT */ 226#endif /* !CONFIG_64BIT */
334 227
335 228
diff --git a/arch/parisc/include/asm/barrier.h b/arch/parisc/include/asm/barrier.h
new file mode 100644
index 000000000000..dbaaca84f27f
--- /dev/null
+++ b/arch/parisc/include/asm/barrier.h
@@ -0,0 +1,32 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2#ifndef __ASM_BARRIER_H
3#define __ASM_BARRIER_H
4
5#ifndef __ASSEMBLY__
6
7/* The synchronize caches instruction executes as a nop on systems in
8 which all memory references are performed in order. */
9#define synchronize_caches() __asm__ __volatile__ ("sync" : : : "memory")
10
11#if defined(CONFIG_SMP)
12#define mb() do { synchronize_caches(); } while (0)
13#define rmb() mb()
14#define wmb() mb()
15#define dma_rmb() mb()
16#define dma_wmb() mb()
17#else
18#define mb() barrier()
19#define rmb() barrier()
20#define wmb() barrier()
21#define dma_rmb() barrier()
22#define dma_wmb() barrier()
23#endif
24
25#define __smp_mb() mb()
26#define __smp_rmb() mb()
27#define __smp_wmb() mb()
28
29#include <asm-generic/barrier.h>
30
31#endif /* !__ASSEMBLY__ */
32#endif /* __ASM_BARRIER_H */
diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S
index e95207c0565e..1b4732e20137 100644
--- a/arch/parisc/kernel/entry.S
+++ b/arch/parisc/kernel/entry.S
@@ -482,6 +482,8 @@
482 .macro tlb_unlock0 spc,tmp 482 .macro tlb_unlock0 spc,tmp
483#ifdef CONFIG_SMP 483#ifdef CONFIG_SMP
484 or,COND(=) %r0,\spc,%r0 484 or,COND(=) %r0,\spc,%r0
485 sync
486 or,COND(=) %r0,\spc,%r0
485 stw \spc,0(\tmp) 487 stw \spc,0(\tmp)
486#endif 488#endif
487 .endm 489 .endm
diff --git a/arch/parisc/kernel/pacache.S b/arch/parisc/kernel/pacache.S
index 22e6374ece44..97451e67d35b 100644
--- a/arch/parisc/kernel/pacache.S
+++ b/arch/parisc/kernel/pacache.S
@@ -353,6 +353,7 @@ ENDPROC_CFI(flush_data_cache_local)
353 .macro tlb_unlock la,flags,tmp 353 .macro tlb_unlock la,flags,tmp
354#ifdef CONFIG_SMP 354#ifdef CONFIG_SMP
355 ldi 1,\tmp 355 ldi 1,\tmp
356 sync
356 stw \tmp,0(\la) 357 stw \tmp,0(\la)
357 mtsm \flags 358 mtsm \flags
358#endif 359#endif
diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S
index e775f80ae28c..4886a6db42e9 100644
--- a/arch/parisc/kernel/syscall.S
+++ b/arch/parisc/kernel/syscall.S
@@ -633,6 +633,7 @@ cas_action:
633 sub,<> %r28, %r25, %r0 633 sub,<> %r28, %r25, %r0
6342: stw,ma %r24, 0(%r26) 6342: stw,ma %r24, 0(%r26)
635 /* Free lock */ 635 /* Free lock */
636 sync
636 stw,ma %r20, 0(%sr2,%r20) 637 stw,ma %r20, 0(%sr2,%r20)
637#if ENABLE_LWS_DEBUG 638#if ENABLE_LWS_DEBUG
638 /* Clear thread register indicator */ 639 /* Clear thread register indicator */
@@ -647,6 +648,7 @@ cas_action:
6473: 6483:
648 /* Error occurred on load or store */ 649 /* Error occurred on load or store */
649 /* Free lock */ 650 /* Free lock */
651 sync
650 stw %r20, 0(%sr2,%r20) 652 stw %r20, 0(%sr2,%r20)
651#if ENABLE_LWS_DEBUG 653#if ENABLE_LWS_DEBUG
652 stw %r0, 4(%sr2,%r20) 654 stw %r0, 4(%sr2,%r20)
@@ -848,6 +850,7 @@ cas2_action:
848 850
849cas2_end: 851cas2_end:
850 /* Free lock */ 852 /* Free lock */
853 sync
851 stw,ma %r20, 0(%sr2,%r20) 854 stw,ma %r20, 0(%sr2,%r20)
852 /* Enable interrupts */ 855 /* Enable interrupts */
853 ssm PSW_SM_I, %r0 856 ssm PSW_SM_I, %r0
@@ -858,6 +861,7 @@ cas2_end:
85822: 86122:
859 /* Error occurred on load or store */ 862 /* Error occurred on load or store */
860 /* Free lock */ 863 /* Free lock */
864 sync
861 stw %r20, 0(%sr2,%r20) 865 stw %r20, 0(%sr2,%r20)
862 ssm PSW_SM_I, %r0 866 ssm PSW_SM_I, %r0
863 ldo 1(%r0),%r28 867 ldo 1(%r0),%r28
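[Editor's note] The added "sync" before each store that frees the lock word gives the ldcw-style lock the release ordering it otherwise lacks: everything done while holding the lock must be visible before the unlocking store. In generic kernel C the same requirement is what smp_store_release() expresses; a hedged sketch of the equivalent pattern (names are illustrative):

    #include <linux/atomic.h>

    static unsigned int example_lock = 1;   /* 1 == free, 0 == taken (ldcw style) */
    static int example_shared;

    static void example_unlock(void)
    {
            example_shared++;               /* work done under the lock */

            /* order the store above before the store that frees the lock;
             * on parisc this is what the extra "sync" provides.
             */
            smp_store_release(&example_lock, 1);
    }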
diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index 682b3e6a1e21..963abf8bf1c0 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -18,18 +18,11 @@
18 * a "bne-" instruction at the end, so an isync is enough as a acquire barrier 18 * a "bne-" instruction at the end, so an isync is enough as a acquire barrier
19 * on the platform without lwsync. 19 * on the platform without lwsync.
20 */ 20 */
21#define __atomic_op_acquire(op, args...) \ 21#define __atomic_acquire_fence() \
22({ \ 22 __asm__ __volatile__(PPC_ACQUIRE_BARRIER "" : : : "memory")
23 typeof(op##_relaxed(args)) __ret = op##_relaxed(args); \ 23
24 __asm__ __volatile__(PPC_ACQUIRE_BARRIER "" : : : "memory"); \ 24#define __atomic_release_fence() \
25 __ret; \ 25 __asm__ __volatile__(PPC_RELEASE_BARRIER "" : : : "memory")
26})
27
28#define __atomic_op_release(op, args...) \
29({ \
30 __asm__ __volatile__(PPC_RELEASE_BARRIER "" : : : "memory"); \
31 op##_relaxed(args); \
32})
33 26
34static __inline__ int atomic_read(const atomic_t *v) 27static __inline__ int atomic_read(const atomic_t *v)
35{ 28{
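[Editor's note] Instead of each architecture open-coding __atomic_op_acquire()/__atomic_op_release(), it now only supplies the fences and the generic wrappers splice them around the _relaxed operation. Roughly what the generic wrappers look like (simplified sketch of <linux/atomic.h>):

    /* Simplified sketch of the generic wrappers built from the fences. */

    #ifndef __atomic_op_acquire
    #define __atomic_op_acquire(op, args...)                                \
    ({                                                                      \
            typeof(op##_relaxed(args)) __ret = op##_relaxed(args);          \
            __atomic_acquire_fence();                                       \
            __ret;                                                          \
    })
    #endif

    #ifndef __atomic_op_release
    #define __atomic_op_release(op, args...)                                \
    ({                                                                      \
            __atomic_release_fence();                                       \
            op##_relaxed(args);                                             \
    })
    #endif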
@@ -129,8 +122,6 @@ ATOMIC_OPS(xor, xor)
129#undef ATOMIC_OP_RETURN_RELAXED 122#undef ATOMIC_OP_RETURN_RELAXED
130#undef ATOMIC_OP 123#undef ATOMIC_OP
131 124
132#define atomic_add_negative(a, v) (atomic_add_return((a), (v)) < 0)
133
134static __inline__ void atomic_inc(atomic_t *v) 125static __inline__ void atomic_inc(atomic_t *v)
135{ 126{
136 int t; 127 int t;
@@ -145,6 +136,7 @@ static __inline__ void atomic_inc(atomic_t *v)
145 : "r" (&v->counter) 136 : "r" (&v->counter)
146 : "cc", "xer"); 137 : "cc", "xer");
147} 138}
139#define atomic_inc atomic_inc
148 140
149static __inline__ int atomic_inc_return_relaxed(atomic_t *v) 141static __inline__ int atomic_inc_return_relaxed(atomic_t *v)
150{ 142{
@@ -163,16 +155,6 @@ static __inline__ int atomic_inc_return_relaxed(atomic_t *v)
163 return t; 155 return t;
164} 156}
165 157
166/*
167 * atomic_inc_and_test - increment and test
168 * @v: pointer of type atomic_t
169 *
170 * Atomically increments @v by 1
171 * and returns true if the result is zero, or false for all
172 * other cases.
173 */
174#define atomic_inc_and_test(v) (atomic_inc_return(v) == 0)
175
176static __inline__ void atomic_dec(atomic_t *v) 158static __inline__ void atomic_dec(atomic_t *v)
177{ 159{
178 int t; 160 int t;
@@ -187,6 +169,7 @@ static __inline__ void atomic_dec(atomic_t *v)
187 : "r" (&v->counter) 169 : "r" (&v->counter)
188 : "cc", "xer"); 170 : "cc", "xer");
189} 171}
172#define atomic_dec atomic_dec
190 173
191static __inline__ int atomic_dec_return_relaxed(atomic_t *v) 174static __inline__ int atomic_dec_return_relaxed(atomic_t *v)
192{ 175{
@@ -218,7 +201,7 @@ static __inline__ int atomic_dec_return_relaxed(atomic_t *v)
218#define atomic_xchg_relaxed(v, new) xchg_relaxed(&((v)->counter), (new)) 201#define atomic_xchg_relaxed(v, new) xchg_relaxed(&((v)->counter), (new))
219 202
220/** 203/**
221 * __atomic_add_unless - add unless the number is a given value 204 * atomic_fetch_add_unless - add unless the number is a given value
222 * @v: pointer of type atomic_t 205 * @v: pointer of type atomic_t
223 * @a: the amount to add to v... 206 * @a: the amount to add to v...
224 * @u: ...unless v is equal to u. 207 * @u: ...unless v is equal to u.
@@ -226,13 +209,13 @@ static __inline__ int atomic_dec_return_relaxed(atomic_t *v)
226 * Atomically adds @a to @v, so long as it was not @u. 209 * Atomically adds @a to @v, so long as it was not @u.
227 * Returns the old value of @v. 210 * Returns the old value of @v.
228 */ 211 */
229static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u) 212static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u)
230{ 213{
231 int t; 214 int t;
232 215
233 __asm__ __volatile__ ( 216 __asm__ __volatile__ (
234 PPC_ATOMIC_ENTRY_BARRIER 217 PPC_ATOMIC_ENTRY_BARRIER
235"1: lwarx %0,0,%1 # __atomic_add_unless\n\ 218"1: lwarx %0,0,%1 # atomic_fetch_add_unless\n\
236 cmpw 0,%0,%3 \n\ 219 cmpw 0,%0,%3 \n\
237 beq 2f \n\ 220 beq 2f \n\
238 add %0,%2,%0 \n" 221 add %0,%2,%0 \n"
@@ -248,6 +231,7 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
248 231
249 return t; 232 return t;
250} 233}
234#define atomic_fetch_add_unless atomic_fetch_add_unless
251 235
252/** 236/**
253 * atomic_inc_not_zero - increment unless the number is zero 237 * atomic_inc_not_zero - increment unless the number is zero
@@ -280,9 +264,6 @@ static __inline__ int atomic_inc_not_zero(atomic_t *v)
280} 264}
281#define atomic_inc_not_zero(v) atomic_inc_not_zero((v)) 265#define atomic_inc_not_zero(v) atomic_inc_not_zero((v))
282 266
283#define atomic_sub_and_test(a, v) (atomic_sub_return((a), (v)) == 0)
284#define atomic_dec_and_test(v) (atomic_dec_return((v)) == 0)
285
286/* 267/*
287 * Atomically test *v and decrement if it is greater than 0. 268 * Atomically test *v and decrement if it is greater than 0.
288 * The function returns the old value of *v minus 1, even if 269 * The function returns the old value of *v minus 1, even if
@@ -412,8 +393,6 @@ ATOMIC64_OPS(xor, xor)
412#undef ATOMIC64_OP_RETURN_RELAXED 393#undef ATOMIC64_OP_RETURN_RELAXED
413#undef ATOMIC64_OP 394#undef ATOMIC64_OP
414 395
415#define atomic64_add_negative(a, v) (atomic64_add_return((a), (v)) < 0)
416
417static __inline__ void atomic64_inc(atomic64_t *v) 396static __inline__ void atomic64_inc(atomic64_t *v)
418{ 397{
419 long t; 398 long t;
@@ -427,6 +406,7 @@ static __inline__ void atomic64_inc(atomic64_t *v)
427 : "r" (&v->counter) 406 : "r" (&v->counter)
428 : "cc", "xer"); 407 : "cc", "xer");
429} 408}
409#define atomic64_inc atomic64_inc
430 410
431static __inline__ long atomic64_inc_return_relaxed(atomic64_t *v) 411static __inline__ long atomic64_inc_return_relaxed(atomic64_t *v)
432{ 412{
@@ -444,16 +424,6 @@ static __inline__ long atomic64_inc_return_relaxed(atomic64_t *v)
444 return t; 424 return t;
445} 425}
446 426
447/*
448 * atomic64_inc_and_test - increment and test
449 * @v: pointer of type atomic64_t
450 *
451 * Atomically increments @v by 1
452 * and returns true if the result is zero, or false for all
453 * other cases.
454 */
455#define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0)
456
457static __inline__ void atomic64_dec(atomic64_t *v) 427static __inline__ void atomic64_dec(atomic64_t *v)
458{ 428{
459 long t; 429 long t;
@@ -467,6 +437,7 @@ static __inline__ void atomic64_dec(atomic64_t *v)
467 : "r" (&v->counter) 437 : "r" (&v->counter)
468 : "cc", "xer"); 438 : "cc", "xer");
469} 439}
440#define atomic64_dec atomic64_dec
470 441
471static __inline__ long atomic64_dec_return_relaxed(atomic64_t *v) 442static __inline__ long atomic64_dec_return_relaxed(atomic64_t *v)
472{ 443{
@@ -487,9 +458,6 @@ static __inline__ long atomic64_dec_return_relaxed(atomic64_t *v)
487#define atomic64_inc_return_relaxed atomic64_inc_return_relaxed 458#define atomic64_inc_return_relaxed atomic64_inc_return_relaxed
488#define atomic64_dec_return_relaxed atomic64_dec_return_relaxed 459#define atomic64_dec_return_relaxed atomic64_dec_return_relaxed
489 460
490#define atomic64_sub_and_test(a, v) (atomic64_sub_return((a), (v)) == 0)
491#define atomic64_dec_and_test(v) (atomic64_dec_return((v)) == 0)
492
493/* 461/*
494 * Atomically test *v and decrement if it is greater than 0. 462 * Atomically test *v and decrement if it is greater than 0.
495 * The function returns the old value of *v minus 1. 463 * The function returns the old value of *v minus 1.
@@ -513,6 +481,7 @@ static __inline__ long atomic64_dec_if_positive(atomic64_t *v)
513 481
514 return t; 482 return t;
515} 483}
484#define atomic64_dec_if_positive atomic64_dec_if_positive
516 485
517#define atomic64_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n))) 486#define atomic64_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n)))
518#define atomic64_cmpxchg_relaxed(v, o, n) \ 487#define atomic64_cmpxchg_relaxed(v, o, n) \
@@ -524,7 +493,7 @@ static __inline__ long atomic64_dec_if_positive(atomic64_t *v)
524#define atomic64_xchg_relaxed(v, new) xchg_relaxed(&((v)->counter), (new)) 493#define atomic64_xchg_relaxed(v, new) xchg_relaxed(&((v)->counter), (new))
525 494
526/** 495/**
527 * atomic64_add_unless - add unless the number is a given value 496 * atomic64_fetch_add_unless - add unless the number is a given value
528 * @v: pointer of type atomic64_t 497 * @v: pointer of type atomic64_t
529 * @a: the amount to add to v... 498 * @a: the amount to add to v...
530 * @u: ...unless v is equal to u. 499 * @u: ...unless v is equal to u.
@@ -532,13 +501,13 @@ static __inline__ long atomic64_dec_if_positive(atomic64_t *v)
532 * Atomically adds @a to @v, so long as it was not @u. 501 * Atomically adds @a to @v, so long as it was not @u.
533 * Returns the old value of @v. 502 * Returns the old value of @v.
534 */ 503 */
535static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u) 504static __inline__ long atomic64_fetch_add_unless(atomic64_t *v, long a, long u)
536{ 505{
537 long t; 506 long t;
538 507
539 __asm__ __volatile__ ( 508 __asm__ __volatile__ (
540 PPC_ATOMIC_ENTRY_BARRIER 509 PPC_ATOMIC_ENTRY_BARRIER
541"1: ldarx %0,0,%1 # __atomic_add_unless\n\ 510"1: ldarx %0,0,%1 # atomic64_fetch_add_unless\n\
542 cmpd 0,%0,%3 \n\ 511 cmpd 0,%0,%3 \n\
543 beq 2f \n\ 512 beq 2f \n\
544 add %0,%2,%0 \n" 513 add %0,%2,%0 \n"
@@ -551,8 +520,9 @@ static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u)
551 : "r" (&v->counter), "r" (a), "r" (u) 520 : "r" (&v->counter), "r" (a), "r" (u)
552 : "cc", "memory"); 521 : "cc", "memory");
553 522
554 return t != u; 523 return t;
555} 524}
525#define atomic64_fetch_add_unless atomic64_fetch_add_unless
556 526
557/** 527/**
558 * atomic_inc64_not_zero - increment unless the number is zero 528 * atomic_inc64_not_zero - increment unless the number is zero
@@ -582,6 +552,7 @@ static __inline__ int atomic64_inc_not_zero(atomic64_t *v)
582 552
583 return t1 != 0; 553 return t1 != 0;
584} 554}
555#define atomic64_inc_not_zero(v) atomic64_inc_not_zero((v))
585 556
586#endif /* __powerpc64__ */ 557#endif /* __powerpc64__ */
587 558
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 79d570cbf332..b2f89b621b15 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -143,24 +143,33 @@ static inline void mm_context_remove_copro(struct mm_struct *mm)
143{ 143{
144 int c; 144 int c;
145 145
146 c = atomic_dec_if_positive(&mm->context.copros);
147
148 /* Detect imbalance between add and remove */
149 WARN_ON(c < 0);
150
151 /* 146 /*
152 * Need to broadcast a global flush of the full mm before 147 * When removing the last copro, we need to broadcast a global
153 * decrementing active_cpus count, as the next TLBI may be 148 * flush of the full mm, as the next TLBI may be local and the
154 * local and the nMMU and/or PSL need to be cleaned up. 149 * nMMU and/or PSL need to be cleaned up.
155 * Should be rare enough so that it's acceptable. 150 *
151 * Both the 'copros' and 'active_cpus' counts are looked at in
152 * flush_all_mm() to determine the scope (local/global) of the
153 * TLBIs, so we need to flush first before decrementing
154 * 'copros'. If this API is used by several callers for the
155 * same context, it can lead to over-flushing. It's hopefully
156 * not common enough to be a problem.
156 * 157 *
157 * Skip on hash, as we don't know how to do the proper flush 158 * Skip on hash, as we don't know how to do the proper flush
158 * for the time being. Invalidations will remain global if 159 * for the time being. Invalidations will remain global if
159 * used on hash. 160 * used on hash. Note that we can't drop 'copros' either, as
161 * it could make some invalidations local with no flush
162 * in-between.
160 */ 163 */
161 if (c == 0 && radix_enabled()) { 164 if (radix_enabled()) {
162 flush_all_mm(mm); 165 flush_all_mm(mm);
163 dec_mm_active_cpus(mm); 166
167 c = atomic_dec_if_positive(&mm->context.copros);
168 /* Detect imbalance between add and remove */
169 WARN_ON(c < 0);
170
171 if (c == 0)
172 dec_mm_active_cpus(mm);
164 } 173 }
165} 174}
166#else 175#else
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index fe9733ffffaa..471aac313b89 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -42,6 +42,8 @@
42#include <asm/ppc-pci.h> 42#include <asm/ppc-pci.h>
43#include <asm/eeh.h> 43#include <asm/eeh.h>
44 44
45#include "../../../drivers/pci/pci.h"
46
45/* hose_spinlock protects accesses to the the phb_bitmap. */ 47/* hose_spinlock protects accesses to the the phb_bitmap. */
46static DEFINE_SPINLOCK(hose_spinlock); 48static DEFINE_SPINLOCK(hose_spinlock);
47LIST_HEAD(hose_list); 49LIST_HEAD(hose_list);
@@ -1014,7 +1016,7 @@ void pcibios_setup_bus_devices(struct pci_bus *bus)
1014 /* Cardbus can call us to add new devices to a bus, so ignore 1016 /* Cardbus can call us to add new devices to a bus, so ignore
1015 * those who are already fully discovered 1017 * those who are already fully discovered
1016 */ 1018 */
1017 if (dev->is_added) 1019 if (pci_dev_is_added(dev))
1018 continue; 1020 continue;
1019 1021
1020 pcibios_setup_device(dev); 1022 pcibios_setup_device(dev);
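[Editor's note] pci_dev_is_added() lives in the private drivers/pci/pci.h header, which is why the arch code now includes it by relative path rather than touching pci_dev->is_added directly. In essence the helper is a flag test on the device; a sketch from memory, so treat the exact flag name as an assumption:

    /* Sketch of the helper provided by drivers/pci/pci.h (not public API). */

    #define PCI_DEV_ADDED 0

    static inline bool pci_dev_is_added(const struct pci_dev *dev)
    {
            /* priv_flags is private to the PCI core */
            return test_bit(PCI_DEV_ADDED, &dev->priv_flags);
    }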
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index de686b340f4a..ee4a8854985e 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -216,7 +216,7 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
216 216
217 wqp = kvm_arch_vcpu_wq(vcpu); 217 wqp = kvm_arch_vcpu_wq(vcpu);
218 if (swq_has_sleeper(wqp)) { 218 if (swq_has_sleeper(wqp)) {
219 swake_up(wqp); 219 swake_up_one(wqp);
220 ++vcpu->stat.halt_wakeup; 220 ++vcpu->stat.halt_wakeup;
221 } 221 }
222 222
@@ -3188,7 +3188,7 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
3188 } 3188 }
3189 } 3189 }
3190 3190
3191 prepare_to_swait(&vc->wq, &wait, TASK_INTERRUPTIBLE); 3191 prepare_to_swait_exclusive(&vc->wq, &wait, TASK_INTERRUPTIBLE);
3192 3192
3193 if (kvmppc_vcore_check_block(vc)) { 3193 if (kvmppc_vcore_check_block(vc)) {
3194 finish_swait(&vc->wq, &wait); 3194 finish_swait(&vc->wq, &wait);
@@ -3311,7 +3311,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
3311 kvmppc_start_thread(vcpu, vc); 3311 kvmppc_start_thread(vcpu, vc);
3312 trace_kvm_guest_enter(vcpu); 3312 trace_kvm_guest_enter(vcpu);
3313 } else if (vc->vcore_state == VCORE_SLEEPING) { 3313 } else if (vc->vcore_state == VCORE_SLEEPING) {
3314 swake_up(&vc->wq); 3314 swake_up_one(&vc->wq);
3315 } 3315 }
3316 3316
3317 } 3317 }
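
The swake_up() to swake_up_one() and prepare_to_swait() to prepare_to_swait_exclusive() renames reflect that simple wait queues only ever hold exclusive waiters and wake exactly one of them. A minimal sketch of the wait/wake pairing these KVM paths use (condition checking and signal handling elided):

	/* Sketch: one exclusive waiter parked on a simple wait queue. */
	static void wait_for_event(struct swait_queue_head *wq,
				   bool (*done)(void *), void *arg)
	{
		DECLARE_SWAITQUEUE(wait);

		for (;;) {
			prepare_to_swait_exclusive(wq, &wait, TASK_INTERRUPTIBLE);
			if (done(arg))
				break;
			schedule();
		}
		finish_swait(wq, &wait);
	}

	/* The waker side only ever needs to kick a single sleeper. */
	static void signal_event(struct swait_queue_head *wq)
	{
		if (swq_has_sleeper(wq))
			swake_up_one(wq);
	}
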
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 5bd0eb6681bc..70b2e1e0f23c 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -46,6 +46,7 @@
46 46
47#include "powernv.h" 47#include "powernv.h"
48#include "pci.h" 48#include "pci.h"
49#include "../../../../drivers/pci/pci.h"
49 50
50#define PNV_IODA1_M64_NUM 16 /* Number of M64 BARs */ 51#define PNV_IODA1_M64_NUM 16 /* Number of M64 BARs */
51#define PNV_IODA1_M64_SEGS 8 /* Segments per M64 BAR */ 52#define PNV_IODA1_M64_SEGS 8 /* Segments per M64 BAR */
@@ -3138,7 +3139,7 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
3138 struct pci_dn *pdn; 3139 struct pci_dn *pdn;
3139 int mul, total_vfs; 3140 int mul, total_vfs;
3140 3141
3141 if (!pdev->is_physfn || pdev->is_added) 3142 if (!pdev->is_physfn || pci_dev_is_added(pdev))
3142 return; 3143 return;
3143 3144
3144 pdn = pci_get_pdn(pdev); 3145 pdn = pci_get_pdn(pdev);
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 139f0af6c3d9..8a4868a3964b 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -71,6 +71,7 @@
71#include <asm/security_features.h> 71#include <asm/security_features.h>
72 72
73#include "pseries.h" 73#include "pseries.h"
74#include "../../../../drivers/pci/pci.h"
74 75
75int CMO_PrPSP = -1; 76int CMO_PrPSP = -1;
76int CMO_SecPSP = -1; 77int CMO_SecPSP = -1;
@@ -664,7 +665,7 @@ static void pseries_pci_fixup_iov_resources(struct pci_dev *pdev)
664 const int *indexes; 665 const int *indexes;
665 struct device_node *dn = pci_device_to_OF_node(pdev); 666 struct device_node *dn = pci_device_to_OF_node(pdev);
666 667
667 if (!pdev->is_physfn || pdev->is_added) 668 if (!pdev->is_physfn || pci_dev_is_added(pdev))
668 return; 669 return;
669 /* Firmware must support open SR-IOV, otherwise don't configure */ 670 /* Firmware must support open SR-IOV, otherwise don't configure */
670 indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL); 671 indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL);
diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h
index 855115ace98c..c452359c9cb8 100644
--- a/arch/riscv/include/asm/atomic.h
+++ b/arch/riscv/include/asm/atomic.h
@@ -25,18 +25,11 @@
25 25
26#define ATOMIC_INIT(i) { (i) } 26#define ATOMIC_INIT(i) { (i) }
27 27
28#define __atomic_op_acquire(op, args...) \ 28#define __atomic_acquire_fence() \
29({ \ 29 __asm__ __volatile__(RISCV_ACQUIRE_BARRIER "" ::: "memory")
30 typeof(op##_relaxed(args)) __ret = op##_relaxed(args); \ 30
31 __asm__ __volatile__(RISCV_ACQUIRE_BARRIER "" ::: "memory"); \ 31#define __atomic_release_fence() \
32 __ret; \ 32 __asm__ __volatile__(RISCV_RELEASE_BARRIER "" ::: "memory");
33})
34
35#define __atomic_op_release(op, args...) \
36({ \
37 __asm__ __volatile__(RISCV_RELEASE_BARRIER "" ::: "memory"); \
38 op##_relaxed(args); \
39})
40 33
41static __always_inline int atomic_read(const atomic_t *v) 34static __always_inline int atomic_read(const atomic_t *v)
42{ 35{
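
Providing __atomic_acquire_fence()/__atomic_release_fence() lets riscv drop its private __atomic_op_acquire/__atomic_op_release wrappers: the generic atomic headers can build the ordered variants from the _relaxed form plus the arch fences. A sketch of that generic construction, following the pattern the deleted macros used rather than quoting the common header verbatim:

	/* Sketch of the generic wrappers the arch fences plug into. */
	#define __atomic_op_acquire(op, args...)				\
	({									\
		typeof(op##_relaxed(args)) __ret = op##_relaxed(args);		\
		__atomic_acquire_fence();					\
		__ret;								\
	})

	#define __atomic_op_release(op, args...)				\
	({									\
		__atomic_release_fence();					\
		op##_relaxed(args);						\
	})
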
@@ -209,130 +202,8 @@ ATOMIC_OPS(xor, xor, i)
209#undef ATOMIC_FETCH_OP 202#undef ATOMIC_FETCH_OP
210#undef ATOMIC_OP_RETURN 203#undef ATOMIC_OP_RETURN
211 204
212/*
213 * The extra atomic operations that are constructed from one of the core
214 * AMO-based operations above (aside from sub, which is easier to fit above).
215 * These are required to perform a full barrier, but they're OK this way
216 * because atomic_*_return is also required to perform a full barrier.
217 *
218 */
219#define ATOMIC_OP(op, func_op, comp_op, I, c_type, prefix) \
220static __always_inline \
221bool atomic##prefix##_##op(c_type i, atomic##prefix##_t *v) \
222{ \
223 return atomic##prefix##_##func_op##_return(i, v) comp_op I; \
224}
225
226#ifdef CONFIG_GENERIC_ATOMIC64
227#define ATOMIC_OPS(op, func_op, comp_op, I) \
228 ATOMIC_OP(op, func_op, comp_op, I, int, )
229#else
230#define ATOMIC_OPS(op, func_op, comp_op, I) \
231 ATOMIC_OP(op, func_op, comp_op, I, int, ) \
232 ATOMIC_OP(op, func_op, comp_op, I, long, 64)
233#endif
234
235ATOMIC_OPS(add_and_test, add, ==, 0)
236ATOMIC_OPS(sub_and_test, sub, ==, 0)
237ATOMIC_OPS(add_negative, add, <, 0)
238
239#undef ATOMIC_OP
240#undef ATOMIC_OPS
241
242#define ATOMIC_OP(op, func_op, I, c_type, prefix) \
243static __always_inline \
244void atomic##prefix##_##op(atomic##prefix##_t *v) \
245{ \
246 atomic##prefix##_##func_op(I, v); \
247}
248
249#define ATOMIC_FETCH_OP(op, func_op, I, c_type, prefix) \
250static __always_inline \
251c_type atomic##prefix##_fetch_##op##_relaxed(atomic##prefix##_t *v) \
252{ \
253 return atomic##prefix##_fetch_##func_op##_relaxed(I, v); \
254} \
255static __always_inline \
256c_type atomic##prefix##_fetch_##op(atomic##prefix##_t *v) \
257{ \
258 return atomic##prefix##_fetch_##func_op(I, v); \
259}
260
261#define ATOMIC_OP_RETURN(op, asm_op, c_op, I, c_type, prefix) \
262static __always_inline \
263c_type atomic##prefix##_##op##_return_relaxed(atomic##prefix##_t *v) \
264{ \
265 return atomic##prefix##_fetch_##op##_relaxed(v) c_op I; \
266} \
267static __always_inline \
268c_type atomic##prefix##_##op##_return(atomic##prefix##_t *v) \
269{ \
270 return atomic##prefix##_fetch_##op(v) c_op I; \
271}
272
273#ifdef CONFIG_GENERIC_ATOMIC64
274#define ATOMIC_OPS(op, asm_op, c_op, I) \
275 ATOMIC_OP( op, asm_op, I, int, ) \
276 ATOMIC_FETCH_OP( op, asm_op, I, int, ) \
277 ATOMIC_OP_RETURN(op, asm_op, c_op, I, int, )
278#else
279#define ATOMIC_OPS(op, asm_op, c_op, I) \
280 ATOMIC_OP( op, asm_op, I, int, ) \
281 ATOMIC_FETCH_OP( op, asm_op, I, int, ) \
282 ATOMIC_OP_RETURN(op, asm_op, c_op, I, int, ) \
283 ATOMIC_OP( op, asm_op, I, long, 64) \
284 ATOMIC_FETCH_OP( op, asm_op, I, long, 64) \
285 ATOMIC_OP_RETURN(op, asm_op, c_op, I, long, 64)
286#endif
287
288ATOMIC_OPS(inc, add, +, 1)
289ATOMIC_OPS(dec, add, +, -1)
290
291#define atomic_inc_return_relaxed atomic_inc_return_relaxed
292#define atomic_dec_return_relaxed atomic_dec_return_relaxed
293#define atomic_inc_return atomic_inc_return
294#define atomic_dec_return atomic_dec_return
295
296#define atomic_fetch_inc_relaxed atomic_fetch_inc_relaxed
297#define atomic_fetch_dec_relaxed atomic_fetch_dec_relaxed
298#define atomic_fetch_inc atomic_fetch_inc
299#define atomic_fetch_dec atomic_fetch_dec
300
301#ifndef CONFIG_GENERIC_ATOMIC64
302#define atomic64_inc_return_relaxed atomic64_inc_return_relaxed
303#define atomic64_dec_return_relaxed atomic64_dec_return_relaxed
304#define atomic64_inc_return atomic64_inc_return
305#define atomic64_dec_return atomic64_dec_return
306
307#define atomic64_fetch_inc_relaxed atomic64_fetch_inc_relaxed
308#define atomic64_fetch_dec_relaxed atomic64_fetch_dec_relaxed
309#define atomic64_fetch_inc atomic64_fetch_inc
310#define atomic64_fetch_dec atomic64_fetch_dec
311#endif
312
313#undef ATOMIC_OPS
314#undef ATOMIC_OP
315#undef ATOMIC_FETCH_OP
316#undef ATOMIC_OP_RETURN
317
318#define ATOMIC_OP(op, func_op, comp_op, I, prefix) \
319static __always_inline \
320bool atomic##prefix##_##op(atomic##prefix##_t *v) \
321{ \
322 return atomic##prefix##_##func_op##_return(v) comp_op I; \
323}
324
325ATOMIC_OP(inc_and_test, inc, ==, 0, )
326ATOMIC_OP(dec_and_test, dec, ==, 0, )
327#ifndef CONFIG_GENERIC_ATOMIC64
328ATOMIC_OP(inc_and_test, inc, ==, 0, 64)
329ATOMIC_OP(dec_and_test, dec, ==, 0, 64)
330#endif
331
332#undef ATOMIC_OP
333
334/* This is required to provide a full barrier on success. */ 205/* This is required to provide a full barrier on success. */
335static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u) 206static __always_inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
336{ 207{
337 int prev, rc; 208 int prev, rc;
338 209
@@ -349,9 +220,10 @@ static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u)
349 : "memory"); 220 : "memory");
350 return prev; 221 return prev;
351} 222}
223#define atomic_fetch_add_unless atomic_fetch_add_unless
352 224
353#ifndef CONFIG_GENERIC_ATOMIC64 225#ifndef CONFIG_GENERIC_ATOMIC64
354static __always_inline long __atomic64_add_unless(atomic64_t *v, long a, long u) 226static __always_inline long atomic64_fetch_add_unless(atomic64_t *v, long a, long u)
355{ 227{
356 long prev, rc; 228 long prev, rc;
357 229
@@ -368,27 +240,7 @@ static __always_inline long __atomic64_add_unless(atomic64_t *v, long a, long u)
368 : "memory"); 240 : "memory");
369 return prev; 241 return prev;
370} 242}
371 243#define atomic64_fetch_add_unless atomic64_fetch_add_unless
372static __always_inline int atomic64_add_unless(atomic64_t *v, long a, long u)
373{
374 return __atomic64_add_unless(v, a, u) != u;
375}
376#endif
377
378/*
379 * The extra atomic operations that are constructed from one of the core
380 * LR/SC-based operations above.
381 */
382static __always_inline int atomic_inc_not_zero(atomic_t *v)
383{
384 return __atomic_add_unless(v, 1, 0);
385}
386
387#ifndef CONFIG_GENERIC_ATOMIC64
388static __always_inline long atomic64_inc_not_zero(atomic64_t *v)
389{
390 return atomic64_add_unless(v, 1, 0);
391}
392#endif 244#endif
393 245
394/* 246/*
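
With the arch now exporting only atomic_fetch_add_unless()/atomic64_fetch_add_unless(), the helpers deleted above (add_unless, inc_not_zero, inc/dec and the *_and_test family) come from the generic atomic fallbacks instead. Roughly, and assuming the usual fallback shapes rather than quoting the generic header:

	/* Sketch of the generic fallbacks that replace the deleted riscv code. */
	static inline bool atomic_add_unless(atomic_t *v, int a, int u)
	{
		return atomic_fetch_add_unless(v, a, u) != u;
	}

	static inline bool atomic_inc_not_zero(atomic_t *v)
	{
		return atomic_add_unless(v, 1, 0);
	}

	static inline bool atomic_inc_and_test(atomic_t *v)
	{
		return atomic_inc_return(v) == 0;
	}
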
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 8a1863d9ed53..4fe5b2affa23 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -106,7 +106,6 @@ config S390
106 select ARCH_USE_BUILTIN_BSWAP 106 select ARCH_USE_BUILTIN_BSWAP
107 select ARCH_USE_CMPXCHG_LOCKREF 107 select ARCH_USE_CMPXCHG_LOCKREF
108 select ARCH_WANTS_DYNAMIC_TASK_STRUCT 108 select ARCH_WANTS_DYNAMIC_TASK_STRUCT
109 select ARCH_WANTS_UBSAN_NO_NULL
110 select ARCH_WANT_IPC_PARSE_VERSION 109 select ARCH_WANT_IPC_PARSE_VERSION
111 select BUILDTIME_EXTABLE_SORT 110 select BUILDTIME_EXTABLE_SORT
112 select CLONE_BACKWARDS2 111 select CLONE_BACKWARDS2
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
index 4b55532f15c4..fd20ab5d4cf7 100644
--- a/arch/s390/include/asm/atomic.h
+++ b/arch/s390/include/asm/atomic.h
@@ -55,17 +55,9 @@ static inline void atomic_add(int i, atomic_t *v)
55 __atomic_add(i, &v->counter); 55 __atomic_add(i, &v->counter);
56} 56}
57 57
58#define atomic_add_negative(_i, _v) (atomic_add_return(_i, _v) < 0)
59#define atomic_inc(_v) atomic_add(1, _v)
60#define atomic_inc_return(_v) atomic_add_return(1, _v)
61#define atomic_inc_and_test(_v) (atomic_add_return(1, _v) == 0)
62#define atomic_sub(_i, _v) atomic_add(-(int)(_i), _v) 58#define atomic_sub(_i, _v) atomic_add(-(int)(_i), _v)
63#define atomic_sub_return(_i, _v) atomic_add_return(-(int)(_i), _v) 59#define atomic_sub_return(_i, _v) atomic_add_return(-(int)(_i), _v)
64#define atomic_fetch_sub(_i, _v) atomic_fetch_add(-(int)(_i), _v) 60#define atomic_fetch_sub(_i, _v) atomic_fetch_add(-(int)(_i), _v)
65#define atomic_sub_and_test(_i, _v) (atomic_sub_return(_i, _v) == 0)
66#define atomic_dec(_v) atomic_sub(1, _v)
67#define atomic_dec_return(_v) atomic_sub_return(1, _v)
68#define atomic_dec_and_test(_v) (atomic_sub_return(1, _v) == 0)
69 61
70#define ATOMIC_OPS(op) \ 62#define ATOMIC_OPS(op) \
71static inline void atomic_##op(int i, atomic_t *v) \ 63static inline void atomic_##op(int i, atomic_t *v) \
@@ -90,21 +82,6 @@ static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
90 return __atomic_cmpxchg(&v->counter, old, new); 82 return __atomic_cmpxchg(&v->counter, old, new);
91} 83}
92 84
93static inline int __atomic_add_unless(atomic_t *v, int a, int u)
94{
95 int c, old;
96 c = atomic_read(v);
97 for (;;) {
98 if (unlikely(c == u))
99 break;
100 old = atomic_cmpxchg(v, c, c + a);
101 if (likely(old == c))
102 break;
103 c = old;
104 }
105 return c;
106}
107
108#define ATOMIC64_INIT(i) { (i) } 85#define ATOMIC64_INIT(i) { (i) }
109 86
110static inline long atomic64_read(const atomic64_t *v) 87static inline long atomic64_read(const atomic64_t *v)
@@ -168,50 +145,8 @@ ATOMIC64_OPS(xor)
168 145
169#undef ATOMIC64_OPS 146#undef ATOMIC64_OPS
170 147
171static inline int atomic64_add_unless(atomic64_t *v, long i, long u)
172{
173 long c, old;
174
175 c = atomic64_read(v);
176 for (;;) {
177 if (unlikely(c == u))
178 break;
179 old = atomic64_cmpxchg(v, c, c + i);
180 if (likely(old == c))
181 break;
182 c = old;
183 }
184 return c != u;
185}
186
187static inline long atomic64_dec_if_positive(atomic64_t *v)
188{
189 long c, old, dec;
190
191 c = atomic64_read(v);
192 for (;;) {
193 dec = c - 1;
194 if (unlikely(dec < 0))
195 break;
196 old = atomic64_cmpxchg((v), c, dec);
197 if (likely(old == c))
198 break;
199 c = old;
200 }
201 return dec;
202}
203
204#define atomic64_add_negative(_i, _v) (atomic64_add_return(_i, _v) < 0)
205#define atomic64_inc(_v) atomic64_add(1, _v)
206#define atomic64_inc_return(_v) atomic64_add_return(1, _v)
207#define atomic64_inc_and_test(_v) (atomic64_add_return(1, _v) == 0)
208#define atomic64_sub_return(_i, _v) atomic64_add_return(-(long)(_i), _v) 148#define atomic64_sub_return(_i, _v) atomic64_add_return(-(long)(_i), _v)
209#define atomic64_fetch_sub(_i, _v) atomic64_fetch_add(-(long)(_i), _v) 149#define atomic64_fetch_sub(_i, _v) atomic64_fetch_add(-(long)(_i), _v)
210#define atomic64_sub(_i, _v) atomic64_add(-(long)(_i), _v) 150#define atomic64_sub(_i, _v) atomic64_add(-(long)(_i), _v)
211#define atomic64_sub_and_test(_i, _v) (atomic64_sub_return(_i, _v) == 0)
212#define atomic64_dec(_v) atomic64_sub(1, _v)
213#define atomic64_dec_return(_v) atomic64_sub_return(1, _v)
214#define atomic64_dec_and_test(_v) (atomic64_sub_return(1, _v) == 0)
215#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
216 151
217#endif /* __ARCH_S390_ATOMIC__ */ 152#endif /* __ARCH_S390_ATOMIC__ */
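
The cmpxchg loops removed from the s390 header (add_unless, dec_if_positive and the inc/dec macro family) are likewise supplied by the generic fallbacks now. As a standalone illustration of the dec_if_positive semantics, not the kernel's fallback verbatim, a C11 model compiles and runs on its own:

	#include <stdatomic.h>
	#include <stdio.h>

	/* Decrement only if the result stays non-negative; return the new
	 * value, or the refused (negative) result when nothing was done. */
	static long dec_if_positive(atomic_long *v)
	{
		long c = atomic_load(v);

		do {
			if (c - 1 < 0)	/* would go negative: leave it alone */
				break;
		} while (!atomic_compare_exchange_weak(v, &c, c - 1));

		return c - 1;
	}

	int main(void)
	{
		atomic_long v = 1;

		printf("%ld\n", dec_if_positive(&v));	/* 0: decremented   */
		printf("%ld\n", dec_if_positive(&v));	/* -1: left at zero */
		return 0;
	}
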
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index cf561160ea88..e8766beee5ad 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -221,17 +221,22 @@ void read_persistent_clock64(struct timespec64 *ts)
221 ext_to_timespec64(clk, ts); 221 ext_to_timespec64(clk, ts);
222} 222}
223 223
224void read_boot_clock64(struct timespec64 *ts) 224void __init read_persistent_wall_and_boot_offset(struct timespec64 *wall_time,
225 struct timespec64 *boot_offset)
225{ 226{
226 unsigned char clk[STORE_CLOCK_EXT_SIZE]; 227 unsigned char clk[STORE_CLOCK_EXT_SIZE];
228 struct timespec64 boot_time;
227 __u64 delta; 229 __u64 delta;
228 230
229 delta = initial_leap_seconds + TOD_UNIX_EPOCH; 231 delta = initial_leap_seconds + TOD_UNIX_EPOCH;
230 memcpy(clk, tod_clock_base, 16); 232 memcpy(clk, tod_clock_base, STORE_CLOCK_EXT_SIZE);
231 *(__u64 *) &clk[1] -= delta; 233 *(__u64 *)&clk[1] -= delta;
232 if (*(__u64 *) &clk[1] > delta) 234 if (*(__u64 *)&clk[1] > delta)
233 clk[0]--; 235 clk[0]--;
234 ext_to_timespec64(clk, ts); 236 ext_to_timespec64(clk, &boot_time);
237
238 read_persistent_clock64(wall_time);
239 *boot_offset = timespec64_sub(*wall_time, boot_time);
235} 240}
236 241
237static u64 read_tod_clock(struct clocksource *cs) 242static u64 read_tod_clock(struct clocksource *cs)
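
The new hook returns the wall-clock time and the offset between wall time and the TOD-derived boot time in a single call, so the timekeeping core no longer needs a separate read_boot_clock64(). A rough sketch of a consumer deriving the boot timestamp from the pair (the consumer-side names are illustrative, not quoted from kernel/time/):

	/* Illustrative consumer of the new hook. */
	static void example_timekeeping_init(void)
	{
		struct timespec64 wall_time, boot_offset, boot_time;

		read_persistent_wall_and_boot_offset(&wall_time, &boot_offset);

		/* boot_offset = wall_time - boot_time, so: */
		boot_time = timespec64_sub(wall_time, boot_offset);

		/* ... hand wall_time / boot_time to the timekeeper ... */
		(void)boot_time;
	}
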
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index daa09f89ca2d..fcb55b02990e 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -1145,7 +1145,7 @@ void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu)
1145 * yield-candidate. 1145 * yield-candidate.
1146 */ 1146 */
1147 vcpu->preempted = true; 1147 vcpu->preempted = true;
1148 swake_up(&vcpu->wq); 1148 swake_up_one(&vcpu->wq);
1149 vcpu->stat.halt_wakeup++; 1149 vcpu->stat.halt_wakeup++;
1150 } 1150 }
1151 /* 1151 /*
diff --git a/arch/sh/include/asm/atomic.h b/arch/sh/include/asm/atomic.h
index 0fd0099f43cc..f37b95a80232 100644
--- a/arch/sh/include/asm/atomic.h
+++ b/arch/sh/include/asm/atomic.h
@@ -32,44 +32,9 @@
32#include <asm/atomic-irq.h> 32#include <asm/atomic-irq.h>
33#endif 33#endif
34 34
35#define atomic_add_negative(a, v) (atomic_add_return((a), (v)) < 0)
36#define atomic_dec_return(v) atomic_sub_return(1, (v))
37#define atomic_inc_return(v) atomic_add_return(1, (v))
38#define atomic_inc_and_test(v) (atomic_inc_return(v) == 0)
39#define atomic_sub_and_test(i,v) (atomic_sub_return((i), (v)) == 0)
40#define atomic_dec_and_test(v) (atomic_sub_return(1, (v)) == 0)
41
42#define atomic_inc(v) atomic_add(1, (v))
43#define atomic_dec(v) atomic_sub(1, (v))
44
45#define atomic_xchg(v, new) (xchg(&((v)->counter), new)) 35#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
46#define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n))) 36#define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n)))
47 37
48/**
49 * __atomic_add_unless - add unless the number is a given value
50 * @v: pointer of type atomic_t
51 * @a: the amount to add to v...
52 * @u: ...unless v is equal to u.
53 *
54 * Atomically adds @a to @v, so long as it was not @u.
55 * Returns the old value of @v.
56 */
57static inline int __atomic_add_unless(atomic_t *v, int a, int u)
58{
59 int c, old;
60 c = atomic_read(v);
61 for (;;) {
62 if (unlikely(c == (u)))
63 break;
64 old = atomic_cmpxchg((v), c, c + (a));
65 if (likely(old == c))
66 break;
67 c = old;
68 }
69
70 return c;
71}
72
73#endif /* CONFIG_CPU_J2 */ 38#endif /* CONFIG_CPU_J2 */
74 39
75#endif /* __ASM_SH_ATOMIC_H */ 40#endif /* __ASM_SH_ATOMIC_H */
diff --git a/arch/sh/include/asm/cmpxchg-xchg.h b/arch/sh/include/asm/cmpxchg-xchg.h
index 1e881f5db659..593a9704782b 100644
--- a/arch/sh/include/asm/cmpxchg-xchg.h
+++ b/arch/sh/include/asm/cmpxchg-xchg.h
@@ -8,7 +8,8 @@
8 * This work is licensed under the terms of the GNU GPL, version 2. See the 8 * This work is licensed under the terms of the GNU GPL, version 2. See the
9 * file "COPYING" in the main directory of this archive for more details. 9 * file "COPYING" in the main directory of this archive for more details.
10 */ 10 */
11#include <linux/bitops.h> 11#include <linux/bits.h>
12#include <linux/compiler.h>
12#include <asm/byteorder.h> 13#include <asm/byteorder.h>
13 14
14/* 15/*
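
Switching this header from <linux/bitops.h> to <linux/bits.h> (plus <linux/compiler.h>) trims the include to just the constant/mask helpers it actually needs. Schematically, and only as a reminder rather than a full quote of the header, the lighter include carries definitions along these lines:

	/* Schematic of the small helpers <linux/bits.h> provides
	 * (it also carries GENMASK()/GENMASK_ULL()). */
	#define BITS_PER_BYTE	8
	#define BIT(nr)		(1UL << (nr))
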
diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
index ac67828da201..410b263ef5c8 100644
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -13,6 +13,7 @@ generic-y += local64.h
13generic-y += mcs_spinlock.h 13generic-y += mcs_spinlock.h
14generic-y += mm-arch-hooks.h 14generic-y += mm-arch-hooks.h
15generic-y += module.h 15generic-y += module.h
16generic-y += msi.h
16generic-y += preempt.h 17generic-y += preempt.h
17generic-y += rwsem.h 18generic-y += rwsem.h
18generic-y += serial.h 19generic-y += serial.h
diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h
index d13ce517f4b9..94c930f0bc62 100644
--- a/arch/sparc/include/asm/atomic_32.h
+++ b/arch/sparc/include/asm/atomic_32.h
@@ -27,17 +27,17 @@ int atomic_fetch_or(int, atomic_t *);
27int atomic_fetch_xor(int, atomic_t *); 27int atomic_fetch_xor(int, atomic_t *);
28int atomic_cmpxchg(atomic_t *, int, int); 28int atomic_cmpxchg(atomic_t *, int, int);
29int atomic_xchg(atomic_t *, int); 29int atomic_xchg(atomic_t *, int);
30int __atomic_add_unless(atomic_t *, int, int); 30int atomic_fetch_add_unless(atomic_t *, int, int);
31void atomic_set(atomic_t *, int); 31void atomic_set(atomic_t *, int);
32 32
33#define atomic_fetch_add_unless atomic_fetch_add_unless
34
33#define atomic_set_release(v, i) atomic_set((v), (i)) 35#define atomic_set_release(v, i) atomic_set((v), (i))
34 36
35#define atomic_read(v) READ_ONCE((v)->counter) 37#define atomic_read(v) READ_ONCE((v)->counter)
36 38
37#define atomic_add(i, v) ((void)atomic_add_return( (int)(i), (v))) 39#define atomic_add(i, v) ((void)atomic_add_return( (int)(i), (v)))
38#define atomic_sub(i, v) ((void)atomic_add_return(-(int)(i), (v))) 40#define atomic_sub(i, v) ((void)atomic_add_return(-(int)(i), (v)))
39#define atomic_inc(v) ((void)atomic_add_return( 1, (v)))
40#define atomic_dec(v) ((void)atomic_add_return( -1, (v)))
41 41
42#define atomic_and(i, v) ((void)atomic_fetch_and((i), (v))) 42#define atomic_and(i, v) ((void)atomic_fetch_and((i), (v)))
43#define atomic_or(i, v) ((void)atomic_fetch_or((i), (v))) 43#define atomic_or(i, v) ((void)atomic_fetch_or((i), (v)))
@@ -46,22 +46,4 @@ void atomic_set(atomic_t *, int);
46#define atomic_sub_return(i, v) (atomic_add_return(-(int)(i), (v))) 46#define atomic_sub_return(i, v) (atomic_add_return(-(int)(i), (v)))
47#define atomic_fetch_sub(i, v) (atomic_fetch_add (-(int)(i), (v))) 47#define atomic_fetch_sub(i, v) (atomic_fetch_add (-(int)(i), (v)))
48 48
49#define atomic_inc_return(v) (atomic_add_return( 1, (v)))
50#define atomic_dec_return(v) (atomic_add_return( -1, (v)))
51
52#define atomic_add_negative(a, v) (atomic_add_return((a), (v)) < 0)
53
54/*
55 * atomic_inc_and_test - increment and test
56 * @v: pointer of type atomic_t
57 *
58 * Atomically increments @v by 1
59 * and returns true if the result is zero, or false for all
60 * other cases.
61 */
62#define atomic_inc_and_test(v) (atomic_inc_return(v) == 0)
63
64#define atomic_dec_and_test(v) (atomic_dec_return(v) == 0)
65#define atomic_sub_and_test(i, v) (atomic_sub_return(i, v) == 0)
66
67#endif /* !(__ARCH_SPARC_ATOMIC__) */ 49#endif /* !(__ARCH_SPARC_ATOMIC__) */
diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h
index 28db058d471b..6963482c81d8 100644
--- a/arch/sparc/include/asm/atomic_64.h
+++ b/arch/sparc/include/asm/atomic_64.h
@@ -50,38 +50,6 @@ ATOMIC_OPS(xor)
50#undef ATOMIC_OP_RETURN 50#undef ATOMIC_OP_RETURN
51#undef ATOMIC_OP 51#undef ATOMIC_OP
52 52
53#define atomic_dec_return(v) atomic_sub_return(1, v)
54#define atomic64_dec_return(v) atomic64_sub_return(1, v)
55
56#define atomic_inc_return(v) atomic_add_return(1, v)
57#define atomic64_inc_return(v) atomic64_add_return(1, v)
58
59/*
60 * atomic_inc_and_test - increment and test
61 * @v: pointer of type atomic_t
62 *
63 * Atomically increments @v by 1
64 * and returns true if the result is zero, or false for all
65 * other cases.
66 */
67#define atomic_inc_and_test(v) (atomic_inc_return(v) == 0)
68#define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0)
69
70#define atomic_sub_and_test(i, v) (atomic_sub_return(i, v) == 0)
71#define atomic64_sub_and_test(i, v) (atomic64_sub_return(i, v) == 0)
72
73#define atomic_dec_and_test(v) (atomic_sub_return(1, v) == 0)
74#define atomic64_dec_and_test(v) (atomic64_sub_return(1, v) == 0)
75
76#define atomic_inc(v) atomic_add(1, v)
77#define atomic64_inc(v) atomic64_add(1, v)
78
79#define atomic_dec(v) atomic_sub(1, v)
80#define atomic64_dec(v) atomic64_sub(1, v)
81
82#define atomic_add_negative(i, v) (atomic_add_return(i, v) < 0)
83#define atomic64_add_negative(i, v) (atomic64_add_return(i, v) < 0)
84
85#define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n))) 53#define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n)))
86 54
87static inline int atomic_xchg(atomic_t *v, int new) 55static inline int atomic_xchg(atomic_t *v, int new)
@@ -89,42 +57,11 @@ static inline int atomic_xchg(atomic_t *v, int new)
89 return xchg(&v->counter, new); 57 return xchg(&v->counter, new);
90} 58}
91 59
92static inline int __atomic_add_unless(atomic_t *v, int a, int u)
93{
94 int c, old;
95 c = atomic_read(v);
96 for (;;) {
97 if (unlikely(c == (u)))
98 break;
99 old = atomic_cmpxchg((v), c, c + (a));
100 if (likely(old == c))
101 break;
102 c = old;
103 }
104 return c;
105}
106
107#define atomic64_cmpxchg(v, o, n) \ 60#define atomic64_cmpxchg(v, o, n) \
108 ((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n))) 61 ((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n)))
109#define atomic64_xchg(v, new) (xchg(&((v)->counter), new)) 62#define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
110 63
111static inline long atomic64_add_unless(atomic64_t *v, long a, long u)
112{
113 long c, old;
114 c = atomic64_read(v);
115 for (;;) {
116 if (unlikely(c == (u)))
117 break;
118 old = atomic64_cmpxchg((v), c, c + (a));
119 if (likely(old == c))
120 break;
121 c = old;
122 }
123 return c != (u);
124}
125
126#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
127
128long atomic64_dec_if_positive(atomic64_t *v); 64long atomic64_dec_if_positive(atomic64_t *v);
65#define atomic64_dec_if_positive atomic64_dec_if_positive
129 66
130#endif /* !(__ARCH_SPARC64_ATOMIC__) */ 67#endif /* !(__ARCH_SPARC64_ATOMIC__) */
diff --git a/arch/sparc/include/asm/msi.h b/arch/sparc/include/asm/msi.h
deleted file mode 100644
index 3c17c1074431..000000000000
--- a/arch/sparc/include/asm/msi.h
+++ /dev/null
@@ -1,32 +0,0 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2/*
3 * msi.h: Defines specific to the MBus - Sbus - Interface.
4 *
5 * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
6 * Copyright (C) 1996 Eddie C. Dost (ecd@skynet.be)
7 */
8
9#ifndef _SPARC_MSI_H
10#define _SPARC_MSI_H
11
12/*
13 * Locations of MSI Registers.
14 */
15#define MSI_MBUS_ARBEN 0xe0001008 /* MBus Arbiter Enable register */
16
17/*
18 * Useful bits in the MSI Registers.
19 */
20#define MSI_ASYNC_MODE 0x80000000 /* Operate the MSI asynchronously */
21
22
23static inline void msi_set_sync(void)
24{
25 __asm__ __volatile__ ("lda [%0] %1, %%g3\n\t"
26 "andn %%g3, %2, %%g3\n\t"
27 "sta %%g3, [%0] %1\n\t" : :
28 "r" (MSI_MBUS_ARBEN),
29 "i" (ASI_M_CTL), "r" (MSI_ASYNC_MODE) : "g3");
30}
31
32#endif /* !(_SPARC_MSI_H) */
diff --git a/arch/sparc/kernel/time_64.c b/arch/sparc/kernel/time_64.c
index 2ef8cfa9677e..f0eba72aa1ad 100644
--- a/arch/sparc/kernel/time_64.c
+++ b/arch/sparc/kernel/time_64.c
@@ -814,7 +814,7 @@ static void __init get_tick_patch(void)
814 } 814 }
815} 815}
816 816
817static void init_tick_ops(struct sparc64_tick_ops *ops) 817static void __init init_tick_ops(struct sparc64_tick_ops *ops)
818{ 818{
819 unsigned long freq, quotient, tick; 819 unsigned long freq, quotient, tick;
820 820
diff --git a/arch/sparc/lib/atomic32.c b/arch/sparc/lib/atomic32.c
index 465a901a0ada..281fa634bb1a 100644
--- a/arch/sparc/lib/atomic32.c
+++ b/arch/sparc/lib/atomic32.c
@@ -95,7 +95,7 @@ int atomic_cmpxchg(atomic_t *v, int old, int new)
95} 95}
96EXPORT_SYMBOL(atomic_cmpxchg); 96EXPORT_SYMBOL(atomic_cmpxchg);
97 97
98int __atomic_add_unless(atomic_t *v, int a, int u) 98int atomic_fetch_add_unless(atomic_t *v, int a, int u)
99{ 99{
100 int ret; 100 int ret;
101 unsigned long flags; 101 unsigned long flags;
@@ -107,7 +107,7 @@ int __atomic_add_unless(atomic_t *v, int a, int u)
107 spin_unlock_irqrestore(ATOMIC_HASH(v), flags); 107 spin_unlock_irqrestore(ATOMIC_HASH(v), flags);
108 return ret; 108 return ret;
109} 109}
110EXPORT_SYMBOL(__atomic_add_unless); 110EXPORT_SYMBOL(atomic_fetch_add_unless);
111 111
112/* Atomic operations are already serializing */ 112/* Atomic operations are already serializing */
113void atomic_set(atomic_t *v, int i) 113void atomic_set(atomic_t *v, int i)
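
The sparc32 library keeps its hashed-spinlock body and only changes the exported name: atomic_fetch_add_unless() returns the old value, which is what the "fetch" in the name promises. A typical caller, shown here purely for illustration and not taken from this patch, is a get-unless-zero style reference grab:

	/* Illustrative caller: take a reference only if the object is live. */
	static bool example_get_ref(atomic_t *refs)
	{
		/* The old value comes back; 0 means the object was already dead. */
		return atomic_fetch_add_unless(refs, 1, 0) != 0;
	}
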
diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c
index 1d70c3f6d986..be9cb0065179 100644
--- a/arch/sparc/mm/srmmu.c
+++ b/arch/sparc/mm/srmmu.c
@@ -37,7 +37,6 @@
37#include <asm/mbus.h> 37#include <asm/mbus.h>
38#include <asm/page.h> 38#include <asm/page.h>
39#include <asm/asi.h> 39#include <asm/asi.h>
40#include <asm/msi.h>
41#include <asm/smp.h> 40#include <asm/smp.h>
42#include <asm/io.h> 41#include <asm/io.h>
43 42
@@ -116,6 +115,25 @@ static inline void srmmu_ctxd_set(ctxd_t *ctxp, pgd_t *pgdp)
116 set_pte((pte_t *)ctxp, pte); 115 set_pte((pte_t *)ctxp, pte);
117} 116}
118 117
118/*
119 * Locations of MSI Registers.
120 */
121#define MSI_MBUS_ARBEN 0xe0001008 /* MBus Arbiter Enable register */
122
123/*
124 * Useful bits in the MSI Registers.
125 */
126#define MSI_ASYNC_MODE 0x80000000 /* Operate the MSI asynchronously */
127
128static void msi_set_sync(void)
129{
130 __asm__ __volatile__ ("lda [%0] %1, %%g3\n\t"
131 "andn %%g3, %2, %%g3\n\t"
132 "sta %%g3, [%0] %1\n\t" : :
133 "r" (MSI_MBUS_ARBEN),
134 "i" (ASI_M_CTL), "r" (MSI_ASYNC_MODE) : "g3");
135}
136
119void pmd_set(pmd_t *pmdp, pte_t *ptep) 137void pmd_set(pmd_t *pmdp, pte_t *ptep)
120{ 138{
121 unsigned long ptp; /* Physical address, shifted right by 4 */ 139 unsigned long ptp; /* Physical address, shifted right by 4 */
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 887d3a7bb646..6d4774f203d0 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -180,7 +180,7 @@ config X86
180 select HAVE_PERF_USER_STACK_DUMP 180 select HAVE_PERF_USER_STACK_DUMP
181 select HAVE_RCU_TABLE_FREE 181 select HAVE_RCU_TABLE_FREE
182 select HAVE_REGS_AND_STACK_ACCESS_API 182 select HAVE_REGS_AND_STACK_ACCESS_API
183 select HAVE_RELIABLE_STACKTRACE if X86_64 && UNWINDER_FRAME_POINTER && STACK_VALIDATION 183 select HAVE_RELIABLE_STACKTRACE if X86_64 && (UNWINDER_FRAME_POINTER || UNWINDER_ORC) && STACK_VALIDATION
184 select HAVE_STACKPROTECTOR if CC_HAS_SANE_STACKPROTECTOR 184 select HAVE_STACKPROTECTOR if CC_HAS_SANE_STACKPROTECTOR
185 select HAVE_STACK_VALIDATION if X86_64 185 select HAVE_STACK_VALIDATION if X86_64
186 select HAVE_RSEQ 186 select HAVE_RSEQ
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index a08e82856563..7e3c07d6ad42 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -80,11 +80,6 @@ ifeq ($(CONFIG_X86_32),y)
80 # alignment instructions. 80 # alignment instructions.
81 KBUILD_CFLAGS += $(call cc-option,$(cc_stack_align4)) 81 KBUILD_CFLAGS += $(call cc-option,$(cc_stack_align4))
82 82
83 # Disable unit-at-a-time mode on pre-gcc-4.0 compilers, it makes gcc use
84 # a lot more stack due to the lack of sharing of stacklots:
85 KBUILD_CFLAGS += $(call cc-ifversion, -lt, 0400, \
86 $(call cc-option,-fno-unit-at-a-time))
87
88 # CPU-specific tuning. Anything which can be shared with UML should go here. 83 # CPU-specific tuning. Anything which can be shared with UML should go here.
89 include arch/x86/Makefile_32.cpu 84 include arch/x86/Makefile_32.cpu
90 KBUILD_CFLAGS += $(cflags-y) 85 KBUILD_CFLAGS += $(cflags-y)
diff --git a/arch/x86/boot/bitops.h b/arch/x86/boot/bitops.h
index 0d41d68131cc..2e1382486e91 100644
--- a/arch/x86/boot/bitops.h
+++ b/arch/x86/boot/bitops.h
@@ -17,6 +17,7 @@
17#define _LINUX_BITOPS_H /* Inhibit inclusion of <linux/bitops.h> */ 17#define _LINUX_BITOPS_H /* Inhibit inclusion of <linux/bitops.h> */
18 18
19#include <linux/types.h> 19#include <linux/types.h>
20#include <asm/asm.h>
20 21
21static inline bool constant_test_bit(int nr, const void *addr) 22static inline bool constant_test_bit(int nr, const void *addr)
22{ 23{
@@ -28,7 +29,7 @@ static inline bool variable_test_bit(int nr, const void *addr)
28 bool v; 29 bool v;
29 const u32 *p = (const u32 *)addr; 30 const u32 *p = (const u32 *)addr;
30 31
31 asm("btl %2,%1; setc %0" : "=qm" (v) : "m" (*p), "Ir" (nr)); 32 asm("btl %2,%1" CC_SET(c) : CC_OUT(c) (v) : "m" (*p), "Ir" (nr));
32 return v; 33 return v;
33} 34}
34 35
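
The btl conversion uses the CC_SET()/CC_OUT() helpers from <asm/asm.h>, so compilers with flag-output asm support consume the carry flag directly instead of materializing it via setc into a "=qm" operand. In rough form, simplified from the real header:

	/* Sketch of the flag-output helpers; the authoritative definitions
	 * live in arch/x86/include/asm/asm.h. */
	#ifdef __GCC_ASM_FLAG_OUTPUTS__
	# define CC_SET(c)	"\n\t/* output condition code " #c "*/\n"
	# define CC_OUT(c)	"=@cc" #c
	#else
	# define CC_SET(c)	"\n\tset" #c " %[_cc_" #c "]\n"
	# define CC_OUT(c)	[_cc_ ## c] "=qm"
	#endif
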
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index e98522ea6f09..1458b1700fc7 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -34,74 +34,13 @@ static void setup_boot_services##bits(struct efi_config *c) \
34 \ 34 \
35 table = (typeof(table))sys_table; \ 35 table = (typeof(table))sys_table; \
36 \ 36 \
37 c->runtime_services = table->runtime; \ 37 c->runtime_services = table->runtime; \
38 c->boot_services = table->boottime; \ 38 c->boot_services = table->boottime; \
39 c->text_output = table->con_out; \ 39 c->text_output = table->con_out; \
40} 40}
41BOOT_SERVICES(32); 41BOOT_SERVICES(32);
42BOOT_SERVICES(64); 42BOOT_SERVICES(64);
43 43
44static inline efi_status_t __open_volume32(void *__image, void **__fh)
45{
46 efi_file_io_interface_t *io;
47 efi_loaded_image_32_t *image = __image;
48 efi_file_handle_32_t *fh;
49 efi_guid_t fs_proto = EFI_FILE_SYSTEM_GUID;
50 efi_status_t status;
51 void *handle = (void *)(unsigned long)image->device_handle;
52 unsigned long func;
53
54 status = efi_call_early(handle_protocol, handle,
55 &fs_proto, (void **)&io);
56 if (status != EFI_SUCCESS) {
57 efi_printk(sys_table, "Failed to handle fs_proto\n");
58 return status;
59 }
60
61 func = (unsigned long)io->open_volume;
62 status = efi_early->call(func, io, &fh);
63 if (status != EFI_SUCCESS)
64 efi_printk(sys_table, "Failed to open volume\n");
65
66 *__fh = fh;
67 return status;
68}
69
70static inline efi_status_t __open_volume64(void *__image, void **__fh)
71{
72 efi_file_io_interface_t *io;
73 efi_loaded_image_64_t *image = __image;
74 efi_file_handle_64_t *fh;
75 efi_guid_t fs_proto = EFI_FILE_SYSTEM_GUID;
76 efi_status_t status;
77 void *handle = (void *)(unsigned long)image->device_handle;
78 unsigned long func;
79
80 status = efi_call_early(handle_protocol, handle,
81 &fs_proto, (void **)&io);
82 if (status != EFI_SUCCESS) {
83 efi_printk(sys_table, "Failed to handle fs_proto\n");
84 return status;
85 }
86
87 func = (unsigned long)io->open_volume;
88 status = efi_early->call(func, io, &fh);
89 if (status != EFI_SUCCESS)
90 efi_printk(sys_table, "Failed to open volume\n");
91
92 *__fh = fh;
93 return status;
94}
95
96efi_status_t
97efi_open_volume(efi_system_table_t *sys_table, void *__image, void **__fh)
98{
99 if (efi_early->is64)
100 return __open_volume64(__image, __fh);
101
102 return __open_volume32(__image, __fh);
103}
104
105void efi_char16_printk(efi_system_table_t *table, efi_char16_t *str) 44void efi_char16_printk(efi_system_table_t *table, efi_char16_t *str)
106{ 45{
107 efi_call_proto(efi_simple_text_output_protocol, output_string, 46 efi_call_proto(efi_simple_text_output_protocol, output_string,
@@ -109,7 +48,7 @@ void efi_char16_printk(efi_system_table_t *table, efi_char16_t *str)
109} 48}
110 49
111static efi_status_t 50static efi_status_t
112__setup_efi_pci(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom) 51preserve_pci_rom_image(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom)
113{ 52{
114 struct pci_setup_rom *rom = NULL; 53 struct pci_setup_rom *rom = NULL;
115 efi_status_t status; 54 efi_status_t status;
@@ -134,16 +73,16 @@ __setup_efi_pci(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom)
134 73
135 status = efi_call_early(allocate_pool, EFI_LOADER_DATA, size, &rom); 74 status = efi_call_early(allocate_pool, EFI_LOADER_DATA, size, &rom);
136 if (status != EFI_SUCCESS) { 75 if (status != EFI_SUCCESS) {
137 efi_printk(sys_table, "Failed to alloc mem for rom\n"); 76 efi_printk(sys_table, "Failed to allocate memory for 'rom'\n");
138 return status; 77 return status;
139 } 78 }
140 79
141 memset(rom, 0, sizeof(*rom)); 80 memset(rom, 0, sizeof(*rom));
142 81
143 rom->data.type = SETUP_PCI; 82 rom->data.type = SETUP_PCI;
144 rom->data.len = size - sizeof(struct setup_data); 83 rom->data.len = size - sizeof(struct setup_data);
145 rom->data.next = 0; 84 rom->data.next = 0;
146 rom->pcilen = pci->romsize; 85 rom->pcilen = pci->romsize;
147 *__rom = rom; 86 *__rom = rom;
148 87
149 status = efi_call_proto(efi_pci_io_protocol, pci.read, pci, 88 status = efi_call_proto(efi_pci_io_protocol, pci.read, pci,
@@ -179,96 +118,6 @@ free_struct:
179 return status; 118 return status;
180} 119}
181 120
182static void
183setup_efi_pci32(struct boot_params *params, void **pci_handle,
184 unsigned long size)
185{
186 efi_pci_io_protocol_t *pci = NULL;
187 efi_guid_t pci_proto = EFI_PCI_IO_PROTOCOL_GUID;
188 u32 *handles = (u32 *)(unsigned long)pci_handle;
189 efi_status_t status;
190 unsigned long nr_pci;
191 struct setup_data *data;
192 int i;
193
194 data = (struct setup_data *)(unsigned long)params->hdr.setup_data;
195
196 while (data && data->next)
197 data = (struct setup_data *)(unsigned long)data->next;
198
199 nr_pci = size / sizeof(u32);
200 for (i = 0; i < nr_pci; i++) {
201 struct pci_setup_rom *rom = NULL;
202 u32 h = handles[i];
203
204 status = efi_call_early(handle_protocol, h,
205 &pci_proto, (void **)&pci);
206
207 if (status != EFI_SUCCESS)
208 continue;
209
210 if (!pci)
211 continue;
212
213 status = __setup_efi_pci(pci, &rom);
214 if (status != EFI_SUCCESS)
215 continue;
216
217 if (data)
218 data->next = (unsigned long)rom;
219 else
220 params->hdr.setup_data = (unsigned long)rom;
221
222 data = (struct setup_data *)rom;
223
224 }
225}
226
227static void
228setup_efi_pci64(struct boot_params *params, void **pci_handle,
229 unsigned long size)
230{
231 efi_pci_io_protocol_t *pci = NULL;
232 efi_guid_t pci_proto = EFI_PCI_IO_PROTOCOL_GUID;
233 u64 *handles = (u64 *)(unsigned long)pci_handle;
234 efi_status_t status;
235 unsigned long nr_pci;
236 struct setup_data *data;
237 int i;
238
239 data = (struct setup_data *)(unsigned long)params->hdr.setup_data;
240
241 while (data && data->next)
242 data = (struct setup_data *)(unsigned long)data->next;
243
244 nr_pci = size / sizeof(u64);
245 for (i = 0; i < nr_pci; i++) {
246 struct pci_setup_rom *rom = NULL;
247 u64 h = handles[i];
248
249 status = efi_call_early(handle_protocol, h,
250 &pci_proto, (void **)&pci);
251
252 if (status != EFI_SUCCESS)
253 continue;
254
255 if (!pci)
256 continue;
257
258 status = __setup_efi_pci(pci, &rom);
259 if (status != EFI_SUCCESS)
260 continue;
261
262 if (data)
263 data->next = (unsigned long)rom;
264 else
265 params->hdr.setup_data = (unsigned long)rom;
266
267 data = (struct setup_data *)rom;
268
269 }
270}
271
272/* 121/*
273 * There's no way to return an informative status from this function, 122 * There's no way to return an informative status from this function,
274 * because any analysis (and printing of error messages) needs to be 123 * because any analysis (and printing of error messages) needs to be
@@ -284,6 +133,9 @@ static void setup_efi_pci(struct boot_params *params)
284 void **pci_handle = NULL; 133 void **pci_handle = NULL;
285 efi_guid_t pci_proto = EFI_PCI_IO_PROTOCOL_GUID; 134 efi_guid_t pci_proto = EFI_PCI_IO_PROTOCOL_GUID;
286 unsigned long size = 0; 135 unsigned long size = 0;
136 unsigned long nr_pci;
137 struct setup_data *data;
138 int i;
287 139
288 status = efi_call_early(locate_handle, 140 status = efi_call_early(locate_handle,
289 EFI_LOCATE_BY_PROTOCOL, 141 EFI_LOCATE_BY_PROTOCOL,
@@ -295,7 +147,7 @@ static void setup_efi_pci(struct boot_params *params)
295 size, (void **)&pci_handle); 147 size, (void **)&pci_handle);
296 148
297 if (status != EFI_SUCCESS) { 149 if (status != EFI_SUCCESS) {
298 efi_printk(sys_table, "Failed to alloc mem for pci_handle\n"); 150 efi_printk(sys_table, "Failed to allocate memory for 'pci_handle'\n");
299 return; 151 return;
300 } 152 }
301 153
@@ -307,10 +159,34 @@ static void setup_efi_pci(struct boot_params *params)
307 if (status != EFI_SUCCESS) 159 if (status != EFI_SUCCESS)
308 goto free_handle; 160 goto free_handle;
309 161
310 if (efi_early->is64) 162 data = (struct setup_data *)(unsigned long)params->hdr.setup_data;
311 setup_efi_pci64(params, pci_handle, size); 163
312 else 164 while (data && data->next)
313 setup_efi_pci32(params, pci_handle, size); 165 data = (struct setup_data *)(unsigned long)data->next;
166
167 nr_pci = size / (efi_is_64bit() ? sizeof(u64) : sizeof(u32));
168 for (i = 0; i < nr_pci; i++) {
169 efi_pci_io_protocol_t *pci = NULL;
170 struct pci_setup_rom *rom;
171
172 status = efi_call_early(handle_protocol,
173 efi_is_64bit() ? ((u64 *)pci_handle)[i]
174 : ((u32 *)pci_handle)[i],
175 &pci_proto, (void **)&pci);
176 if (status != EFI_SUCCESS || !pci)
177 continue;
178
179 status = preserve_pci_rom_image(pci, &rom);
180 if (status != EFI_SUCCESS)
181 continue;
182
183 if (data)
184 data->next = (unsigned long)rom;
185 else
186 params->hdr.setup_data = (unsigned long)rom;
187
188 data = (struct setup_data *)rom;
189 }
314 190
315free_handle: 191free_handle:
316 efi_call_early(free_pool, pci_handle); 192 efi_call_early(free_pool, pci_handle);
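
With the 32/64-bit variants merged, the only remaining width dependence is how handles are read out of the buffer returned by locate_handle(): an array of u32 on 32-bit firmware, u64 on 64-bit. A small helper capturing the open-coded pattern above (the helper name is hypothetical; the merged code inlines it) would be:

	/* Hypothetical helper mirroring the open-coded pattern above. */
	static unsigned long efi_handle_at(void *handles, unsigned long i)
	{
		if (efi_is_64bit())
			return ((u64 *)handles)[i];

		return ((u32 *)handles)[i];
	}
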
@@ -341,8 +217,7 @@ static void retrieve_apple_device_properties(struct boot_params *boot_params)
341 status = efi_call_early(allocate_pool, EFI_LOADER_DATA, 217 status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
342 size + sizeof(struct setup_data), &new); 218 size + sizeof(struct setup_data), &new);
343 if (status != EFI_SUCCESS) { 219 if (status != EFI_SUCCESS) {
344 efi_printk(sys_table, 220 efi_printk(sys_table, "Failed to allocate memory for 'properties'\n");
345 "Failed to alloc mem for properties\n");
346 return; 221 return;
347 } 222 }
348 223
@@ -358,9 +233,9 @@ static void retrieve_apple_device_properties(struct boot_params *boot_params)
358 new->next = 0; 233 new->next = 0;
359 234
360 data = (struct setup_data *)(unsigned long)boot_params->hdr.setup_data; 235 data = (struct setup_data *)(unsigned long)boot_params->hdr.setup_data;
361 if (!data) 236 if (!data) {
362 boot_params->hdr.setup_data = (unsigned long)new; 237 boot_params->hdr.setup_data = (unsigned long)new;
363 else { 238 } else {
364 while (data->next) 239 while (data->next)
365 data = (struct setup_data *)(unsigned long)data->next; 240 data = (struct setup_data *)(unsigned long)data->next;
366 data->next = (unsigned long)new; 241 data->next = (unsigned long)new;
@@ -380,81 +255,55 @@ static void setup_quirks(struct boot_params *boot_params)
380 } 255 }
381} 256}
382 257
258/*
259 * See if we have Universal Graphics Adapter (UGA) protocol
260 */
383static efi_status_t 261static efi_status_t
384setup_uga32(void **uga_handle, unsigned long size, u32 *width, u32 *height) 262setup_uga(struct screen_info *si, efi_guid_t *uga_proto, unsigned long size)
385{ 263{
386 struct efi_uga_draw_protocol *uga = NULL, *first_uga; 264 efi_status_t status;
387 efi_guid_t uga_proto = EFI_UGA_PROTOCOL_GUID; 265 u32 width, height;
266 void **uga_handle = NULL;
267 efi_uga_draw_protocol_t *uga = NULL, *first_uga;
388 unsigned long nr_ugas; 268 unsigned long nr_ugas;
389 u32 *handles = (u32 *)uga_handle;
390 efi_status_t status = EFI_INVALID_PARAMETER;
391 int i; 269 int i;
392 270
393 first_uga = NULL; 271 status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
394 nr_ugas = size / sizeof(u32); 272 size, (void **)&uga_handle);
395 for (i = 0; i < nr_ugas; i++) { 273 if (status != EFI_SUCCESS)
396 efi_guid_t pciio_proto = EFI_PCI_IO_PROTOCOL_GUID; 274 return status;
397 u32 w, h, depth, refresh;
398 void *pciio;
399 u32 handle = handles[i];
400
401 status = efi_call_early(handle_protocol, handle,
402 &uga_proto, (void **)&uga);
403 if (status != EFI_SUCCESS)
404 continue;
405
406 efi_call_early(handle_protocol, handle, &pciio_proto, &pciio);
407
408 status = efi_early->call((unsigned long)uga->get_mode, uga,
409 &w, &h, &depth, &refresh);
410 if (status == EFI_SUCCESS && (!first_uga || pciio)) {
411 *width = w;
412 *height = h;
413
414 /*
415 * Once we've found a UGA supporting PCIIO,
416 * don't bother looking any further.
417 */
418 if (pciio)
419 break;
420
421 first_uga = uga;
422 }
423 }
424 275
425 return status; 276 status = efi_call_early(locate_handle,
426} 277 EFI_LOCATE_BY_PROTOCOL,
278 uga_proto, NULL, &size, uga_handle);
279 if (status != EFI_SUCCESS)
280 goto free_handle;
427 281
428static efi_status_t 282 height = 0;
429setup_uga64(void **uga_handle, unsigned long size, u32 *width, u32 *height) 283 width = 0;
430{
431 struct efi_uga_draw_protocol *uga = NULL, *first_uga;
432 efi_guid_t uga_proto = EFI_UGA_PROTOCOL_GUID;
433 unsigned long nr_ugas;
434 u64 *handles = (u64 *)uga_handle;
435 efi_status_t status = EFI_INVALID_PARAMETER;
436 int i;
437 284
438 first_uga = NULL; 285 first_uga = NULL;
439 nr_ugas = size / sizeof(u64); 286 nr_ugas = size / (efi_is_64bit() ? sizeof(u64) : sizeof(u32));
440 for (i = 0; i < nr_ugas; i++) { 287 for (i = 0; i < nr_ugas; i++) {
441 efi_guid_t pciio_proto = EFI_PCI_IO_PROTOCOL_GUID; 288 efi_guid_t pciio_proto = EFI_PCI_IO_PROTOCOL_GUID;
442 u32 w, h, depth, refresh; 289 u32 w, h, depth, refresh;
443 void *pciio; 290 void *pciio;
444 u64 handle = handles[i]; 291 unsigned long handle = efi_is_64bit() ? ((u64 *)uga_handle)[i]
292 : ((u32 *)uga_handle)[i];
445 293
446 status = efi_call_early(handle_protocol, handle, 294 status = efi_call_early(handle_protocol, handle,
447 &uga_proto, (void **)&uga); 295 uga_proto, (void **)&uga);
448 if (status != EFI_SUCCESS) 296 if (status != EFI_SUCCESS)
449 continue; 297 continue;
450 298
299 pciio = NULL;
451 efi_call_early(handle_protocol, handle, &pciio_proto, &pciio); 300 efi_call_early(handle_protocol, handle, &pciio_proto, &pciio);
452 301
453 status = efi_early->call((unsigned long)uga->get_mode, uga, 302 status = efi_call_proto(efi_uga_draw_protocol, get_mode, uga,
454 &w, &h, &depth, &refresh); 303 &w, &h, &depth, &refresh);
455 if (status == EFI_SUCCESS && (!first_uga || pciio)) { 304 if (status == EFI_SUCCESS && (!first_uga || pciio)) {
456 *width = w; 305 width = w;
457 *height = h; 306 height = h;
458 307
459 /* 308 /*
460 * Once we've found a UGA supporting PCIIO, 309 * Once we've found a UGA supporting PCIIO,
@@ -467,59 +316,28 @@ setup_uga64(void **uga_handle, unsigned long size, u32 *width, u32 *height)
467 } 316 }
468 } 317 }
469 318
470 return status;
471}
472
473/*
474 * See if we have Universal Graphics Adapter (UGA) protocol
475 */
476static efi_status_t setup_uga(struct screen_info *si, efi_guid_t *uga_proto,
477 unsigned long size)
478{
479 efi_status_t status;
480 u32 width, height;
481 void **uga_handle = NULL;
482
483 status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
484 size, (void **)&uga_handle);
485 if (status != EFI_SUCCESS)
486 return status;
487
488 status = efi_call_early(locate_handle,
489 EFI_LOCATE_BY_PROTOCOL,
490 uga_proto, NULL, &size, uga_handle);
491 if (status != EFI_SUCCESS)
492 goto free_handle;
493
494 height = 0;
495 width = 0;
496
497 if (efi_early->is64)
498 status = setup_uga64(uga_handle, size, &width, &height);
499 else
500 status = setup_uga32(uga_handle, size, &width, &height);
501
502 if (!width && !height) 319 if (!width && !height)
503 goto free_handle; 320 goto free_handle;
504 321
505 /* EFI framebuffer */ 322 /* EFI framebuffer */
506 si->orig_video_isVGA = VIDEO_TYPE_EFI; 323 si->orig_video_isVGA = VIDEO_TYPE_EFI;
507 324
508 si->lfb_depth = 32; 325 si->lfb_depth = 32;
509 si->lfb_width = width; 326 si->lfb_width = width;
510 si->lfb_height = height; 327 si->lfb_height = height;
511 328
512 si->red_size = 8; 329 si->red_size = 8;
513 si->red_pos = 16; 330 si->red_pos = 16;
514 si->green_size = 8; 331 si->green_size = 8;
515 si->green_pos = 8; 332 si->green_pos = 8;
516 si->blue_size = 8; 333 si->blue_size = 8;
517 si->blue_pos = 0; 334 si->blue_pos = 0;
518 si->rsvd_size = 8; 335 si->rsvd_size = 8;
519 si->rsvd_pos = 24; 336 si->rsvd_pos = 24;
520 337
521free_handle: 338free_handle:
522 efi_call_early(free_pool, uga_handle); 339 efi_call_early(free_pool, uga_handle);
340
523 return status; 341 return status;
524} 342}
525 343
@@ -586,7 +404,7 @@ struct boot_params *make_boot_params(struct efi_config *c)
586 if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) 404 if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
587 return NULL; 405 return NULL;
588 406
589 if (efi_early->is64) 407 if (efi_is_64bit())
590 setup_boot_services64(efi_early); 408 setup_boot_services64(efi_early);
591 else 409 else
592 setup_boot_services32(efi_early); 410 setup_boot_services32(efi_early);
@@ -601,7 +419,7 @@ struct boot_params *make_boot_params(struct efi_config *c)
601 status = efi_low_alloc(sys_table, 0x4000, 1, 419 status = efi_low_alloc(sys_table, 0x4000, 1,
602 (unsigned long *)&boot_params); 420 (unsigned long *)&boot_params);
603 if (status != EFI_SUCCESS) { 421 if (status != EFI_SUCCESS) {
604 efi_printk(sys_table, "Failed to alloc lowmem for boot params\n"); 422 efi_printk(sys_table, "Failed to allocate lowmem for boot params\n");
605 return NULL; 423 return NULL;
606 } 424 }
607 425
@@ -617,9 +435,9 @@ struct boot_params *make_boot_params(struct efi_config *c)
617 * Fill out some of the header fields ourselves because the 435 * Fill out some of the header fields ourselves because the
618 * EFI firmware loader doesn't load the first sector. 436 * EFI firmware loader doesn't load the first sector.
619 */ 437 */
620 hdr->root_flags = 1; 438 hdr->root_flags = 1;
621 hdr->vid_mode = 0xffff; 439 hdr->vid_mode = 0xffff;
622 hdr->boot_flag = 0xAA55; 440 hdr->boot_flag = 0xAA55;
623 441
624 hdr->type_of_loader = 0x21; 442 hdr->type_of_loader = 0x21;
625 443
@@ -627,6 +445,7 @@ struct boot_params *make_boot_params(struct efi_config *c)
627 cmdline_ptr = efi_convert_cmdline(sys_table, image, &options_size); 445 cmdline_ptr = efi_convert_cmdline(sys_table, image, &options_size);
628 if (!cmdline_ptr) 446 if (!cmdline_ptr)
629 goto fail; 447 goto fail;
448
630 hdr->cmd_line_ptr = (unsigned long)cmdline_ptr; 449 hdr->cmd_line_ptr = (unsigned long)cmdline_ptr;
631 /* Fill in upper bits of command line address, NOP on 32 bit */ 450 /* Fill in upper bits of command line address, NOP on 32 bit */
632 boot_params->ext_cmd_line_ptr = (u64)(unsigned long)cmdline_ptr >> 32; 451 boot_params->ext_cmd_line_ptr = (u64)(unsigned long)cmdline_ptr >> 32;
@@ -663,10 +482,12 @@ struct boot_params *make_boot_params(struct efi_config *c)
663 boot_params->ext_ramdisk_size = (u64)ramdisk_size >> 32; 482 boot_params->ext_ramdisk_size = (u64)ramdisk_size >> 32;
664 483
665 return boot_params; 484 return boot_params;
485
666fail2: 486fail2:
667 efi_free(sys_table, options_size, hdr->cmd_line_ptr); 487 efi_free(sys_table, options_size, hdr->cmd_line_ptr);
668fail: 488fail:
669 efi_free(sys_table, 0x4000, (unsigned long)boot_params); 489 efi_free(sys_table, 0x4000, (unsigned long)boot_params);
490
670 return NULL; 491 return NULL;
671} 492}
672 493
@@ -678,7 +499,7 @@ static void add_e820ext(struct boot_params *params,
678 unsigned long size; 499 unsigned long size;
679 500
680 e820ext->type = SETUP_E820_EXT; 501 e820ext->type = SETUP_E820_EXT;
681 e820ext->len = nr_entries * sizeof(struct boot_e820_entry); 502 e820ext->len = nr_entries * sizeof(struct boot_e820_entry);
682 e820ext->next = 0; 503 e820ext->next = 0;
683 504
684 data = (struct setup_data *)(unsigned long)params->hdr.setup_data; 505 data = (struct setup_data *)(unsigned long)params->hdr.setup_data;
@@ -692,8 +513,8 @@ static void add_e820ext(struct boot_params *params,
692 params->hdr.setup_data = (unsigned long)e820ext; 513 params->hdr.setup_data = (unsigned long)e820ext;
693} 514}
694 515
695static efi_status_t setup_e820(struct boot_params *params, 516static efi_status_t
696 struct setup_data *e820ext, u32 e820ext_size) 517setup_e820(struct boot_params *params, struct setup_data *e820ext, u32 e820ext_size)
697{ 518{
698 struct boot_e820_entry *entry = params->e820_table; 519 struct boot_e820_entry *entry = params->e820_table;
699 struct efi_info *efi = &params->efi_info; 520 struct efi_info *efi = &params->efi_info;
@@ -814,11 +635,10 @@ static efi_status_t alloc_e820ext(u32 nr_desc, struct setup_data **e820ext,
814} 635}
815 636
816struct exit_boot_struct { 637struct exit_boot_struct {
817 struct boot_params *boot_params; 638 struct boot_params *boot_params;
818 struct efi_info *efi; 639 struct efi_info *efi;
819 struct setup_data *e820ext; 640 struct setup_data *e820ext;
820 __u32 e820ext_size; 641 __u32 e820ext_size;
821 bool is64;
822}; 642};
823 643
824static efi_status_t exit_boot_func(efi_system_table_t *sys_table_arg, 644static efi_status_t exit_boot_func(efi_system_table_t *sys_table_arg,
@@ -845,25 +665,25 @@ static efi_status_t exit_boot_func(efi_system_table_t *sys_table_arg,
845 first = false; 665 first = false;
846 } 666 }
847 667
848 signature = p->is64 ? EFI64_LOADER_SIGNATURE : EFI32_LOADER_SIGNATURE; 668 signature = efi_is_64bit() ? EFI64_LOADER_SIGNATURE
669 : EFI32_LOADER_SIGNATURE;
849 memcpy(&p->efi->efi_loader_signature, signature, sizeof(__u32)); 670 memcpy(&p->efi->efi_loader_signature, signature, sizeof(__u32));
850 671
851 p->efi->efi_systab = (unsigned long)sys_table_arg; 672 p->efi->efi_systab = (unsigned long)sys_table_arg;
852 p->efi->efi_memdesc_size = *map->desc_size; 673 p->efi->efi_memdesc_size = *map->desc_size;
853 p->efi->efi_memdesc_version = *map->desc_ver; 674 p->efi->efi_memdesc_version = *map->desc_ver;
854 p->efi->efi_memmap = (unsigned long)*map->map; 675 p->efi->efi_memmap = (unsigned long)*map->map;
855 p->efi->efi_memmap_size = *map->map_size; 676 p->efi->efi_memmap_size = *map->map_size;
856 677
857#ifdef CONFIG_X86_64 678#ifdef CONFIG_X86_64
858 p->efi->efi_systab_hi = (unsigned long)sys_table_arg >> 32; 679 p->efi->efi_systab_hi = (unsigned long)sys_table_arg >> 32;
859 p->efi->efi_memmap_hi = (unsigned long)*map->map >> 32; 680 p->efi->efi_memmap_hi = (unsigned long)*map->map >> 32;
860#endif 681#endif
861 682
862 return EFI_SUCCESS; 683 return EFI_SUCCESS;
863} 684}
864 685
865static efi_status_t exit_boot(struct boot_params *boot_params, 686static efi_status_t exit_boot(struct boot_params *boot_params, void *handle)
866 void *handle, bool is64)
867{ 687{
868 unsigned long map_sz, key, desc_size, buff_size; 688 unsigned long map_sz, key, desc_size, buff_size;
869 efi_memory_desc_t *mem_map; 689 efi_memory_desc_t *mem_map;
@@ -874,17 +694,16 @@ static efi_status_t exit_boot(struct boot_params *boot_params,
874 struct efi_boot_memmap map; 694 struct efi_boot_memmap map;
875 struct exit_boot_struct priv; 695 struct exit_boot_struct priv;
876 696
877 map.map = &mem_map; 697 map.map = &mem_map;
878 map.map_size = &map_sz; 698 map.map_size = &map_sz;
879 map.desc_size = &desc_size; 699 map.desc_size = &desc_size;
880 map.desc_ver = &desc_version; 700 map.desc_ver = &desc_version;
881 map.key_ptr = &key; 701 map.key_ptr = &key;
882 map.buff_size = &buff_size; 702 map.buff_size = &buff_size;
883 priv.boot_params = boot_params; 703 priv.boot_params = boot_params;
884 priv.efi = &boot_params->efi_info; 704 priv.efi = &boot_params->efi_info;
885 priv.e820ext = NULL; 705 priv.e820ext = NULL;
886 priv.e820ext_size = 0; 706 priv.e820ext_size = 0;
887 priv.is64 = is64;
888 707
889 /* Might as well exit boot services now */ 708 /* Might as well exit boot services now */
890 status = efi_exit_boot_services(sys_table, handle, &map, &priv, 709 status = efi_exit_boot_services(sys_table, handle, &map, &priv,
@@ -892,10 +711,11 @@ static efi_status_t exit_boot(struct boot_params *boot_params,
892 if (status != EFI_SUCCESS) 711 if (status != EFI_SUCCESS)
893 return status; 712 return status;
894 713
895 e820ext = priv.e820ext; 714 e820ext = priv.e820ext;
896 e820ext_size = priv.e820ext_size; 715 e820ext_size = priv.e820ext_size;
716
897 /* Historic? */ 717 /* Historic? */
898 boot_params->alt_mem_k = 32 * 1024; 718 boot_params->alt_mem_k = 32 * 1024;
899 719
900 status = setup_e820(boot_params, e820ext, e820ext_size); 720 status = setup_e820(boot_params, e820ext, e820ext_size);
901 if (status != EFI_SUCCESS) 721 if (status != EFI_SUCCESS)
@@ -908,8 +728,8 @@ static efi_status_t exit_boot(struct boot_params *boot_params,
908 * On success we return a pointer to a boot_params structure, and NULL 728 * On success we return a pointer to a boot_params structure, and NULL
909 * on failure. 729 * on failure.
910 */ 730 */
911struct boot_params *efi_main(struct efi_config *c, 731struct boot_params *
912 struct boot_params *boot_params) 732efi_main(struct efi_config *c, struct boot_params *boot_params)
913{ 733{
914 struct desc_ptr *gdt = NULL; 734 struct desc_ptr *gdt = NULL;
915 efi_loaded_image_t *image; 735 efi_loaded_image_t *image;
@@ -918,13 +738,11 @@ struct boot_params *efi_main(struct efi_config *c,
918 struct desc_struct *desc; 738 struct desc_struct *desc;
919 void *handle; 739 void *handle;
920 efi_system_table_t *_table; 740 efi_system_table_t *_table;
921 bool is64;
922 741
923 efi_early = c; 742 efi_early = c;
924 743
925 _table = (efi_system_table_t *)(unsigned long)efi_early->table; 744 _table = (efi_system_table_t *)(unsigned long)efi_early->table;
926 handle = (void *)(unsigned long)efi_early->image_handle; 745 handle = (void *)(unsigned long)efi_early->image_handle;
927 is64 = efi_early->is64;
928 746
929 sys_table = _table; 747 sys_table = _table;
930 748
@@ -932,7 +750,7 @@ struct boot_params *efi_main(struct efi_config *c,
932 if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) 750 if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
933 goto fail; 751 goto fail;
934 752
935 if (is64) 753 if (efi_is_64bit())
936 setup_boot_services64(efi_early); 754 setup_boot_services64(efi_early);
937 else 755 else
938 setup_boot_services32(efi_early); 756 setup_boot_services32(efi_early);
@@ -957,7 +775,7 @@ struct boot_params *efi_main(struct efi_config *c,
957 status = efi_call_early(allocate_pool, EFI_LOADER_DATA, 775 status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
958 sizeof(*gdt), (void **)&gdt); 776 sizeof(*gdt), (void **)&gdt);
959 if (status != EFI_SUCCESS) { 777 if (status != EFI_SUCCESS) {
960 efi_printk(sys_table, "Failed to alloc mem for gdt structure\n"); 778 efi_printk(sys_table, "Failed to allocate memory for 'gdt' structure\n");
961 goto fail; 779 goto fail;
962 } 780 }
963 781
@@ -965,7 +783,7 @@ struct boot_params *efi_main(struct efi_config *c,
965 status = efi_low_alloc(sys_table, gdt->size, 8, 783 status = efi_low_alloc(sys_table, gdt->size, 8,
966 (unsigned long *)&gdt->address); 784 (unsigned long *)&gdt->address);
967 if (status != EFI_SUCCESS) { 785 if (status != EFI_SUCCESS) {
968 efi_printk(sys_table, "Failed to alloc mem for gdt\n"); 786 efi_printk(sys_table, "Failed to allocate memory for 'gdt'\n");
969 goto fail; 787 goto fail;
970 } 788 }
971 789
@@ -988,7 +806,7 @@ struct boot_params *efi_main(struct efi_config *c,
988 hdr->code32_start = bzimage_addr; 806 hdr->code32_start = bzimage_addr;
989 } 807 }
990 808
991 status = exit_boot(boot_params, handle, is64); 809 status = exit_boot(boot_params, handle);
992 if (status != EFI_SUCCESS) { 810 if (status != EFI_SUCCESS) {
993 efi_printk(sys_table, "exit_boot() failed!\n"); 811 efi_printk(sys_table, "exit_boot() failed!\n");
994 goto fail; 812 goto fail;
@@ -1002,19 +820,20 @@ struct boot_params *efi_main(struct efi_config *c,
1002 820
1003 if (IS_ENABLED(CONFIG_X86_64)) { 821 if (IS_ENABLED(CONFIG_X86_64)) {
1004 /* __KERNEL32_CS */ 822 /* __KERNEL32_CS */
1005 desc->limit0 = 0xffff; 823 desc->limit0 = 0xffff;
1006 desc->base0 = 0x0000; 824 desc->base0 = 0x0000;
1007 desc->base1 = 0x0000; 825 desc->base1 = 0x0000;
1008 desc->type = SEG_TYPE_CODE | SEG_TYPE_EXEC_READ; 826 desc->type = SEG_TYPE_CODE | SEG_TYPE_EXEC_READ;
1009 desc->s = DESC_TYPE_CODE_DATA; 827 desc->s = DESC_TYPE_CODE_DATA;
1010 desc->dpl = 0; 828 desc->dpl = 0;
1011 desc->p = 1; 829 desc->p = 1;
1012 desc->limit1 = 0xf; 830 desc->limit1 = 0xf;
1013 desc->avl = 0; 831 desc->avl = 0;
1014 desc->l = 0; 832 desc->l = 0;
1015 desc->d = SEG_OP_SIZE_32BIT; 833 desc->d = SEG_OP_SIZE_32BIT;
1016 desc->g = SEG_GRANULARITY_4KB; 834 desc->g = SEG_GRANULARITY_4KB;
1017 desc->base2 = 0x00; 835 desc->base2 = 0x00;
836
1018 desc++; 837 desc++;
1019 } else { 838 } else {
1020 /* Second entry is unused on 32-bit */ 839 /* Second entry is unused on 32-bit */
@@ -1022,15 +841,16 @@ struct boot_params *efi_main(struct efi_config *c,
1022 } 841 }
1023 842
1024 /* __KERNEL_CS */ 843 /* __KERNEL_CS */
1025 desc->limit0 = 0xffff; 844 desc->limit0 = 0xffff;
1026 desc->base0 = 0x0000; 845 desc->base0 = 0x0000;
1027 desc->base1 = 0x0000; 846 desc->base1 = 0x0000;
1028 desc->type = SEG_TYPE_CODE | SEG_TYPE_EXEC_READ; 847 desc->type = SEG_TYPE_CODE | SEG_TYPE_EXEC_READ;
1029 desc->s = DESC_TYPE_CODE_DATA; 848 desc->s = DESC_TYPE_CODE_DATA;
1030 desc->dpl = 0; 849 desc->dpl = 0;
1031 desc->p = 1; 850 desc->p = 1;
1032 desc->limit1 = 0xf; 851 desc->limit1 = 0xf;
1033 desc->avl = 0; 852 desc->avl = 0;
853
1034 if (IS_ENABLED(CONFIG_X86_64)) { 854 if (IS_ENABLED(CONFIG_X86_64)) {
1035 desc->l = 1; 855 desc->l = 1;
1036 desc->d = 0; 856 desc->d = 0;
@@ -1038,41 +858,41 @@ struct boot_params *efi_main(struct efi_config *c,
1038 desc->l = 0; 858 desc->l = 0;
1039 desc->d = SEG_OP_SIZE_32BIT; 859 desc->d = SEG_OP_SIZE_32BIT;
1040 } 860 }
1041 desc->g = SEG_GRANULARITY_4KB; 861 desc->g = SEG_GRANULARITY_4KB;
1042 desc->base2 = 0x00; 862 desc->base2 = 0x00;
1043 desc++; 863 desc++;
1044 864
1045 /* __KERNEL_DS */ 865 /* __KERNEL_DS */
1046 desc->limit0 = 0xffff; 866 desc->limit0 = 0xffff;
1047 desc->base0 = 0x0000; 867 desc->base0 = 0x0000;
1048 desc->base1 = 0x0000; 868 desc->base1 = 0x0000;
1049 desc->type = SEG_TYPE_DATA | SEG_TYPE_READ_WRITE; 869 desc->type = SEG_TYPE_DATA | SEG_TYPE_READ_WRITE;
1050 desc->s = DESC_TYPE_CODE_DATA; 870 desc->s = DESC_TYPE_CODE_DATA;
1051 desc->dpl = 0; 871 desc->dpl = 0;
1052 desc->p = 1; 872 desc->p = 1;
1053 desc->limit1 = 0xf; 873 desc->limit1 = 0xf;
1054 desc->avl = 0; 874 desc->avl = 0;
1055 desc->l = 0; 875 desc->l = 0;
1056 desc->d = SEG_OP_SIZE_32BIT; 876 desc->d = SEG_OP_SIZE_32BIT;
1057 desc->g = SEG_GRANULARITY_4KB; 877 desc->g = SEG_GRANULARITY_4KB;
1058 desc->base2 = 0x00; 878 desc->base2 = 0x00;
1059 desc++; 879 desc++;
1060 880
1061 if (IS_ENABLED(CONFIG_X86_64)) { 881 if (IS_ENABLED(CONFIG_X86_64)) {
1062 /* Task segment value */ 882 /* Task segment value */
1063 desc->limit0 = 0x0000; 883 desc->limit0 = 0x0000;
1064 desc->base0 = 0x0000; 884 desc->base0 = 0x0000;
1065 desc->base1 = 0x0000; 885 desc->base1 = 0x0000;
1066 desc->type = SEG_TYPE_TSS; 886 desc->type = SEG_TYPE_TSS;
1067 desc->s = 0; 887 desc->s = 0;
1068 desc->dpl = 0; 888 desc->dpl = 0;
1069 desc->p = 1; 889 desc->p = 1;
1070 desc->limit1 = 0x0; 890 desc->limit1 = 0x0;
1071 desc->avl = 0; 891 desc->avl = 0;
1072 desc->l = 0; 892 desc->l = 0;
1073 desc->d = 0; 893 desc->d = 0;
1074 desc->g = SEG_GRANULARITY_4KB; 894 desc->g = SEG_GRANULARITY_4KB;
1075 desc->base2 = 0x00; 895 desc->base2 = 0x00;
1076 desc++; 896 desc++;
1077 } 897 }
1078 898
@@ -1082,5 +902,6 @@ struct boot_params *efi_main(struct efi_config *c,
1082 return boot_params; 902 return boot_params;
1083fail: 903fail:
1084 efi_printk(sys_table, "efi_main() failed!\n"); 904 efi_printk(sys_table, "efi_main() failed!\n");
905
1085 return NULL; 906 return NULL;
1086} 907}
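
The two hunks above remove the per-call "is64" plumbing from exit_boot() and efi_main(): instead of copying efi_early->is64 into locals and passing it through arguments (and struct exit_boot_struct), callers query it where needed via efi_is_64bit(). A minimal sketch of what such a helper amounts to, assuming efi_early remains visible as a global in eboot.c (the in-tree helper may additionally take CONFIG_X86_64/CONFIG_EFI_MIXED into account):

static inline bool efi_is_64bit(void)
{
	/* Sketch: report the firmware word size recorded by the EFI
	 * stub entry code in the global efi_early descriptor. */
	return efi_early->is64;
}
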
diff --git a/arch/x86/boot/compressed/eboot.h b/arch/x86/boot/compressed/eboot.h
index e799dc5c6448..8297387c4676 100644
--- a/arch/x86/boot/compressed/eboot.h
+++ b/arch/x86/boot/compressed/eboot.h
@@ -12,22 +12,22 @@
12 12
13#define DESC_TYPE_CODE_DATA (1 << 0) 13#define DESC_TYPE_CODE_DATA (1 << 0)
14 14
15struct efi_uga_draw_protocol_32 { 15typedef struct {
16 u32 get_mode; 16 u32 get_mode;
17 u32 set_mode; 17 u32 set_mode;
18 u32 blt; 18 u32 blt;
19}; 19} efi_uga_draw_protocol_32_t;
20 20
21struct efi_uga_draw_protocol_64 { 21typedef struct {
22 u64 get_mode; 22 u64 get_mode;
23 u64 set_mode; 23 u64 set_mode;
24 u64 blt; 24 u64 blt;
25}; 25} efi_uga_draw_protocol_64_t;
26 26
27struct efi_uga_draw_protocol { 27typedef struct {
28 void *get_mode; 28 void *get_mode;
29 void *set_mode; 29 void *set_mode;
30 void *blt; 30 void *blt;
31}; 31} efi_uga_draw_protocol_t;
32 32
33#endif /* BOOT_COMPRESSED_EBOOT_H */ 33#endif /* BOOT_COMPRESSED_EBOOT_H */
diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index b87a7582853d..302517929932 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -102,7 +102,7 @@ static bool memmap_too_large;
102 102
103 103
104/* Store memory limit specified by "mem=nn[KMG]" or "memmap=nn[KMG]" */ 104/* Store memory limit specified by "mem=nn[KMG]" or "memmap=nn[KMG]" */
105unsigned long long mem_limit = ULLONG_MAX; 105static unsigned long long mem_limit = ULLONG_MAX;
106 106
107 107
108enum mem_avoid_index { 108enum mem_avoid_index {
@@ -215,7 +215,36 @@ static void mem_avoid_memmap(char *str)
215 memmap_too_large = true; 215 memmap_too_large = true;
216} 216}
217 217
218static int handle_mem_memmap(void) 218/* Store the number of 1GB huge pages which users specified: */
219static unsigned long max_gb_huge_pages;
220
221static void parse_gb_huge_pages(char *param, char *val)
222{
223 static bool gbpage_sz;
224 char *p;
225
226 if (!strcmp(param, "hugepagesz")) {
227 p = val;
228 if (memparse(p, &p) != PUD_SIZE) {
229 gbpage_sz = false;
230 return;
231 }
232
233 if (gbpage_sz)
234 warn("Repeatedly set hugeTLB page size of 1G!\n");
235 gbpage_sz = true;
236 return;
237 }
238
239 if (!strcmp(param, "hugepages") && gbpage_sz) {
240 p = val;
241 max_gb_huge_pages = simple_strtoull(p, &p, 0);
242 return;
243 }
244}
245
246
247static int handle_mem_options(void)
219{ 248{
220 char *args = (char *)get_cmd_line_ptr(); 249 char *args = (char *)get_cmd_line_ptr();
221 size_t len = strlen((char *)args); 250 size_t len = strlen((char *)args);
@@ -223,7 +252,8 @@ static int handle_mem_memmap(void)
223 char *param, *val; 252 char *param, *val;
224 u64 mem_size; 253 u64 mem_size;
225 254
226 if (!strstr(args, "memmap=") && !strstr(args, "mem=")) 255 if (!strstr(args, "memmap=") && !strstr(args, "mem=") &&
256 !strstr(args, "hugepages"))
227 return 0; 257 return 0;
228 258
229 tmp_cmdline = malloc(len + 1); 259 tmp_cmdline = malloc(len + 1);
@@ -248,6 +278,8 @@ static int handle_mem_memmap(void)
248 278
249 if (!strcmp(param, "memmap")) { 279 if (!strcmp(param, "memmap")) {
250 mem_avoid_memmap(val); 280 mem_avoid_memmap(val);
281 } else if (strstr(param, "hugepages")) {
282 parse_gb_huge_pages(param, val);
251 } else if (!strcmp(param, "mem")) { 283 } else if (!strcmp(param, "mem")) {
252 char *p = val; 284 char *p = val;
253 285
@@ -387,7 +419,7 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size,
387 /* We don't need to set a mapping for setup_data. */ 419 /* We don't need to set a mapping for setup_data. */
388 420
389 /* Mark the memmap regions we need to avoid */ 421 /* Mark the memmap regions we need to avoid */
390 handle_mem_memmap(); 422 handle_mem_options();
391 423
392#ifdef CONFIG_X86_VERBOSE_BOOTUP 424#ifdef CONFIG_X86_VERBOSE_BOOTUP
393 /* Make sure video RAM can be used. */ 425 /* Make sure video RAM can be used. */
@@ -466,6 +498,60 @@ static void store_slot_info(struct mem_vector *region, unsigned long image_size)
466 } 498 }
467} 499}
468 500
501/*
502 * Skip as many 1GB huge pages as possible in the passed region
503 * according to the number which users specified:
504 */
505static void
506process_gb_huge_pages(struct mem_vector *region, unsigned long image_size)
507{
508 unsigned long addr, size = 0;
509 struct mem_vector tmp;
510 int i = 0;
511
512 if (!max_gb_huge_pages) {
513 store_slot_info(region, image_size);
514 return;
515 }
516
517 addr = ALIGN(region->start, PUD_SIZE);
518 /* Did we raise the address above the passed in memory entry? */
519 if (addr < region->start + region->size)
520 size = region->size - (addr - region->start);
521
522 /* Check how many 1GB huge pages can be filtered out: */
523 while (size > PUD_SIZE && max_gb_huge_pages) {
524 size -= PUD_SIZE;
525 max_gb_huge_pages--;
526 i++;
527 }
528
529 /* No good 1GB huge pages found: */
530 if (!i) {
531 store_slot_info(region, image_size);
532 return;
533 }
534
535 /*
536 * Skip those 'i'*1GB good huge pages, and continue checking and
537 * processing the remaining head or tail part of the passed region
538 * if available.
539 */
540
541 if (addr >= region->start + image_size) {
542 tmp.start = region->start;
543 tmp.size = addr - region->start;
544 store_slot_info(&tmp, image_size);
545 }
546
547 size = region->size - (addr - region->start) - i * PUD_SIZE;
548 if (size >= image_size) {
549 tmp.start = addr + i * PUD_SIZE;
550 tmp.size = size;
551 store_slot_info(&tmp, image_size);
552 }
553}
554
469static unsigned long slots_fetch_random(void) 555static unsigned long slots_fetch_random(void)
470{ 556{
471 unsigned long slot; 557 unsigned long slot;
@@ -546,7 +632,7 @@ static void process_mem_region(struct mem_vector *entry,
546 632
547 /* If nothing overlaps, store the region and return. */ 633 /* If nothing overlaps, store the region and return. */
548 if (!mem_avoid_overlap(&region, &overlap)) { 634 if (!mem_avoid_overlap(&region, &overlap)) {
549 store_slot_info(&region, image_size); 635 process_gb_huge_pages(&region, image_size);
550 return; 636 return;
551 } 637 }
552 638
@@ -556,7 +642,7 @@ static void process_mem_region(struct mem_vector *entry,
556 642
557 beginning.start = region.start; 643 beginning.start = region.start;
558 beginning.size = overlap.start - region.start; 644 beginning.size = overlap.start - region.start;
559 store_slot_info(&beginning, image_size); 645 process_gb_huge_pages(&beginning, image_size);
560 } 646 }
561 647
562 /* Return if overlap extends to or past end of region. */ 648 /* Return if overlap extends to or past end of region. */
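
The kaslr.c changes teach the KASLR slot walker about "hugepagesz=1G hugepages=N" on the kernel command line: handle_mem_options() now also parses those options, and process_gb_huge_pages() carves up to max_gb_huge_pages aligned 1GB chunks out of each otherwise-usable region before the remainder is offered to store_slot_info(), so the randomized kernel image does not occupy memory the user wants for 1GB hugetlb pages. A simplified, self-contained model of that splitting (hypothetical names, userspace C, not the kernel code):

#include <stdio.h>

#define GB	(1UL << 30)

struct region { unsigned long start, size; };

/*
 * Sketch: reserve up to *budget aligned 1GB chunks inside 'r' and print
 * the head/tail pieces that stay usable for an image of 'image_size'.
 */
static void split_around_gb_pages(struct region r, unsigned long image_size,
				  unsigned long *budget)
{
	unsigned long aligned = (r.start + GB - 1) & ~(GB - 1);
	unsigned long avail = 0, chunks = 0;

	if (aligned < r.start + r.size)
		avail = r.size - (aligned - r.start);

	while (avail >= GB && *budget) {	/* how many whole chunks fit? */
		avail -= GB;
		(*budget)--;
		chunks++;
	}

	if (!chunks) {				/* nothing carved out */
		printf("slot: %#lx + %#lx\n", r.start, r.size);
		return;
	}

	if (aligned - r.start >= image_size)	/* head piece before the chunks */
		printf("slot: %#lx + %#lx\n", r.start, aligned - r.start);

	if (r.size - (aligned - r.start) - chunks * GB >= image_size)
		printf("slot: %#lx + %#lx\n",	/* tail piece after the chunks */
		       aligned + chunks * GB,
		       r.size - (aligned - r.start) - chunks * GB);
}

int main(void)
{
	unsigned long budget = 2;		/* e.g. hugepages=2 */
	struct region r = { .start = 0x100000000UL, .size = 5UL * GB };

	split_around_gb_pages(r, 16UL << 20, &budget);
	return 0;
}
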
diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
index 8c5107545251..9e2157371491 100644
--- a/arch/x86/boot/compressed/pgtable_64.c
+++ b/arch/x86/boot/compressed/pgtable_64.c
@@ -1,3 +1,4 @@
1#include <asm/e820/types.h>
1#include <asm/processor.h> 2#include <asm/processor.h>
2#include "pgtable.h" 3#include "pgtable.h"
3#include "../string.h" 4#include "../string.h"
@@ -34,10 +35,62 @@ unsigned long *trampoline_32bit __section(.data);
34extern struct boot_params *boot_params; 35extern struct boot_params *boot_params;
35int cmdline_find_option_bool(const char *option); 36int cmdline_find_option_bool(const char *option);
36 37
38static unsigned long find_trampoline_placement(void)
39{
40 unsigned long bios_start, ebda_start;
41 unsigned long trampoline_start;
42 struct boot_e820_entry *entry;
43 int i;
44
45 /*
46 * Find a suitable spot for the trampoline.
47 * This code is based on reserve_bios_regions().
48 */
49
50 ebda_start = *(unsigned short *)0x40e << 4;
51 bios_start = *(unsigned short *)0x413 << 10;
52
53 if (bios_start < BIOS_START_MIN || bios_start > BIOS_START_MAX)
54 bios_start = BIOS_START_MAX;
55
56 if (ebda_start > BIOS_START_MIN && ebda_start < bios_start)
57 bios_start = ebda_start;
58
59 bios_start = round_down(bios_start, PAGE_SIZE);
60
61 /* Find the first usable memory region under bios_start. */
62 for (i = boot_params->e820_entries - 1; i >= 0; i--) {
63 entry = &boot_params->e820_table[i];
64
65 /* Skip all entries above bios_start. */
66 if (bios_start <= entry->addr)
67 continue;
68
69 /* Skip non-RAM entries. */
70 if (entry->type != E820_TYPE_RAM)
71 continue;
72
73 /* Adjust bios_start to the end of the entry if needed. */
74 if (bios_start > entry->addr + entry->size)
75 bios_start = entry->addr + entry->size;
76
77 /* Keep bios_start page-aligned. */
78 bios_start = round_down(bios_start, PAGE_SIZE);
79
80 /* Skip the entry if it's too small. */
81 if (bios_start - TRAMPOLINE_32BIT_SIZE < entry->addr)
82 continue;
83
84 break;
85 }
86
87 /* Place the trampoline just below the end of low memory */
88 return bios_start - TRAMPOLINE_32BIT_SIZE;
89}
90
37struct paging_config paging_prepare(void *rmode) 91struct paging_config paging_prepare(void *rmode)
38{ 92{
39 struct paging_config paging_config = {}; 93 struct paging_config paging_config = {};
40 unsigned long bios_start, ebda_start;
41 94
42 /* Initialize boot_params. Required for cmdline_find_option_bool(). */ 95 /* Initialize boot_params. Required for cmdline_find_option_bool(). */
43 boot_params = rmode; 96 boot_params = rmode;
@@ -61,23 +114,7 @@ struct paging_config paging_prepare(void *rmode)
61 paging_config.l5_required = 1; 114 paging_config.l5_required = 1;
62 } 115 }
63 116
64 /* 117 paging_config.trampoline_start = find_trampoline_placement();
65 * Find a suitable spot for the trampoline.
66 * This code is based on reserve_bios_regions().
67 */
68
69 ebda_start = *(unsigned short *)0x40e << 4;
70 bios_start = *(unsigned short *)0x413 << 10;
71
72 if (bios_start < BIOS_START_MIN || bios_start > BIOS_START_MAX)
73 bios_start = BIOS_START_MAX;
74
75 if (ebda_start > BIOS_START_MIN && ebda_start < bios_start)
76 bios_start = ebda_start;
77
78 /* Place the trampoline just below the end of low memory, aligned to 4k */
79 paging_config.trampoline_start = bios_start - TRAMPOLINE_32BIT_SIZE;
80 paging_config.trampoline_start = round_down(paging_config.trampoline_start, PAGE_SIZE);
81 118
82 trampoline_32bit = (unsigned long *)paging_config.trampoline_start; 119 trampoline_32bit = (unsigned long *)paging_config.trampoline_start;
83 120
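
find_trampoline_placement() keeps the old heuristic (use the lower of the EBDA base and the top of BIOS low memory, clamped to the BIOS_START_MIN/BIOS_START_MAX window) but now also walks the e820 table downwards, so the 32-bit trampoline only lands in memory the firmware actually reports as E820_TYPE_RAM. The two magic BIOS Data Area loads deserve a gloss; the following only restates them with comments (these are the standard real-mode BDA locations, still addressable at this point in the decompressor):

/*
 * BIOS Data Area:
 *   16-bit word at 0x40e - EBDA base as a real-mode segment, << 4 gives bytes
 *   16-bit word at 0x413 - base (low) memory size in KiB,   << 10 gives bytes
 */
ebda_start = *(unsigned short *)0x40e << 4;
bios_start = *(unsigned short *)0x413 << 10;
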
diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c
index 16f49123d747..c4428a176973 100644
--- a/arch/x86/boot/string.c
+++ b/arch/x86/boot/string.c
@@ -13,6 +13,7 @@
13 */ 13 */
14 14
15#include <linux/types.h> 15#include <linux/types.h>
16#include <asm/asm.h>
16#include "ctype.h" 17#include "ctype.h"
17#include "string.h" 18#include "string.h"
18 19
@@ -28,8 +29,8 @@
28int memcmp(const void *s1, const void *s2, size_t len) 29int memcmp(const void *s1, const void *s2, size_t len)
29{ 30{
30 bool diff; 31 bool diff;
31 asm("repe; cmpsb; setnz %0" 32 asm("repe; cmpsb" CC_SET(nz)
32 : "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len)); 33 : CC_OUT(nz) (diff), "+D" (s1), "+S" (s2), "+c" (len));
33 return diff; 34 return diff;
34} 35}
35 36
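
The memcmp() tweak in string.c swaps the hand-rolled "setnz %0" for the CC_SET()/CC_OUT() helpers from asm/asm.h, so compilers that support asm flag outputs can consume ZF directly instead of materialising it with setcc. From memory, the helpers look roughly like this (check asm/asm.h for the exact text):

#ifdef __GCC_ASM_FLAG_OUTPUTS__
/* GCC 6+ / clang: the flag itself becomes the asm output operand */
# define CC_SET(c)	"\n\t/* output condition code " #c " */\n"
# define CC_OUT(c)	"=@cc" #c
#else
/* older compilers: fall back to set<cc> into a register/memory operand */
# define CC_SET(c)	"\n\tset" #c " %[_cc_" #c "]\n"
# define CC_OUT(c)	[_cc_ ## c] "=qm"
#endif
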
diff --git a/arch/x86/crypto/aegis128-aesni-asm.S b/arch/x86/crypto/aegis128-aesni-asm.S
index 717bf0776421..5f7e43d4f64a 100644
--- a/arch/x86/crypto/aegis128-aesni-asm.S
+++ b/arch/x86/crypto/aegis128-aesni-asm.S
@@ -75,7 +75,7 @@
75 * %r9 75 * %r9
76 */ 76 */
77__load_partial: 77__load_partial:
78 xor %r9, %r9 78 xor %r9d, %r9d
79 pxor MSG, MSG 79 pxor MSG, MSG
80 80
81 mov LEN, %r8 81 mov LEN, %r8
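
The "xor %r9, %r9" -> "xor %r9d, %r9d" change here (and the matching ones in the other aegis/morus/aesni/sha1 files below) does not alter behaviour: on x86-64, writing a 32-bit register implicitly zero-extends into the full 64-bit register, so both forms leave %r9 zeroed. Dropping the REX.W prefix shrinks the encoding for the legacy registers (the later %rax -> %eax cases) and is the conventional zeroing idiom either way. A throwaway illustration in C inline assembly (hypothetical helpers, purely for comparison):

/* Both leave the full 64-bit %r9 equal to zero; the 32-bit form never
 * needs the REX.W prefix. */
static inline void zero_r9_64(void) { __asm__ volatile("xor %%r9, %%r9"   ::: "r9"); }
static inline void zero_r9_32(void) { __asm__ volatile("xor %%r9d, %%r9d" ::: "r9"); }
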
diff --git a/arch/x86/crypto/aegis128-aesni-glue.c b/arch/x86/crypto/aegis128-aesni-glue.c
index 5de7c0d46edf..acd11b3bf639 100644
--- a/arch/x86/crypto/aegis128-aesni-glue.c
+++ b/arch/x86/crypto/aegis128-aesni-glue.c
@@ -375,16 +375,12 @@ static struct aead_alg crypto_aegis128_aesni_alg[] = {
375 } 375 }
376}; 376};
377 377
378static const struct x86_cpu_id aesni_cpu_id[] = {
379 X86_FEATURE_MATCH(X86_FEATURE_AES),
380 X86_FEATURE_MATCH(X86_FEATURE_XMM2),
381 {}
382};
383MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id);
384
385static int __init crypto_aegis128_aesni_module_init(void) 378static int __init crypto_aegis128_aesni_module_init(void)
386{ 379{
387 if (!x86_match_cpu(aesni_cpu_id)) 380 if (!boot_cpu_has(X86_FEATURE_XMM2) ||
381 !boot_cpu_has(X86_FEATURE_AES) ||
382 !boot_cpu_has(X86_FEATURE_OSXSAVE) ||
383 !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
388 return -ENODEV; 384 return -ENODEV;
389 385
390 return crypto_register_aeads(crypto_aegis128_aesni_alg, 386 return crypto_register_aeads(crypto_aegis128_aesni_alg,
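
Dropping the x86_cpu_id tables (and their MODULE_DEVICE_TABLE(x86cpu, ...) entries) removes the modalias that auto-loaded these modules on any CPU advertising AES/SSE2; the init functions now open-code the feature tests and additionally require OSXSAVE plus the matching XSAVE state, so the SIMD registers are actually usable before the AEADs are registered. The guard boils down to this pattern (restating the hunks, no new API):

static int __init simd_aead_module_init(void)
{
	/* Refuse to load unless the instruction-set features are present
	 * and the OS has enabled the corresponding XSAVE state. */
	if (!boot_cpu_has(X86_FEATURE_XMM2) ||
	    !boot_cpu_has(X86_FEATURE_OSXSAVE) ||
	    !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
		return -ENODEV;

	return 0;	/* register the algorithms here */
}
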
diff --git a/arch/x86/crypto/aegis128l-aesni-asm.S b/arch/x86/crypto/aegis128l-aesni-asm.S
index 4eda2b8db9e1..491dd61c845c 100644
--- a/arch/x86/crypto/aegis128l-aesni-asm.S
+++ b/arch/x86/crypto/aegis128l-aesni-asm.S
@@ -66,7 +66,7 @@
66 * %r9 66 * %r9
67 */ 67 */
68__load_partial: 68__load_partial:
69 xor %r9, %r9 69 xor %r9d, %r9d
70 pxor MSG0, MSG0 70 pxor MSG0, MSG0
71 pxor MSG1, MSG1 71 pxor MSG1, MSG1
72 72
diff --git a/arch/x86/crypto/aegis128l-aesni-glue.c b/arch/x86/crypto/aegis128l-aesni-glue.c
index 876e4866e633..2071c3d1ae07 100644
--- a/arch/x86/crypto/aegis128l-aesni-glue.c
+++ b/arch/x86/crypto/aegis128l-aesni-glue.c
@@ -375,16 +375,12 @@ static struct aead_alg crypto_aegis128l_aesni_alg[] = {
375 } 375 }
376}; 376};
377 377
378static const struct x86_cpu_id aesni_cpu_id[] = {
379 X86_FEATURE_MATCH(X86_FEATURE_AES),
380 X86_FEATURE_MATCH(X86_FEATURE_XMM2),
381 {}
382};
383MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id);
384
385static int __init crypto_aegis128l_aesni_module_init(void) 378static int __init crypto_aegis128l_aesni_module_init(void)
386{ 379{
387 if (!x86_match_cpu(aesni_cpu_id)) 380 if (!boot_cpu_has(X86_FEATURE_XMM2) ||
381 !boot_cpu_has(X86_FEATURE_AES) ||
382 !boot_cpu_has(X86_FEATURE_OSXSAVE) ||
383 !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
388 return -ENODEV; 384 return -ENODEV;
389 385
390 return crypto_register_aeads(crypto_aegis128l_aesni_alg, 386 return crypto_register_aeads(crypto_aegis128l_aesni_alg,
diff --git a/arch/x86/crypto/aegis256-aesni-asm.S b/arch/x86/crypto/aegis256-aesni-asm.S
index 32aae8397268..8870c7c5d9a4 100644
--- a/arch/x86/crypto/aegis256-aesni-asm.S
+++ b/arch/x86/crypto/aegis256-aesni-asm.S
@@ -59,7 +59,7 @@
59 * %r9 59 * %r9
60 */ 60 */
61__load_partial: 61__load_partial:
62 xor %r9, %r9 62 xor %r9d, %r9d
63 pxor MSG, MSG 63 pxor MSG, MSG
64 64
65 mov LEN, %r8 65 mov LEN, %r8
diff --git a/arch/x86/crypto/aegis256-aesni-glue.c b/arch/x86/crypto/aegis256-aesni-glue.c
index 2b5dd3af8f4d..b5f2a8fd5a71 100644
--- a/arch/x86/crypto/aegis256-aesni-glue.c
+++ b/arch/x86/crypto/aegis256-aesni-glue.c
@@ -375,16 +375,12 @@ static struct aead_alg crypto_aegis256_aesni_alg[] = {
375 } 375 }
376}; 376};
377 377
378static const struct x86_cpu_id aesni_cpu_id[] = {
379 X86_FEATURE_MATCH(X86_FEATURE_AES),
380 X86_FEATURE_MATCH(X86_FEATURE_XMM2),
381 {}
382};
383MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id);
384
385static int __init crypto_aegis256_aesni_module_init(void) 378static int __init crypto_aegis256_aesni_module_init(void)
386{ 379{
387 if (!x86_match_cpu(aesni_cpu_id)) 380 if (!boot_cpu_has(X86_FEATURE_XMM2) ||
381 !boot_cpu_has(X86_FEATURE_AES) ||
382 !boot_cpu_has(X86_FEATURE_OSXSAVE) ||
383 !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
388 return -ENODEV; 384 return -ENODEV;
389 385
390 return crypto_register_aeads(crypto_aegis256_aesni_alg, 386 return crypto_register_aeads(crypto_aegis256_aesni_alg,
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index e762ef417562..9bd139569b41 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -258,7 +258,7 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff
258.macro GCM_INIT Iv SUBKEY AAD AADLEN 258.macro GCM_INIT Iv SUBKEY AAD AADLEN
259 mov \AADLEN, %r11 259 mov \AADLEN, %r11
260 mov %r11, AadLen(%arg2) # ctx_data.aad_length = aad_length 260 mov %r11, AadLen(%arg2) # ctx_data.aad_length = aad_length
261 xor %r11, %r11 261 xor %r11d, %r11d
262 mov %r11, InLen(%arg2) # ctx_data.in_length = 0 262 mov %r11, InLen(%arg2) # ctx_data.in_length = 0
263 mov %r11, PBlockLen(%arg2) # ctx_data.partial_block_length = 0 263 mov %r11, PBlockLen(%arg2) # ctx_data.partial_block_length = 0
264 mov %r11, PBlockEncKey(%arg2) # ctx_data.partial_block_enc_key = 0 264 mov %r11, PBlockEncKey(%arg2) # ctx_data.partial_block_enc_key = 0
@@ -286,7 +286,7 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff
286 movdqu HashKey(%arg2), %xmm13 286 movdqu HashKey(%arg2), %xmm13
287 add %arg5, InLen(%arg2) 287 add %arg5, InLen(%arg2)
288 288
289 xor %r11, %r11 # initialise the data pointer offset as zero 289 xor %r11d, %r11d # initialise the data pointer offset as zero
290 PARTIAL_BLOCK %arg3 %arg4 %arg5 %r11 %xmm8 \operation 290 PARTIAL_BLOCK %arg3 %arg4 %arg5 %r11 %xmm8 \operation
291 291
292 sub %r11, %arg5 # sub partial block data used 292 sub %r11, %arg5 # sub partial block data used
@@ -702,7 +702,7 @@ _no_extra_mask_1_\@:
702 702
703 # GHASH computation for the last <16 Byte block 703 # GHASH computation for the last <16 Byte block
704 GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 704 GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
705 xor %rax,%rax 705 xor %eax, %eax
706 706
707 mov %rax, PBlockLen(%arg2) 707 mov %rax, PBlockLen(%arg2)
708 jmp _dec_done_\@ 708 jmp _dec_done_\@
@@ -737,7 +737,7 @@ _no_extra_mask_2_\@:
737 737
738 # GHASH computation for the last <16 Byte block 738 # GHASH computation for the last <16 Byte block
739 GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 739 GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
740 xor %rax,%rax 740 xor %eax, %eax
741 741
742 mov %rax, PBlockLen(%arg2) 742 mov %rax, PBlockLen(%arg2)
743 jmp _encode_done_\@ 743 jmp _encode_done_\@
diff --git a/arch/x86/crypto/aesni-intel_avx-x86_64.S b/arch/x86/crypto/aesni-intel_avx-x86_64.S
index faecb1518bf8..1985ea0b551b 100644
--- a/arch/x86/crypto/aesni-intel_avx-x86_64.S
+++ b/arch/x86/crypto/aesni-intel_avx-x86_64.S
@@ -463,7 +463,7 @@ _get_AAD_rest_final\@:
463 463
464_get_AAD_done\@: 464_get_AAD_done\@:
465 # initialize the data pointer offset as zero 465 # initialize the data pointer offset as zero
466 xor %r11, %r11 466 xor %r11d, %r11d
467 467
468 # start AES for num_initial_blocks blocks 468 # start AES for num_initial_blocks blocks
469 mov arg5, %rax # rax = *Y0 469 mov arg5, %rax # rax = *Y0
@@ -1770,7 +1770,7 @@ _get_AAD_rest_final\@:
1770 1770
1771_get_AAD_done\@: 1771_get_AAD_done\@:
1772 # initialize the data pointer offset as zero 1772 # initialize the data pointer offset as zero
1773 xor %r11, %r11 1773 xor %r11d, %r11d
1774 1774
1775 # start AES for num_initial_blocks blocks 1775 # start AES for num_initial_blocks blocks
1776 mov arg5, %rax # rax = *Y0 1776 mov arg5, %rax # rax = *Y0
diff --git a/arch/x86/crypto/morus1280-avx2-asm.S b/arch/x86/crypto/morus1280-avx2-asm.S
index 07653d4582a6..de182c460f82 100644
--- a/arch/x86/crypto/morus1280-avx2-asm.S
+++ b/arch/x86/crypto/morus1280-avx2-asm.S
@@ -113,7 +113,7 @@ ENDPROC(__morus1280_update_zero)
113 * %r9 113 * %r9
114 */ 114 */
115__load_partial: 115__load_partial:
116 xor %r9, %r9 116 xor %r9d, %r9d
117 vpxor MSG, MSG, MSG 117 vpxor MSG, MSG, MSG
118 118
119 mov %rcx, %r8 119 mov %rcx, %r8
diff --git a/arch/x86/crypto/morus1280-avx2-glue.c b/arch/x86/crypto/morus1280-avx2-glue.c
index f111f36d26dc..6634907d6ccd 100644
--- a/arch/x86/crypto/morus1280-avx2-glue.c
+++ b/arch/x86/crypto/morus1280-avx2-glue.c
@@ -37,15 +37,11 @@ asmlinkage void crypto_morus1280_avx2_final(void *state, void *tag_xor,
37 37
38MORUS1280_DECLARE_ALGS(avx2, "morus1280-avx2", 400); 38MORUS1280_DECLARE_ALGS(avx2, "morus1280-avx2", 400);
39 39
40static const struct x86_cpu_id avx2_cpu_id[] = {
41 X86_FEATURE_MATCH(X86_FEATURE_AVX2),
42 {}
43};
44MODULE_DEVICE_TABLE(x86cpu, avx2_cpu_id);
45
46static int __init crypto_morus1280_avx2_module_init(void) 40static int __init crypto_morus1280_avx2_module_init(void)
47{ 41{
48 if (!x86_match_cpu(avx2_cpu_id)) 42 if (!boot_cpu_has(X86_FEATURE_AVX2) ||
43 !boot_cpu_has(X86_FEATURE_OSXSAVE) ||
44 !cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL))
49 return -ENODEV; 45 return -ENODEV;
50 46
51 return crypto_register_aeads(crypto_morus1280_avx2_algs, 47 return crypto_register_aeads(crypto_morus1280_avx2_algs,
diff --git a/arch/x86/crypto/morus1280-sse2-asm.S b/arch/x86/crypto/morus1280-sse2-asm.S
index bd1aa1b60869..da5d2905db60 100644
--- a/arch/x86/crypto/morus1280-sse2-asm.S
+++ b/arch/x86/crypto/morus1280-sse2-asm.S
@@ -235,7 +235,7 @@ ENDPROC(__morus1280_update_zero)
235 * %r9 235 * %r9
236 */ 236 */
237__load_partial: 237__load_partial:
238 xor %r9, %r9 238 xor %r9d, %r9d
239 pxor MSG_LO, MSG_LO 239 pxor MSG_LO, MSG_LO
240 pxor MSG_HI, MSG_HI 240 pxor MSG_HI, MSG_HI
241 241
diff --git a/arch/x86/crypto/morus1280-sse2-glue.c b/arch/x86/crypto/morus1280-sse2-glue.c
index 839270aa713c..95cf857d2cbb 100644
--- a/arch/x86/crypto/morus1280-sse2-glue.c
+++ b/arch/x86/crypto/morus1280-sse2-glue.c
@@ -37,15 +37,11 @@ asmlinkage void crypto_morus1280_sse2_final(void *state, void *tag_xor,
37 37
38MORUS1280_DECLARE_ALGS(sse2, "morus1280-sse2", 350); 38MORUS1280_DECLARE_ALGS(sse2, "morus1280-sse2", 350);
39 39
40static const struct x86_cpu_id sse2_cpu_id[] = {
41 X86_FEATURE_MATCH(X86_FEATURE_XMM2),
42 {}
43};
44MODULE_DEVICE_TABLE(x86cpu, sse2_cpu_id);
45
46static int __init crypto_morus1280_sse2_module_init(void) 40static int __init crypto_morus1280_sse2_module_init(void)
47{ 41{
48 if (!x86_match_cpu(sse2_cpu_id)) 42 if (!boot_cpu_has(X86_FEATURE_XMM2) ||
43 !boot_cpu_has(X86_FEATURE_OSXSAVE) ||
44 !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
49 return -ENODEV; 45 return -ENODEV;
50 46
51 return crypto_register_aeads(crypto_morus1280_sse2_algs, 47 return crypto_register_aeads(crypto_morus1280_sse2_algs,
diff --git a/arch/x86/crypto/morus640-sse2-asm.S b/arch/x86/crypto/morus640-sse2-asm.S
index efa02816d921..414db480250e 100644
--- a/arch/x86/crypto/morus640-sse2-asm.S
+++ b/arch/x86/crypto/morus640-sse2-asm.S
@@ -113,7 +113,7 @@ ENDPROC(__morus640_update_zero)
113 * %r9 113 * %r9
114 */ 114 */
115__load_partial: 115__load_partial:
116 xor %r9, %r9 116 xor %r9d, %r9d
117 pxor MSG, MSG 117 pxor MSG, MSG
118 118
119 mov %rcx, %r8 119 mov %rcx, %r8
diff --git a/arch/x86/crypto/morus640-sse2-glue.c b/arch/x86/crypto/morus640-sse2-glue.c
index 26b47e2db8d2..615fb7bc9a32 100644
--- a/arch/x86/crypto/morus640-sse2-glue.c
+++ b/arch/x86/crypto/morus640-sse2-glue.c
@@ -37,15 +37,11 @@ asmlinkage void crypto_morus640_sse2_final(void *state, void *tag_xor,
37 37
38MORUS640_DECLARE_ALGS(sse2, "morus640-sse2", 400); 38MORUS640_DECLARE_ALGS(sse2, "morus640-sse2", 400);
39 39
40static const struct x86_cpu_id sse2_cpu_id[] = {
41 X86_FEATURE_MATCH(X86_FEATURE_XMM2),
42 {}
43};
44MODULE_DEVICE_TABLE(x86cpu, sse2_cpu_id);
45
46static int __init crypto_morus640_sse2_module_init(void) 40static int __init crypto_morus640_sse2_module_init(void)
47{ 41{
48 if (!x86_match_cpu(sse2_cpu_id)) 42 if (!boot_cpu_has(X86_FEATURE_XMM2) ||
43 !boot_cpu_has(X86_FEATURE_OSXSAVE) ||
44 !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
49 return -ENODEV; 45 return -ENODEV;
50 46
51 return crypto_register_aeads(crypto_morus640_sse2_algs, 47 return crypto_register_aeads(crypto_morus640_sse2_algs,
diff --git a/arch/x86/crypto/sha1_ssse3_asm.S b/arch/x86/crypto/sha1_ssse3_asm.S
index 6204bd53528c..613d0bfc3d84 100644
--- a/arch/x86/crypto/sha1_ssse3_asm.S
+++ b/arch/x86/crypto/sha1_ssse3_asm.S
@@ -96,7 +96,7 @@
96 # cleanup workspace 96 # cleanup workspace
97 mov $8, %ecx 97 mov $8, %ecx
98 mov %rsp, %rdi 98 mov %rsp, %rdi
99 xor %rax, %rax 99 xor %eax, %eax
100 rep stosq 100 rep stosq
101 101
102 mov %rbp, %rsp # deallocate workspace 102 mov %rbp, %rsp # deallocate workspace
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index c371bfee137a..2767c625a52c 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -65,7 +65,7 @@
65# define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF 65# define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
66#else 66#else
67# define preempt_stop(clobbers) 67# define preempt_stop(clobbers)
68# define resume_kernel restore_all 68# define resume_kernel restore_all_kernel
69#endif 69#endif
70 70
71.macro TRACE_IRQS_IRET 71.macro TRACE_IRQS_IRET
@@ -77,6 +77,8 @@
77#endif 77#endif
78.endm 78.endm
79 79
80#define PTI_SWITCH_MASK (1 << PAGE_SHIFT)
81
80/* 82/*
81 * User gs save/restore 83 * User gs save/restore
82 * 84 *
@@ -154,7 +156,52 @@
154 156
155#endif /* CONFIG_X86_32_LAZY_GS */ 157#endif /* CONFIG_X86_32_LAZY_GS */
156 158
157.macro SAVE_ALL pt_regs_ax=%eax 159/* Unconditionally switch to user cr3 */
160.macro SWITCH_TO_USER_CR3 scratch_reg:req
161 ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
162
163 movl %cr3, \scratch_reg
164 orl $PTI_SWITCH_MASK, \scratch_reg
165 movl \scratch_reg, %cr3
166.Lend_\@:
167.endm
168
169.macro BUG_IF_WRONG_CR3 no_user_check=0
170#ifdef CONFIG_DEBUG_ENTRY
171 ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
172 .if \no_user_check == 0
173 /* coming from usermode? */
174 testl $SEGMENT_RPL_MASK, PT_CS(%esp)
175 jz .Lend_\@
176 .endif
177 /* On user-cr3? */
178 movl %cr3, %eax
179 testl $PTI_SWITCH_MASK, %eax
180 jnz .Lend_\@
181 /* From userspace with kernel cr3 - BUG */
182 ud2
183.Lend_\@:
184#endif
185.endm
186
187/*
188 * Switch to kernel cr3 if not already loaded and return current cr3 in
189 * \scratch_reg
190 */
191.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
192 ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
193 movl %cr3, \scratch_reg
194 /* Test if we are already on kernel CR3 */
195 testl $PTI_SWITCH_MASK, \scratch_reg
196 jz .Lend_\@
197 andl $(~PTI_SWITCH_MASK), \scratch_reg
198 movl \scratch_reg, %cr3
199 /* Return original CR3 in \scratch_reg */
200 orl $PTI_SWITCH_MASK, \scratch_reg
201.Lend_\@:
202.endm
203
204.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0
158 cld 205 cld
159 PUSH_GS 206 PUSH_GS
160 pushl %fs 207 pushl %fs
@@ -173,6 +220,29 @@
173 movl $(__KERNEL_PERCPU), %edx 220 movl $(__KERNEL_PERCPU), %edx
174 movl %edx, %fs 221 movl %edx, %fs
175 SET_KERNEL_GS %edx 222 SET_KERNEL_GS %edx
223
224 /* Switch to kernel stack if necessary */
225.if \switch_stacks > 0
226 SWITCH_TO_KERNEL_STACK
227.endif
228
229.endm
230
231.macro SAVE_ALL_NMI cr3_reg:req
232 SAVE_ALL
233
234 BUG_IF_WRONG_CR3
235
236 /*
237 * Now switch the CR3 when PTI is enabled.
238 *
239 * We can enter with either user or kernel cr3, the code will
240 * store the old cr3 in \cr3_reg and switches to the kernel cr3
241 * if necessary.
242 */
243 SWITCH_TO_KERNEL_CR3 scratch_reg=\cr3_reg
244
245.Lend_\@:
176.endm 246.endm
177 247
178/* 248/*
@@ -221,6 +291,349 @@
221 POP_GS_EX 291 POP_GS_EX
222.endm 292.endm
223 293
294.macro RESTORE_ALL_NMI cr3_reg:req pop=0
295 /*
296 * Now switch the CR3 when PTI is enabled.
297 *
298 * We enter with kernel cr3 and switch the cr3 to the value
299 * stored on \cr3_reg, which is either a user or a kernel cr3.
300 */
301 ALTERNATIVE "jmp .Lswitched_\@", "", X86_FEATURE_PTI
302
303 testl $PTI_SWITCH_MASK, \cr3_reg
304 jz .Lswitched_\@
305
306 /* User cr3 in \cr3_reg - write it to hardware cr3 */
307 movl \cr3_reg, %cr3
308
309.Lswitched_\@:
310
311 BUG_IF_WRONG_CR3
312
313 RESTORE_REGS pop=\pop
314.endm
315
316.macro CHECK_AND_APPLY_ESPFIX
317#ifdef CONFIG_X86_ESPFIX32
318#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + (GDT_ENTRY_ESPFIX_SS * 8)
319
320 ALTERNATIVE "jmp .Lend_\@", "", X86_BUG_ESPFIX
321
322 movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS
323 /*
324 * Warning: PT_OLDSS(%esp) contains the wrong/random values if we
325 * are returning to the kernel.
326 * See comments in process.c:copy_thread() for details.
327 */
328 movb PT_OLDSS(%esp), %ah
329 movb PT_CS(%esp), %al
330 andl $(X86_EFLAGS_VM | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
331 cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax
332 jne .Lend_\@ # returning to user-space with LDT SS
333
334 /*
335 * Setup and switch to ESPFIX stack
336 *
337 * We're returning to userspace with a 16 bit stack. The CPU will not
338 * restore the high word of ESP for us on executing iret... This is an
339 * "official" bug of all the x86-compatible CPUs, which we can work
340 * around to make dosemu and wine happy. We do this by preloading the
341 * high word of ESP with the high word of the userspace ESP while
342 * compensating for the offset by changing to the ESPFIX segment with
343 * a base address that matches for the difference.
344 */
345 mov %esp, %edx /* load kernel esp */
346 mov PT_OLDESP(%esp), %eax /* load userspace esp */
347 mov %dx, %ax /* eax: new kernel esp */
348 sub %eax, %edx /* offset (low word is 0) */
349 shr $16, %edx
350 mov %dl, GDT_ESPFIX_SS + 4 /* bits 16..23 */
351 mov %dh, GDT_ESPFIX_SS + 7 /* bits 24..31 */
352 pushl $__ESPFIX_SS
353 pushl %eax /* new kernel esp */
354 /*
355 * Disable interrupts, but do not irqtrace this section: we
356 * will soon execute iret and the tracer was already set to
357 * the irqstate after the IRET:
358 */
359 DISABLE_INTERRUPTS(CLBR_ANY)
360 lss (%esp), %esp /* switch to espfix segment */
361.Lend_\@:
362#endif /* CONFIG_X86_ESPFIX32 */
363.endm
364
365/*
366 * Called with pt_regs fully populated and kernel segments loaded,
367 * so we can access PER_CPU and use the integer registers.
368 *
369 * We need to be very careful here with the %esp switch, because an NMI
370 * can happen everywhere. If the NMI handler finds itself on the
371 * entry-stack, it will overwrite the task-stack and everything we
372 * copied there. So allocate the stack-frame on the task-stack and
373 * switch to it before we do any copying.
374 */
375
376#define CS_FROM_ENTRY_STACK (1 << 31)
377#define CS_FROM_USER_CR3 (1 << 30)
378
379.macro SWITCH_TO_KERNEL_STACK
380
381 ALTERNATIVE "", "jmp .Lend_\@", X86_FEATURE_XENPV
382
383 BUG_IF_WRONG_CR3
384
385 SWITCH_TO_KERNEL_CR3 scratch_reg=%eax
386
387 /*
388 * %eax now contains the entry cr3 and we carry it forward in
389 * that register for the time this macro runs
390 */
391
392 /* Are we on the entry stack? Bail out if not! */
393 movl PER_CPU_VAR(cpu_entry_area), %ecx
394 addl $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
395 subl %esp, %ecx /* ecx = (end of entry_stack) - esp */
396 cmpl $SIZEOF_entry_stack, %ecx
397 jae .Lend_\@
398
399 /* Load stack pointer into %esi and %edi */
400 movl %esp, %esi
401 movl %esi, %edi
402
403 /* Move %edi to the top of the entry stack */
404 andl $(MASK_entry_stack), %edi
405 addl $(SIZEOF_entry_stack), %edi
406
407 /* Load top of task-stack into %edi */
408 movl TSS_entry2task_stack(%edi), %edi
409
410 /*
411 * Clear unused upper bits of the dword containing the word-sized CS
412 * slot in pt_regs in case hardware didn't clear it for us.
413 */
414 andl $(0x0000ffff), PT_CS(%esp)
415
416 /* Special case - entry from kernel mode via entry stack */
417#ifdef CONFIG_VM86
418 movl PT_EFLAGS(%esp), %ecx # mix EFLAGS and CS
419 movb PT_CS(%esp), %cl
420 andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %ecx
421#else
422 movl PT_CS(%esp), %ecx
423 andl $SEGMENT_RPL_MASK, %ecx
424#endif
425 cmpl $USER_RPL, %ecx
426 jb .Lentry_from_kernel_\@
427
428 /* Bytes to copy */
429 movl $PTREGS_SIZE, %ecx
430
431#ifdef CONFIG_VM86
432 testl $X86_EFLAGS_VM, PT_EFLAGS(%esi)
433 jz .Lcopy_pt_regs_\@
434
435 /*
436 * Stack-frame contains 4 additional segment registers when
437 * coming from VM86 mode
438 */
439 addl $(4 * 4), %ecx
440
441#endif
442.Lcopy_pt_regs_\@:
443
444 /* Allocate frame on task-stack */
445 subl %ecx, %edi
446
447 /* Switch to task-stack */
448 movl %edi, %esp
449
450 /*
451 * We are now on the task-stack and can safely copy over the
452 * stack-frame
453 */
454 shrl $2, %ecx
455 cld
456 rep movsl
457
458 jmp .Lend_\@
459
460.Lentry_from_kernel_\@:
461
462 /*
463 * This handles the case when we enter the kernel from
464 * kernel-mode and %esp points to the entry-stack. When this
465 * happens we need to switch to the task-stack to run C code,
466 * but switch back to the entry-stack again when we approach
467 * iret and return to the interrupted code-path. This usually
468 * happens when we hit an exception while restoring user-space
469 * segment registers on the way back to user-space or when the
470 * sysenter handler runs with eflags.tf set.
471 *
472 * When we switch to the task-stack here, we can't trust the
473 * contents of the entry-stack anymore, as the exception handler
474 * might be scheduled out or moved to another CPU. Therefore we
475 * copy the complete entry-stack to the task-stack and set a
476 * marker in the iret-frame (bit 31 of the CS dword) to detect
477 * what we've done on the iret path.
478 *
479 * On the iret path we copy everything back and switch to the
480 * entry-stack, so that the interrupted kernel code-path
481 * continues on the same stack it was interrupted with.
482 *
483 * Be aware that an NMI can happen anytime in this code.
484 *
485 * %esi: Entry-Stack pointer (same as %esp)
486 * %edi: Top of the task stack
487 * %eax: CR3 on kernel entry
488 */
489
490 /* Calculate number of bytes on the entry stack in %ecx */
491 movl %esi, %ecx
492
493 /* %ecx to the top of entry-stack */
494 andl $(MASK_entry_stack), %ecx
495 addl $(SIZEOF_entry_stack), %ecx
496
497 /* Number of bytes on the entry stack to %ecx */
498 sub %esi, %ecx
499
500 /* Mark stackframe as coming from entry stack */
501 orl $CS_FROM_ENTRY_STACK, PT_CS(%esp)
502
503 /*
504 * Test the cr3 used to enter the kernel and add a marker
505 * so that we can switch back to it before iret.
506 */
507 testl $PTI_SWITCH_MASK, %eax
508 jz .Lcopy_pt_regs_\@
509 orl $CS_FROM_USER_CR3, PT_CS(%esp)
510
511 /*
512 * %esi and %edi are unchanged, %ecx contains the number of
513 * bytes to copy. The code at .Lcopy_pt_regs_\@ will allocate
514 * the stack-frame on task-stack and copy everything over
515 */
516 jmp .Lcopy_pt_regs_\@
517
518.Lend_\@:
519.endm
520
521/*
522 * Switch back from the kernel stack to the entry stack.
523 *
524 * The %esp register must point to pt_regs on the task stack. It will
525 * first calculate the size of the stack-frame to copy, depending on
526 * whether we return to VM86 mode or not. With that it uses 'rep movsl'
527 * to copy the contents of the stack over to the entry stack.
528 *
529 * We must be very careful here, as we can't trust the contents of the
530 * task-stack once we switched to the entry-stack. When an NMI happens
531 * while on the entry-stack, the NMI handler will switch back to the top
532 * of the task stack, overwriting our stack-frame we are about to copy.
533 * Therefore we switch the stack only after everything is copied over.
534 */
535.macro SWITCH_TO_ENTRY_STACK
536
537 ALTERNATIVE "", "jmp .Lend_\@", X86_FEATURE_XENPV
538
539 /* Bytes to copy */
540 movl $PTREGS_SIZE, %ecx
541
542#ifdef CONFIG_VM86
543 testl $(X86_EFLAGS_VM), PT_EFLAGS(%esp)
544 jz .Lcopy_pt_regs_\@
545
546 /* Additional 4 registers to copy when returning to VM86 mode */
547 addl $(4 * 4), %ecx
548
549.Lcopy_pt_regs_\@:
550#endif
551
552 /* Initialize source and destination for movsl */
553 movl PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %edi
554 subl %ecx, %edi
555 movl %esp, %esi
556
557 /* Save future stack pointer in %ebx */
558 movl %edi, %ebx
559
560 /* Copy over the stack-frame */
561 shrl $2, %ecx
562 cld
563 rep movsl
564
565 /*
566 * Switch to entry-stack - needs to happen after everything is
567 * copied because the NMI handler will overwrite the task-stack
568 * when on entry-stack
569 */
570 movl %ebx, %esp
571
572.Lend_\@:
573.endm
574
575/*
576 * This macro handles the case when we return to kernel-mode on the iret
577 * path and have to switch back to the entry stack and/or user-cr3
578 *
579 * See the comments below the .Lentry_from_kernel_\@ label in the
580 * SWITCH_TO_KERNEL_STACK macro for more details.
581 */
582.macro PARANOID_EXIT_TO_KERNEL_MODE
583
584 /*
585 * Test if we entered the kernel with the entry-stack. Most
586 * likely we did not, because this code only runs on the
587 * return-to-kernel path.
588 */
589 testl $CS_FROM_ENTRY_STACK, PT_CS(%esp)
590 jz .Lend_\@
591
592 /* Unlikely slow-path */
593
594 /* Clear marker from stack-frame */
595 andl $(~CS_FROM_ENTRY_STACK), PT_CS(%esp)
596
597 /* Copy the remaining task-stack contents to entry-stack */
598 movl %esp, %esi
599 movl PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %edi
600
601 /* Bytes on the task-stack to ecx */
602 movl PER_CPU_VAR(cpu_tss_rw + TSS_sp1), %ecx
603 subl %esi, %ecx
604
605 /* Allocate stack-frame on entry-stack */
606 subl %ecx, %edi
607
608 /*
609 * Save future stack-pointer, we must not switch until the
610 * copy is done, otherwise the NMI handler could destroy the
611 * contents of the task-stack we are about to copy.
612 */
613 movl %edi, %ebx
614
615 /* Do the copy */
616 shrl $2, %ecx
617 cld
618 rep movsl
619
620 /* Safe to switch to entry-stack now */
621 movl %ebx, %esp
622
623 /*
624 * We came from entry-stack and need to check if we also need to
625 * switch back to user cr3.
626 */
627 testl $CS_FROM_USER_CR3, PT_CS(%esp)
628 jz .Lend_\@
629
630 /* Clear marker from stack-frame */
631 andl $(~CS_FROM_USER_CR3), PT_CS(%esp)
632
633 SWITCH_TO_USER_CR3 scratch_reg=%eax
634
635.Lend_\@:
636.endm
224/* 637/*
225 * %eax: prev task 638 * %eax: prev task
226 * %edx: next task 639 * %edx: next task
@@ -351,9 +764,9 @@ ENTRY(resume_kernel)
351 DISABLE_INTERRUPTS(CLBR_ANY) 764 DISABLE_INTERRUPTS(CLBR_ANY)
352.Lneed_resched: 765.Lneed_resched:
353 cmpl $0, PER_CPU_VAR(__preempt_count) 766 cmpl $0, PER_CPU_VAR(__preempt_count)
354 jnz restore_all 767 jnz restore_all_kernel
355 testl $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off (exception path) ? 768 testl $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off (exception path) ?
356 jz restore_all 769 jz restore_all_kernel
357 call preempt_schedule_irq 770 call preempt_schedule_irq
358 jmp .Lneed_resched 771 jmp .Lneed_resched
359END(resume_kernel) 772END(resume_kernel)
@@ -412,7 +825,21 @@ ENTRY(xen_sysenter_target)
412 * 0(%ebp) arg6 825 * 0(%ebp) arg6
413 */ 826 */
414ENTRY(entry_SYSENTER_32) 827ENTRY(entry_SYSENTER_32)
415 movl TSS_sysenter_sp0(%esp), %esp 828 /*
829 * On entry-stack with all userspace-regs live - save and
830 * restore eflags and %eax to use it as scratch-reg for the cr3
831 * switch.
832 */
833 pushfl
834 pushl %eax
835 BUG_IF_WRONG_CR3 no_user_check=1
836 SWITCH_TO_KERNEL_CR3 scratch_reg=%eax
837 popl %eax
838 popfl
839
840 /* Stack empty again, switch to task stack */
841 movl TSS_entry2task_stack(%esp), %esp
842
416.Lsysenter_past_esp: 843.Lsysenter_past_esp:
417 pushl $__USER_DS /* pt_regs->ss */ 844 pushl $__USER_DS /* pt_regs->ss */
418 pushl %ebp /* pt_regs->sp (stashed in bp) */ 845 pushl %ebp /* pt_regs->sp (stashed in bp) */
@@ -421,7 +848,7 @@ ENTRY(entry_SYSENTER_32)
421 pushl $__USER_CS /* pt_regs->cs */ 848 pushl $__USER_CS /* pt_regs->cs */
422 pushl $0 /* pt_regs->ip = 0 (placeholder) */ 849 pushl $0 /* pt_regs->ip = 0 (placeholder) */
423 pushl %eax /* pt_regs->orig_ax */ 850 pushl %eax /* pt_regs->orig_ax */
424 SAVE_ALL pt_regs_ax=$-ENOSYS /* save rest */ 851 SAVE_ALL pt_regs_ax=$-ENOSYS /* save rest, stack already switched */
425 852
426 /* 853 /*
427 * SYSENTER doesn't filter flags, so we need to clear NT, AC 854 * SYSENTER doesn't filter flags, so we need to clear NT, AC
@@ -460,25 +887,49 @@ ENTRY(entry_SYSENTER_32)
460 887
461/* Opportunistic SYSEXIT */ 888/* Opportunistic SYSEXIT */
462 TRACE_IRQS_ON /* User mode traces as IRQs on. */ 889 TRACE_IRQS_ON /* User mode traces as IRQs on. */
890
891 /*
892 * Setup entry stack - we keep the pointer in %eax and do the
893 * switch after almost all user-state is restored.
894 */
895
896 /* Load entry stack pointer and allocate frame for eflags/eax */
897 movl PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %eax
898 subl $(2*4), %eax
899
900 /* Copy eflags and eax to entry stack */
901 movl PT_EFLAGS(%esp), %edi
902 movl PT_EAX(%esp), %esi
903 movl %edi, (%eax)
904 movl %esi, 4(%eax)
905
906 /* Restore user registers and segments */
463 movl PT_EIP(%esp), %edx /* pt_regs->ip */ 907 movl PT_EIP(%esp), %edx /* pt_regs->ip */
464 movl PT_OLDESP(%esp), %ecx /* pt_regs->sp */ 908 movl PT_OLDESP(%esp), %ecx /* pt_regs->sp */
4651: mov PT_FS(%esp), %fs 9091: mov PT_FS(%esp), %fs
466 PTGS_TO_GS 910 PTGS_TO_GS
911
467 popl %ebx /* pt_regs->bx */ 912 popl %ebx /* pt_regs->bx */
468 addl $2*4, %esp /* skip pt_regs->cx and pt_regs->dx */ 913 addl $2*4, %esp /* skip pt_regs->cx and pt_regs->dx */
469 popl %esi /* pt_regs->si */ 914 popl %esi /* pt_regs->si */
470 popl %edi /* pt_regs->di */ 915 popl %edi /* pt_regs->di */
471 popl %ebp /* pt_regs->bp */ 916 popl %ebp /* pt_regs->bp */
472 popl %eax /* pt_regs->ax */ 917
918 /* Switch to entry stack */
919 movl %eax, %esp
920
921 /* Now ready to switch the cr3 */
922 SWITCH_TO_USER_CR3 scratch_reg=%eax
473 923
474 /* 924 /*
475 * Restore all flags except IF. (We restore IF separately because 925 * Restore all flags except IF. (We restore IF separately because
476 * STI gives a one-instruction window in which we won't be interrupted, 926 * STI gives a one-instruction window in which we won't be interrupted,
477 * whereas POPF does not.) 927 * whereas POPF does not.)
478 */ 928 */
479 addl $PT_EFLAGS-PT_DS, %esp /* point esp at pt_regs->flags */
480 btrl $X86_EFLAGS_IF_BIT, (%esp) 929 btrl $X86_EFLAGS_IF_BIT, (%esp)
930 BUG_IF_WRONG_CR3 no_user_check=1
481 popfl 931 popfl
932 popl %eax
482 933
483 /* 934 /*
484 * Return back to the vDSO, which will pop ecx and edx. 935 * Return back to the vDSO, which will pop ecx and edx.
@@ -532,7 +983,8 @@ ENDPROC(entry_SYSENTER_32)
532ENTRY(entry_INT80_32) 983ENTRY(entry_INT80_32)
533 ASM_CLAC 984 ASM_CLAC
534 pushl %eax /* pt_regs->orig_ax */ 985 pushl %eax /* pt_regs->orig_ax */
535 SAVE_ALL pt_regs_ax=$-ENOSYS /* save rest */ 986
987 SAVE_ALL pt_regs_ax=$-ENOSYS switch_stacks=1 /* save rest */
536 988
537 /* 989 /*
538 * User mode is traced as though IRQs are on, and the interrupt gate 990 * User mode is traced as though IRQs are on, and the interrupt gate
@@ -546,24 +998,17 @@ ENTRY(entry_INT80_32)
546 998
547restore_all: 999restore_all:
548 TRACE_IRQS_IRET 1000 TRACE_IRQS_IRET
1001 SWITCH_TO_ENTRY_STACK
549.Lrestore_all_notrace: 1002.Lrestore_all_notrace:
550#ifdef CONFIG_X86_ESPFIX32 1003 CHECK_AND_APPLY_ESPFIX
551 ALTERNATIVE "jmp .Lrestore_nocheck", "", X86_BUG_ESPFIX
552
553 movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS
554 /*
555 * Warning: PT_OLDSS(%esp) contains the wrong/random values if we
556 * are returning to the kernel.
557 * See comments in process.c:copy_thread() for details.
558 */
559 movb PT_OLDSS(%esp), %ah
560 movb PT_CS(%esp), %al
561 andl $(X86_EFLAGS_VM | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
562 cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax
563 je .Lldt_ss # returning to user-space with LDT SS
564#endif
565.Lrestore_nocheck: 1004.Lrestore_nocheck:
566 RESTORE_REGS 4 # skip orig_eax/error_code 1005 /* Switch back to user CR3 */
1006 SWITCH_TO_USER_CR3 scratch_reg=%eax
1007
1008 BUG_IF_WRONG_CR3
1009
1010 /* Restore user state */
1011 RESTORE_REGS pop=4 # skip orig_eax/error_code
567.Lirq_return: 1012.Lirq_return:
568 /* 1013 /*
569 * ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization 1014 * ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization
@@ -572,46 +1017,33 @@ restore_all:
572 */ 1017 */
573 INTERRUPT_RETURN 1018 INTERRUPT_RETURN
574 1019
1020restore_all_kernel:
1021 TRACE_IRQS_IRET
1022 PARANOID_EXIT_TO_KERNEL_MODE
1023 BUG_IF_WRONG_CR3
1024 RESTORE_REGS 4
1025 jmp .Lirq_return
1026
575.section .fixup, "ax" 1027.section .fixup, "ax"
576ENTRY(iret_exc ) 1028ENTRY(iret_exc )
577 pushl $0 # no error code 1029 pushl $0 # no error code
578 pushl $do_iret_error 1030 pushl $do_iret_error
579 jmp common_exception
580.previous
581 _ASM_EXTABLE(.Lirq_return, iret_exc)
582 1031
583#ifdef CONFIG_X86_ESPFIX32 1032#ifdef CONFIG_DEBUG_ENTRY
584.Lldt_ss:
585/*
586 * Setup and switch to ESPFIX stack
587 *
588 * We're returning to userspace with a 16 bit stack. The CPU will not
589 * restore the high word of ESP for us on executing iret... This is an
590 * "official" bug of all the x86-compatible CPUs, which we can work
591 * around to make dosemu and wine happy. We do this by preloading the
592 * high word of ESP with the high word of the userspace ESP while
593 * compensating for the offset by changing to the ESPFIX segment with
594 * a base address that matches for the difference.
595 */
596#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + (GDT_ENTRY_ESPFIX_SS * 8)
597 mov %esp, %edx /* load kernel esp */
598 mov PT_OLDESP(%esp), %eax /* load userspace esp */
599 mov %dx, %ax /* eax: new kernel esp */
600 sub %eax, %edx /* offset (low word is 0) */
601 shr $16, %edx
602 mov %dl, GDT_ESPFIX_SS + 4 /* bits 16..23 */
603 mov %dh, GDT_ESPFIX_SS + 7 /* bits 24..31 */
604 pushl $__ESPFIX_SS
605 pushl %eax /* new kernel esp */
606 /* 1033 /*
607 * Disable interrupts, but do not irqtrace this section: we 1034 * The stack-frame here is the one that iret faulted on, so its a
608 * will soon execute iret and the tracer was already set to 1035 * return-to-user frame. We are on kernel-cr3 because we come here from
609 * the irqstate after the IRET: 1036 * the fixup code. This confuses the CR3 checker, so switch to user-cr3
1037 * as the checker expects it.
610 */ 1038 */
611 DISABLE_INTERRUPTS(CLBR_ANY) 1039 pushl %eax
612 lss (%esp), %esp /* switch to espfix segment */ 1040 SWITCH_TO_USER_CR3 scratch_reg=%eax
613 jmp .Lrestore_nocheck 1041 popl %eax
614#endif 1042#endif
1043
1044 jmp common_exception
1045.previous
1046 _ASM_EXTABLE(.Lirq_return, iret_exc)
615ENDPROC(entry_INT80_32) 1047ENDPROC(entry_INT80_32)
616 1048
617.macro FIXUP_ESPFIX_STACK 1049.macro FIXUP_ESPFIX_STACK
@@ -671,7 +1103,8 @@ END(irq_entries_start)
671common_interrupt: 1103common_interrupt:
672 ASM_CLAC 1104 ASM_CLAC
673 addl $-0x80, (%esp) /* Adjust vector into the [-256, -1] range */ 1105 addl $-0x80, (%esp) /* Adjust vector into the [-256, -1] range */
674 SAVE_ALL 1106
1107 SAVE_ALL switch_stacks=1
675 ENCODE_FRAME_POINTER 1108 ENCODE_FRAME_POINTER
676 TRACE_IRQS_OFF 1109 TRACE_IRQS_OFF
677 movl %esp, %eax 1110 movl %esp, %eax
@@ -679,16 +1112,16 @@ common_interrupt:
679 jmp ret_from_intr 1112 jmp ret_from_intr
680ENDPROC(common_interrupt) 1113ENDPROC(common_interrupt)
681 1114
682#define BUILD_INTERRUPT3(name, nr, fn) \ 1115#define BUILD_INTERRUPT3(name, nr, fn) \
683ENTRY(name) \ 1116ENTRY(name) \
684 ASM_CLAC; \ 1117 ASM_CLAC; \
685 pushl $~(nr); \ 1118 pushl $~(nr); \
686 SAVE_ALL; \ 1119 SAVE_ALL switch_stacks=1; \
687 ENCODE_FRAME_POINTER; \ 1120 ENCODE_FRAME_POINTER; \
688 TRACE_IRQS_OFF \ 1121 TRACE_IRQS_OFF \
689 movl %esp, %eax; \ 1122 movl %esp, %eax; \
690 call fn; \ 1123 call fn; \
691 jmp ret_from_intr; \ 1124 jmp ret_from_intr; \
692ENDPROC(name) 1125ENDPROC(name)
693 1126
694#define BUILD_INTERRUPT(name, nr) \ 1127#define BUILD_INTERRUPT(name, nr) \
@@ -920,16 +1353,20 @@ common_exception:
920 pushl %es 1353 pushl %es
921 pushl %ds 1354 pushl %ds
922 pushl %eax 1355 pushl %eax
1356 movl $(__USER_DS), %eax
1357 movl %eax, %ds
1358 movl %eax, %es
1359 movl $(__KERNEL_PERCPU), %eax
1360 movl %eax, %fs
923 pushl %ebp 1361 pushl %ebp
924 pushl %edi 1362 pushl %edi
925 pushl %esi 1363 pushl %esi
926 pushl %edx 1364 pushl %edx
927 pushl %ecx 1365 pushl %ecx
928 pushl %ebx 1366 pushl %ebx
1367 SWITCH_TO_KERNEL_STACK
929 ENCODE_FRAME_POINTER 1368 ENCODE_FRAME_POINTER
930 cld 1369 cld
931 movl $(__KERNEL_PERCPU), %ecx
932 movl %ecx, %fs
933 UNWIND_ESPFIX_STACK 1370 UNWIND_ESPFIX_STACK
934 GS_TO_REG %ecx 1371 GS_TO_REG %ecx
935 movl PT_GS(%esp), %edi # get the function address 1372 movl PT_GS(%esp), %edi # get the function address
@@ -937,9 +1374,6 @@ common_exception:
937 movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart 1374 movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
938 REG_TO_PTGS %ecx 1375 REG_TO_PTGS %ecx
939 SET_KERNEL_GS %ecx 1376 SET_KERNEL_GS %ecx
940 movl $(__USER_DS), %ecx
941 movl %ecx, %ds
942 movl %ecx, %es
943 TRACE_IRQS_OFF 1377 TRACE_IRQS_OFF
944 movl %esp, %eax # pt_regs pointer 1378 movl %esp, %eax # pt_regs pointer
945 CALL_NOSPEC %edi 1379 CALL_NOSPEC %edi
@@ -948,40 +1382,12 @@ END(common_exception)
948 1382
949ENTRY(debug) 1383ENTRY(debug)
950 /* 1384 /*
951 * #DB can happen at the first instruction of 1385 * Entry from sysenter is now handled in common_exception
952 * entry_SYSENTER_32 or in Xen's SYSENTER prologue. If this
953 * happens, then we will be running on a very small stack. We
954 * need to detect this condition and switch to the thread
955 * stack before calling any C code at all.
956 *
957 * If you edit this code, keep in mind that NMIs can happen in here.
958 */ 1386 */
959 ASM_CLAC 1387 ASM_CLAC
960 pushl $-1 # mark this as an int 1388 pushl $-1 # mark this as an int
961 SAVE_ALL 1389 pushl $do_debug
962 ENCODE_FRAME_POINTER 1390 jmp common_exception
963 xorl %edx, %edx # error code 0
964 movl %esp, %eax # pt_regs pointer
965
966 /* Are we currently on the SYSENTER stack? */
967 movl PER_CPU_VAR(cpu_entry_area), %ecx
968 addl $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
969 subl %eax, %ecx /* ecx = (end of entry_stack) - esp */
970 cmpl $SIZEOF_entry_stack, %ecx
971 jb .Ldebug_from_sysenter_stack
972
973 TRACE_IRQS_OFF
974 call do_debug
975 jmp ret_from_exception
976
977.Ldebug_from_sysenter_stack:
978 /* We're on the SYSENTER stack. Switch off. */
979 movl %esp, %ebx
980 movl PER_CPU_VAR(cpu_current_top_of_stack), %esp
981 TRACE_IRQS_OFF
982 call do_debug
983 movl %ebx, %esp
984 jmp ret_from_exception
985END(debug) 1391END(debug)
986 1392
987/* 1393/*
@@ -993,6 +1399,7 @@ END(debug)
993 */ 1399 */
994ENTRY(nmi) 1400ENTRY(nmi)
995 ASM_CLAC 1401 ASM_CLAC
1402
996#ifdef CONFIG_X86_ESPFIX32 1403#ifdef CONFIG_X86_ESPFIX32
997 pushl %eax 1404 pushl %eax
998 movl %ss, %eax 1405 movl %ss, %eax
@@ -1002,7 +1409,7 @@ ENTRY(nmi)
1002#endif 1409#endif
1003 1410
1004 pushl %eax # pt_regs->orig_ax 1411 pushl %eax # pt_regs->orig_ax
1005 SAVE_ALL 1412 SAVE_ALL_NMI cr3_reg=%edi
1006 ENCODE_FRAME_POINTER 1413 ENCODE_FRAME_POINTER
1007 xorl %edx, %edx # zero error code 1414 xorl %edx, %edx # zero error code
1008 movl %esp, %eax # pt_regs pointer 1415 movl %esp, %eax # pt_regs pointer
@@ -1016,7 +1423,7 @@ ENTRY(nmi)
1016 1423
1017 /* Not on SYSENTER stack. */ 1424 /* Not on SYSENTER stack. */
1018 call do_nmi 1425 call do_nmi
1019 jmp .Lrestore_all_notrace 1426 jmp .Lnmi_return
1020 1427
1021.Lnmi_from_sysenter_stack: 1428.Lnmi_from_sysenter_stack:
1022 /* 1429 /*
@@ -1027,7 +1434,11 @@ ENTRY(nmi)
1027 movl PER_CPU_VAR(cpu_current_top_of_stack), %esp 1434 movl PER_CPU_VAR(cpu_current_top_of_stack), %esp
1028 call do_nmi 1435 call do_nmi
1029 movl %ebx, %esp 1436 movl %ebx, %esp
1030 jmp .Lrestore_all_notrace 1437
1438.Lnmi_return:
1439 CHECK_AND_APPLY_ESPFIX
1440 RESTORE_ALL_NMI cr3_reg=%edi pop=4
1441 jmp .Lirq_return
1031 1442
1032#ifdef CONFIG_X86_ESPFIX32 1443#ifdef CONFIG_X86_ESPFIX32
1033.Lnmi_espfix_stack: 1444.Lnmi_espfix_stack:
@@ -1042,12 +1453,12 @@ ENTRY(nmi)
1042 pushl 16(%esp) 1453 pushl 16(%esp)
1043 .endr 1454 .endr
1044 pushl %eax 1455 pushl %eax
1045 SAVE_ALL 1456 SAVE_ALL_NMI cr3_reg=%edi
1046 ENCODE_FRAME_POINTER 1457 ENCODE_FRAME_POINTER
1047 FIXUP_ESPFIX_STACK # %eax == %esp 1458 FIXUP_ESPFIX_STACK # %eax == %esp
1048 xorl %edx, %edx # zero error code 1459 xorl %edx, %edx # zero error code
1049 call do_nmi 1460 call do_nmi
1050 RESTORE_REGS 1461 RESTORE_ALL_NMI cr3_reg=%edi
1051 lss 12+4(%esp), %esp # back to espfix stack 1462 lss 12+4(%esp), %esp # back to espfix stack
1052 jmp .Lirq_return 1463 jmp .Lirq_return
1053#endif 1464#endif
@@ -1056,7 +1467,8 @@ END(nmi)
1056ENTRY(int3) 1467ENTRY(int3)
1057 ASM_CLAC 1468 ASM_CLAC
1058 pushl $-1 # mark this as an int 1469 pushl $-1 # mark this as an int
1059 SAVE_ALL 1470
1471 SAVE_ALL switch_stacks=1
1060 ENCODE_FRAME_POINTER 1472 ENCODE_FRAME_POINTER
1061 TRACE_IRQS_OFF 1473 TRACE_IRQS_OFF
1062 xorl %edx, %edx # zero error code 1474 xorl %edx, %edx # zero error code
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 8ae7ffda8f98..957dfb693ecc 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -92,7 +92,7 @@ END(native_usergs_sysret64)
92.endm 92.endm
93 93
94.macro TRACE_IRQS_IRETQ_DEBUG 94.macro TRACE_IRQS_IRETQ_DEBUG
95 bt $9, EFLAGS(%rsp) /* interrupts off? */ 95 btl $9, EFLAGS(%rsp) /* interrupts off? */
96 jnc 1f 96 jnc 1f
97 TRACE_IRQS_ON_DEBUG 97 TRACE_IRQS_ON_DEBUG
981: 981:
@@ -408,6 +408,7 @@ ENTRY(ret_from_fork)
408 408
4091: 4091:
410 /* kernel thread */ 410 /* kernel thread */
411 UNWIND_HINT_EMPTY
411 movq %r12, %rdi 412 movq %r12, %rdi
412 CALL_NOSPEC %rbx 413 CALL_NOSPEC %rbx
413 /* 414 /*
@@ -701,7 +702,7 @@ retint_kernel:
701#ifdef CONFIG_PREEMPT 702#ifdef CONFIG_PREEMPT
702 /* Interrupts are off */ 703 /* Interrupts are off */
703 /* Check if we need preemption */ 704 /* Check if we need preemption */
704 bt $9, EFLAGS(%rsp) /* were interrupts off? */ 705 btl $9, EFLAGS(%rsp) /* were interrupts off? */
705 jnc 1f 706 jnc 1f
7060: cmpl $0, PER_CPU_VAR(__preempt_count) 7070: cmpl $0, PER_CPU_VAR(__preempt_count)
707 jnz 1f 708 jnz 1f
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 261802b1cc50..9f695f517747 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -46,10 +46,8 @@ targets += $(vdso_img_sodbg) $(vdso_img-y:%=vdso%.so)
46 46
47CPPFLAGS_vdso.lds += -P -C 47CPPFLAGS_vdso.lds += -P -C
48 48
49VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \ 49VDSO_LDFLAGS_vdso.lds = -m elf_x86_64 -soname linux-vdso.so.1 --no-undefined \
50 -Wl,--no-undefined \ 50 -z max-page-size=4096 -z common-page-size=4096
51 -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 \
52 $(DISABLE_LTO)
53 51
54$(obj)/vdso64.so.dbg: $(obj)/vdso.lds $(vobjs) FORCE 52$(obj)/vdso64.so.dbg: $(obj)/vdso.lds $(vobjs) FORCE
55 $(call if_changed,vdso) 53 $(call if_changed,vdso)
@@ -58,9 +56,7 @@ HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi -I$(src
58hostprogs-y += vdso2c 56hostprogs-y += vdso2c
59 57
60quiet_cmd_vdso2c = VDSO2C $@ 58quiet_cmd_vdso2c = VDSO2C $@
61define cmd_vdso2c 59 cmd_vdso2c = $(obj)/vdso2c $< $(<:%.dbg=%) $@
62 $(obj)/vdso2c $< $(<:%.dbg=%) $@
63endef
64 60
65$(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso%.so $(obj)/vdso2c FORCE 61$(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso%.so $(obj)/vdso2c FORCE
66 $(call if_changed,vdso2c) 62 $(call if_changed,vdso2c)
@@ -95,10 +91,8 @@ CFLAGS_REMOVE_vvar.o = -pg
95# 91#
96 92
97CPPFLAGS_vdsox32.lds = $(CPPFLAGS_vdso.lds) 93CPPFLAGS_vdsox32.lds = $(CPPFLAGS_vdso.lds)
98VDSO_LDFLAGS_vdsox32.lds = -Wl,-m,elf32_x86_64 \ 94VDSO_LDFLAGS_vdsox32.lds = -m elf32_x86_64 -soname linux-vdso.so.1 \
99 -Wl,-soname=linux-vdso.so.1 \ 95 -z max-page-size=4096 -z common-page-size=4096
100 -Wl,-z,max-page-size=4096 \
101 -Wl,-z,common-page-size=4096
102 96
103# x32-rebranded versions 97# x32-rebranded versions
104vobjx32s-y := $(vobjs-y:.o=-x32.o) 98vobjx32s-y := $(vobjs-y:.o=-x32.o)
@@ -123,7 +117,7 @@ $(obj)/vdsox32.so.dbg: $(obj)/vdsox32.lds $(vobjx32s) FORCE
123 $(call if_changed,vdso) 117 $(call if_changed,vdso)
124 118
125CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds) 119CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds)
126VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-m,elf_i386 -Wl,-soname=linux-gate.so.1 120VDSO_LDFLAGS_vdso32.lds = -m elf_i386 -soname linux-gate.so.1
127 121
128targets += vdso32/vdso32.lds 122targets += vdso32/vdso32.lds
129targets += vdso32/note.o vdso32/system_call.o vdso32/sigreturn.o 123targets += vdso32/note.o vdso32/system_call.o vdso32/sigreturn.o
@@ -157,13 +151,13 @@ $(obj)/vdso32.so.dbg: FORCE \
157# The DSO images are built using a special linker script. 151# The DSO images are built using a special linker script.
158# 152#
159quiet_cmd_vdso = VDSO $@ 153quiet_cmd_vdso = VDSO $@
160 cmd_vdso = $(CC) -nostdlib -o $@ \ 154 cmd_vdso = $(LD) -nostdlib -o $@ \
161 $(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$(filter %.lds,$(^F))) \ 155 $(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$(filter %.lds,$(^F))) \
162 -Wl,-T,$(filter %.lds,$^) $(filter %.o,$^) && \ 156 -T $(filter %.lds,$^) $(filter %.o,$^) && \
163 sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@' 157 sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@'
164 158
165VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=both) \ 159VDSO_LDFLAGS = -shared $(call ld-option, --hash-style=both) \
166 $(call cc-ldoption, -Wl$(comma)--build-id) -Wl,-Bsymbolic $(LTO_CFLAGS) 160 $(call ld-option, --build-id) -Bsymbolic
167GCOV_PROFILE := n 161GCOV_PROFILE := n
168 162
169# 163#
diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c
index 402338365651..5b0f613428c2 100644
--- a/arch/x86/hyperv/hv_apic.c
+++ b/arch/x86/hyperv/hv_apic.c
@@ -31,6 +31,8 @@
31#include <asm/mshyperv.h> 31#include <asm/mshyperv.h>
32#include <asm/apic.h> 32#include <asm/apic.h>
33 33
34#include <asm/trace/hyperv.h>
35
34static struct apic orig_apic; 36static struct apic orig_apic;
35 37
36static u64 hv_apic_icr_read(void) 38static u64 hv_apic_icr_read(void)
@@ -99,6 +101,9 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector)
99 int nr_bank = 0; 101 int nr_bank = 0;
100 int ret = 1; 102 int ret = 1;
101 103
104 if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
105 return false;
106
102 local_irq_save(flags); 107 local_irq_save(flags);
103 arg = (struct ipi_arg_ex **)this_cpu_ptr(hyperv_pcpu_input_arg); 108 arg = (struct ipi_arg_ex **)this_cpu_ptr(hyperv_pcpu_input_arg);
104 109
@@ -130,10 +135,10 @@ ipi_mask_ex_done:
130static bool __send_ipi_mask(const struct cpumask *mask, int vector) 135static bool __send_ipi_mask(const struct cpumask *mask, int vector)
131{ 136{
132 int cur_cpu, vcpu; 137 int cur_cpu, vcpu;
133 struct ipi_arg_non_ex **arg; 138 struct ipi_arg_non_ex ipi_arg;
134 struct ipi_arg_non_ex *ipi_arg;
135 int ret = 1; 139 int ret = 1;
136 unsigned long flags; 140
141 trace_hyperv_send_ipi_mask(mask, vector);
137 142
138 if (cpumask_empty(mask)) 143 if (cpumask_empty(mask))
139 return true; 144 return true;
@@ -144,40 +149,43 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector)
144 if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR)) 149 if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
145 return false; 150 return false;
146 151
147 if ((ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED)) 152 /*
148 return __send_ipi_mask_ex(mask, vector); 153 * From the supplied CPU set we need to figure out if we can get away
149 154 * with cheaper HVCALL_SEND_IPI hypercall. This is possible when the
150 local_irq_save(flags); 155 * highest VP number in the set is < 64. As VP numbers are usually in
151 arg = (struct ipi_arg_non_ex **)this_cpu_ptr(hyperv_pcpu_input_arg); 156 * ascending order and match Linux CPU ids, here is an optimization:
152 157 * we check the VP number for the highest bit in the supplied set first
153 ipi_arg = *arg; 158 * so we can quickly find out if using HVCALL_SEND_IPI_EX hypercall is
154 if (unlikely(!ipi_arg)) 159 * a must. We will also check all VP numbers when walking the supplied
155 goto ipi_mask_done; 160 * CPU set to remain correct in all cases.
156 161 */
157 ipi_arg->vector = vector; 162 if (hv_cpu_number_to_vp_number(cpumask_last(mask)) >= 64)
158 ipi_arg->reserved = 0; 163 goto do_ex_hypercall;
159 ipi_arg->cpu_mask = 0; 164
165 ipi_arg.vector = vector;
166 ipi_arg.cpu_mask = 0;
160 167
161 for_each_cpu(cur_cpu, mask) { 168 for_each_cpu(cur_cpu, mask) {
162 vcpu = hv_cpu_number_to_vp_number(cur_cpu); 169 vcpu = hv_cpu_number_to_vp_number(cur_cpu);
163 if (vcpu == VP_INVAL) 170 if (vcpu == VP_INVAL)
164 goto ipi_mask_done; 171 return false;
165 172
166 /* 173 /*
167 * This particular version of the IPI hypercall can 174 * This particular version of the IPI hypercall can
168 * only target upto 64 CPUs. 175 * only target upto 64 CPUs.
169 */ 176 */
170 if (vcpu >= 64) 177 if (vcpu >= 64)
171 goto ipi_mask_done; 178 goto do_ex_hypercall;
172 179
173 __set_bit(vcpu, (unsigned long *)&ipi_arg->cpu_mask); 180 __set_bit(vcpu, (unsigned long *)&ipi_arg.cpu_mask);
174 } 181 }
175 182
176 ret = hv_do_hypercall(HVCALL_SEND_IPI, ipi_arg, NULL); 183 ret = hv_do_fast_hypercall16(HVCALL_SEND_IPI, ipi_arg.vector,
177 184 ipi_arg.cpu_mask);
178ipi_mask_done:
179 local_irq_restore(flags);
180 return ((ret == 0) ? true : false); 185 return ((ret == 0) ? true : false);
186
187do_ex_hypercall:
188 return __send_ipi_mask_ex(mask, vector);
181} 189}
182 190
183static bool __send_ipi_one(int cpu, int vector) 191static bool __send_ipi_one(int cpu, int vector)
@@ -233,10 +241,7 @@ static void hv_send_ipi_self(int vector)
233void __init hv_apic_init(void) 241void __init hv_apic_init(void)
234{ 242{
235 if (ms_hyperv.hints & HV_X64_CLUSTER_IPI_RECOMMENDED) { 243 if (ms_hyperv.hints & HV_X64_CLUSTER_IPI_RECOMMENDED) {
236 if ((ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED)) 244 pr_info("Hyper-V: Using IPI hypercalls\n");
237 pr_info("Hyper-V: Using ext hypercalls for IPI\n");
238 else
239 pr_info("Hyper-V: Using IPI hypercalls\n");
240 /* 245 /*
241 * Set the IPI entry points. 246 * Set the IPI entry points.
242 */ 247 */
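
The IPI path selection above reduces to: if every target VP number fits below 64, build a plain 64-bit mask and issue the cheap HVCALL_SEND_IPI as a fast hypercall; otherwise fall back to the EX variant with its sparse VP set. A standalone sketch of that decision, assuming made-up helpers (send_ipi_fast and send_ipi_ex are illustrative stand-ins, not kernel APIs, and the highest-VP shortcut done with cpumask_last() is simply folded into the loop here):

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	/* Illustrative stand-ins for HVCALL_SEND_IPI / HVCALL_SEND_IPI_EX. */
	static bool send_ipi_fast(uint8_t vector, uint64_t vp_mask)
	{
		printf("fast IPI: vector %u, mask %#llx\n", vector,
		       (unsigned long long)vp_mask);
		return true;
	}

	static bool send_ipi_ex(uint8_t vector, const int *vps, int n)
	{
		printf("EX IPI: vector %u, %d targets\n", vector, n);
		return true;
	}

	/* Cheap path only when every VP number fits in one 64-bit mask. */
	static bool send_ipi(uint8_t vector, const int *vps, int n)
	{
		uint64_t mask = 0;

		for (int i = 0; i < n; i++) {
			if (vps[i] >= 64)
				return send_ipi_ex(vector, vps, n);
			mask |= 1ULL << vps[i];
		}
		return send_ipi_fast(vector, mask);
	}

	int main(void)
	{
		int small[] = { 0, 3, 17 };
		int big[] = { 2, 70 };

		send_ipi(0xfb, small, 3);	/* one 64-bit mask is enough */
		send_ipi(0xfb, big, 2);		/* VP 70 forces the EX variant */
		return 0;
	}
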
diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c
index de27615c51ea..1147e1fed7ff 100644
--- a/arch/x86/hyperv/mmu.c
+++ b/arch/x86/hyperv/mmu.c
@@ -16,6 +16,8 @@
16/* Each gva in gva_list encodes up to 4096 pages to flush */ 16/* Each gva in gva_list encodes up to 4096 pages to flush */
17#define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE) 17#define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE)
18 18
19static u64 hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
20 const struct flush_tlb_info *info);
19 21
20/* 22/*
21 * Fills in gva_list starting from offset. Returns the number of items added. 23 * Fills in gva_list starting from offset. Returns the number of items added.
@@ -93,10 +95,29 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus,
93 if (cpumask_equal(cpus, cpu_present_mask)) { 95 if (cpumask_equal(cpus, cpu_present_mask)) {
94 flush->flags |= HV_FLUSH_ALL_PROCESSORS; 96 flush->flags |= HV_FLUSH_ALL_PROCESSORS;
95 } else { 97 } else {
98 /*
99 * From the supplied CPU set we need to figure out if we can get
100 * away with cheaper HVCALL_FLUSH_VIRTUAL_ADDRESS_{LIST,SPACE}
101 * hypercalls. This is possible when the highest VP number in
102 * the set is < 64. As VP numbers are usually in ascending order
103 * and match Linux CPU ids, here is an optimization: we check
104 * the VP number for the highest bit in the supplied set first
105 * so we can quickly find out if using *_EX hypercalls is a
106 * must. We will also check all VP numbers when walking the
107 * supplied CPU set to remain correct in all cases.
108 */
109 if (hv_cpu_number_to_vp_number(cpumask_last(cpus)) >= 64)
110 goto do_ex_hypercall;
111
96 for_each_cpu(cpu, cpus) { 112 for_each_cpu(cpu, cpus) {
97 vcpu = hv_cpu_number_to_vp_number(cpu); 113 vcpu = hv_cpu_number_to_vp_number(cpu);
98 if (vcpu >= 64) 114 if (vcpu == VP_INVAL) {
115 local_irq_restore(flags);
99 goto do_native; 116 goto do_native;
117 }
118
119 if (vcpu >= 64)
120 goto do_ex_hypercall;
100 121
101 __set_bit(vcpu, (unsigned long *) 122 __set_bit(vcpu, (unsigned long *)
102 &flush->processor_mask); 123 &flush->processor_mask);
@@ -123,7 +144,12 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus,
123 status = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST, 144 status = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST,
124 gva_n, 0, flush, NULL); 145 gva_n, 0, flush, NULL);
125 } 146 }
147 goto check_status;
148
149do_ex_hypercall:
150 status = hyperv_flush_tlb_others_ex(cpus, info);
126 151
152check_status:
127 local_irq_restore(flags); 153 local_irq_restore(flags);
128 154
129 if (!(status & HV_HYPERCALL_RESULT_MASK)) 155 if (!(status & HV_HYPERCALL_RESULT_MASK))
@@ -132,35 +158,22 @@ do_native:
132 native_flush_tlb_others(cpus, info); 158 native_flush_tlb_others(cpus, info);
133} 159}
134 160
135static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus, 161static u64 hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
136 const struct flush_tlb_info *info) 162 const struct flush_tlb_info *info)
137{ 163{
138 int nr_bank = 0, max_gvas, gva_n; 164 int nr_bank = 0, max_gvas, gva_n;
139 struct hv_tlb_flush_ex **flush_pcpu; 165 struct hv_tlb_flush_ex **flush_pcpu;
140 struct hv_tlb_flush_ex *flush; 166 struct hv_tlb_flush_ex *flush;
141 u64 status = U64_MAX; 167 u64 status;
142 unsigned long flags;
143 168
144 trace_hyperv_mmu_flush_tlb_others(cpus, info); 169 if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
145 170 return U64_MAX;
146 if (!hv_hypercall_pg)
147 goto do_native;
148
149 if (cpumask_empty(cpus))
150 return;
151
152 local_irq_save(flags);
153 171
154 flush_pcpu = (struct hv_tlb_flush_ex **) 172 flush_pcpu = (struct hv_tlb_flush_ex **)
155 this_cpu_ptr(hyperv_pcpu_input_arg); 173 this_cpu_ptr(hyperv_pcpu_input_arg);
156 174
157 flush = *flush_pcpu; 175 flush = *flush_pcpu;
158 176
159 if (unlikely(!flush)) {
160 local_irq_restore(flags);
161 goto do_native;
162 }
163
164 if (info->mm) { 177 if (info->mm) {
165 /* 178 /*
166 * AddressSpace argument must match the CR3 with PCID bits 179 * AddressSpace argument must match the CR3 with PCID bits
@@ -176,15 +189,10 @@ static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
176 189
177 flush->hv_vp_set.valid_bank_mask = 0; 190 flush->hv_vp_set.valid_bank_mask = 0;
178 191
179 if (!cpumask_equal(cpus, cpu_present_mask)) { 192 flush->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
180 flush->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K; 193 nr_bank = cpumask_to_vpset(&(flush->hv_vp_set), cpus);
181 nr_bank = cpumask_to_vpset(&(flush->hv_vp_set), cpus); 194 if (nr_bank < 0)
182 } 195 return U64_MAX;
183
184 if (!nr_bank) {
185 flush->hv_vp_set.format = HV_GENERIC_SET_ALL;
186 flush->flags |= HV_FLUSH_ALL_PROCESSORS;
187 }
188 196
189 /* 197 /*
190 * We can flush not more than max_gvas with one hypercall. Flush the 198 * We can flush not more than max_gvas with one hypercall. Flush the
@@ -213,12 +221,7 @@ static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
213 gva_n, nr_bank, flush, NULL); 221 gva_n, nr_bank, flush, NULL);
214 } 222 }
215 223
216 local_irq_restore(flags); 224 return status;
217
218 if (!(status & HV_HYPERCALL_RESULT_MASK))
219 return;
220do_native:
221 native_flush_tlb_others(cpus, info);
222} 225}
223 226
224void hyperv_setup_mmu_ops(void) 227void hyperv_setup_mmu_ops(void)
@@ -226,11 +229,6 @@ void hyperv_setup_mmu_ops(void)
226 if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED)) 229 if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED))
227 return; 230 return;
228 231
229 if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED)) { 232 pr_info("Using hypercall for remote TLB flush\n");
230 pr_info("Using hypercall for remote TLB flush\n"); 233 pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others;
231 pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others;
232 } else {
233 pr_info("Using ext hypercall for remote TLB flush\n");
234 pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others_ex;
235 }
236} 234}
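
With this restructuring, hyperv_flush_tlb_others() keeps the irq save/restore and the single status check, while the EX helper merely reports a status (U64_MAX when it is unavailable), and any non-zero result still ends in the native IPI-based flush. A rough control-flow sketch with placeholder names (flush_cheap, flush_ex and flush_native are illustrative, not the kernel functions):

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	#define STATUS_FAIL UINT64_MAX

	static uint64_t flush_cheap(void)
	{
		puts("HVCALL_FLUSH_VIRTUAL_ADDRESS_{SPACE,LIST}");
		return 0;
	}

	static uint64_t flush_ex(bool ex_masks_recommended)
	{
		if (!ex_masks_recommended)
			return STATUS_FAIL;	/* hypervisor does not offer it */
		puts("HVCALL_FLUSH_VIRTUAL_ADDRESS_{SPACE,LIST}_EX");
		return 0;
	}

	static void flush_native(void)
	{
		puts("falling back to native IPI-based flush");
	}

	/* One entry point picks a path, checks the status once, falls back. */
	static void flush_tlb_others(int highest_vp, bool ex_masks_recommended)
	{
		uint64_t status;

		if (highest_vp >= 64)
			status = flush_ex(ex_masks_recommended);
		else
			status = flush_cheap();

		if (status != 0)
			flush_native();
	}

	int main(void)
	{
		flush_tlb_others(12, true);	/* cheap hypercall */
		flush_tlb_others(80, true);	/* sparse-set EX hypercall */
		flush_tlb_others(80, false);	/* neither fits: go native */
		return 0;
	}
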
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index 0db6bec95489..b143717b92b3 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -80,6 +80,7 @@ static __always_inline void arch_atomic_sub(int i, atomic_t *v)
80 * true if the result is zero, or false for all 80 * true if the result is zero, or false for all
81 * other cases. 81 * other cases.
82 */ 82 */
83#define arch_atomic_sub_and_test arch_atomic_sub_and_test
83static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v) 84static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v)
84{ 85{
85 GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, "er", i, "%0", e); 86 GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, "er", i, "%0", e);
@@ -91,6 +92,7 @@ static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v)
91 * 92 *
92 * Atomically increments @v by 1. 93 * Atomically increments @v by 1.
93 */ 94 */
95#define arch_atomic_inc arch_atomic_inc
94static __always_inline void arch_atomic_inc(atomic_t *v) 96static __always_inline void arch_atomic_inc(atomic_t *v)
95{ 97{
96 asm volatile(LOCK_PREFIX "incl %0" 98 asm volatile(LOCK_PREFIX "incl %0"
@@ -103,6 +105,7 @@ static __always_inline void arch_atomic_inc(atomic_t *v)
103 * 105 *
104 * Atomically decrements @v by 1. 106 * Atomically decrements @v by 1.
105 */ 107 */
108#define arch_atomic_dec arch_atomic_dec
106static __always_inline void arch_atomic_dec(atomic_t *v) 109static __always_inline void arch_atomic_dec(atomic_t *v)
107{ 110{
108 asm volatile(LOCK_PREFIX "decl %0" 111 asm volatile(LOCK_PREFIX "decl %0"
@@ -117,6 +120,7 @@ static __always_inline void arch_atomic_dec(atomic_t *v)
117 * returns true if the result is 0, or false for all other 120 * returns true if the result is 0, or false for all other
118 * cases. 121 * cases.
119 */ 122 */
123#define arch_atomic_dec_and_test arch_atomic_dec_and_test
120static __always_inline bool arch_atomic_dec_and_test(atomic_t *v) 124static __always_inline bool arch_atomic_dec_and_test(atomic_t *v)
121{ 125{
122 GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", e); 126 GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", e);
@@ -130,6 +134,7 @@ static __always_inline bool arch_atomic_dec_and_test(atomic_t *v)
130 * and returns true if the result is zero, or false for all 134 * and returns true if the result is zero, or false for all
131 * other cases. 135 * other cases.
132 */ 136 */
137#define arch_atomic_inc_and_test arch_atomic_inc_and_test
133static __always_inline bool arch_atomic_inc_and_test(atomic_t *v) 138static __always_inline bool arch_atomic_inc_and_test(atomic_t *v)
134{ 139{
135 GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, "%0", e); 140 GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, "%0", e);
@@ -144,6 +149,7 @@ static __always_inline bool arch_atomic_inc_and_test(atomic_t *v)
144 * if the result is negative, or false when 149 * if the result is negative, or false when
145 * result is greater than or equal to zero. 150 * result is greater than or equal to zero.
146 */ 151 */
152#define arch_atomic_add_negative arch_atomic_add_negative
147static __always_inline bool arch_atomic_add_negative(int i, atomic_t *v) 153static __always_inline bool arch_atomic_add_negative(int i, atomic_t *v)
148{ 154{
149 GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, "er", i, "%0", s); 155 GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, "er", i, "%0", s);
@@ -173,9 +179,6 @@ static __always_inline int arch_atomic_sub_return(int i, atomic_t *v)
173 return arch_atomic_add_return(-i, v); 179 return arch_atomic_add_return(-i, v);
174} 180}
175 181
176#define arch_atomic_inc_return(v) (arch_atomic_add_return(1, v))
177#define arch_atomic_dec_return(v) (arch_atomic_sub_return(1, v))
178
179static __always_inline int arch_atomic_fetch_add(int i, atomic_t *v) 182static __always_inline int arch_atomic_fetch_add(int i, atomic_t *v)
180{ 183{
181 return xadd(&v->counter, i); 184 return xadd(&v->counter, i);
@@ -199,7 +202,7 @@ static __always_inline bool arch_atomic_try_cmpxchg(atomic_t *v, int *old, int n
199 202
200static inline int arch_atomic_xchg(atomic_t *v, int new) 203static inline int arch_atomic_xchg(atomic_t *v, int new)
201{ 204{
202 return xchg(&v->counter, new); 205 return arch_xchg(&v->counter, new);
203} 206}
204 207
205static inline void arch_atomic_and(int i, atomic_t *v) 208static inline void arch_atomic_and(int i, atomic_t *v)
@@ -253,27 +256,6 @@ static inline int arch_atomic_fetch_xor(int i, atomic_t *v)
253 return val; 256 return val;
254} 257}
255 258
256/**
257 * __arch_atomic_add_unless - add unless the number is already a given value
258 * @v: pointer of type atomic_t
259 * @a: the amount to add to v...
260 * @u: ...unless v is equal to u.
261 *
262 * Atomically adds @a to @v, so long as @v was not already @u.
263 * Returns the old value of @v.
264 */
265static __always_inline int __arch_atomic_add_unless(atomic_t *v, int a, int u)
266{
267 int c = arch_atomic_read(v);
268
269 do {
270 if (unlikely(c == u))
271 break;
272 } while (!arch_atomic_try_cmpxchg(v, &c, c + a));
273
274 return c;
275}
276
277#ifdef CONFIG_X86_32 259#ifdef CONFIG_X86_32
278# include <asm/atomic64_32.h> 260# include <asm/atomic64_32.h>
279#else 261#else
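
The new `#define arch_atomic_inc arch_atomic_inc` style lines are deliberately self-referential: they let generic atomic headers probe with #ifndef which operations the architecture actually provides and synthesize the rest from more basic ones, which is also why the trivial inc_return/dec_return wrappers and __arch_atomic_add_unless() can simply be deleted here. A tiny self-contained illustration of that pattern (optimized_square and its fallback are made up for the example):

	#include <stdio.h>

	/* "Arch" side: provide the op and advertise it by defining the
	 * macro to its own name, exactly like arch_atomic_inc above. */
	static inline int optimized_square(int x) { return x * x; }
	#define optimized_square optimized_square

	/* "Generic" side: emit a fallback only if the arch did not. */
	#ifndef optimized_square
	static inline int optimized_square(int x)
	{
		int r = 0;

		for (int i = 0; i < x; i++)	/* naive repeated addition */
			r += x;
		return r;
	}
	#endif

	int main(void)
	{
		printf("%d\n", optimized_square(7));	/* arch version: 49 */
		return 0;
	}
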
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index 92212bf0484f..ef959f02d070 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -158,6 +158,7 @@ static inline long long arch_atomic64_inc_return(atomic64_t *v)
158 "S" (v) : "memory", "ecx"); 158 "S" (v) : "memory", "ecx");
159 return a; 159 return a;
160} 160}
161#define arch_atomic64_inc_return arch_atomic64_inc_return
161 162
162static inline long long arch_atomic64_dec_return(atomic64_t *v) 163static inline long long arch_atomic64_dec_return(atomic64_t *v)
163{ 164{
@@ -166,6 +167,7 @@ static inline long long arch_atomic64_dec_return(atomic64_t *v)
166 "S" (v) : "memory", "ecx"); 167 "S" (v) : "memory", "ecx");
167 return a; 168 return a;
168} 169}
170#define arch_atomic64_dec_return arch_atomic64_dec_return
169 171
170/** 172/**
171 * arch_atomic64_add - add integer to atomic64 variable 173 * arch_atomic64_add - add integer to atomic64 variable
@@ -198,25 +200,12 @@ static inline long long arch_atomic64_sub(long long i, atomic64_t *v)
198} 200}
199 201
200/** 202/**
201 * arch_atomic64_sub_and_test - subtract value from variable and test result
202 * @i: integer value to subtract
203 * @v: pointer to type atomic64_t
204 *
205 * Atomically subtracts @i from @v and returns
206 * true if the result is zero, or false for all
207 * other cases.
208 */
209static inline int arch_atomic64_sub_and_test(long long i, atomic64_t *v)
210{
211 return arch_atomic64_sub_return(i, v) == 0;
212}
213
214/**
215 * arch_atomic64_inc - increment atomic64 variable 203 * arch_atomic64_inc - increment atomic64 variable
216 * @v: pointer to type atomic64_t 204 * @v: pointer to type atomic64_t
217 * 205 *
218 * Atomically increments @v by 1. 206 * Atomically increments @v by 1.
219 */ 207 */
208#define arch_atomic64_inc arch_atomic64_inc
220static inline void arch_atomic64_inc(atomic64_t *v) 209static inline void arch_atomic64_inc(atomic64_t *v)
221{ 210{
222 __alternative_atomic64(inc, inc_return, /* no output */, 211 __alternative_atomic64(inc, inc_return, /* no output */,
@@ -229,6 +218,7 @@ static inline void arch_atomic64_inc(atomic64_t *v)
229 * 218 *
230 * Atomically decrements @v by 1. 219 * Atomically decrements @v by 1.
231 */ 220 */
221#define arch_atomic64_dec arch_atomic64_dec
232static inline void arch_atomic64_dec(atomic64_t *v) 222static inline void arch_atomic64_dec(atomic64_t *v)
233{ 223{
234 __alternative_atomic64(dec, dec_return, /* no output */, 224 __alternative_atomic64(dec, dec_return, /* no output */,
@@ -236,46 +226,6 @@ static inline void arch_atomic64_dec(atomic64_t *v)
236} 226}
237 227
238/** 228/**
239 * arch_atomic64_dec_and_test - decrement and test
240 * @v: pointer to type atomic64_t
241 *
242 * Atomically decrements @v by 1 and
243 * returns true if the result is 0, or false for all other
244 * cases.
245 */
246static inline int arch_atomic64_dec_and_test(atomic64_t *v)
247{
248 return arch_atomic64_dec_return(v) == 0;
249}
250
251/**
252 * atomic64_inc_and_test - increment and test
253 * @v: pointer to type atomic64_t
254 *
255 * Atomically increments @v by 1
256 * and returns true if the result is zero, or false for all
257 * other cases.
258 */
259static inline int arch_atomic64_inc_and_test(atomic64_t *v)
260{
261 return arch_atomic64_inc_return(v) == 0;
262}
263
264/**
265 * arch_atomic64_add_negative - add and test if negative
266 * @i: integer value to add
267 * @v: pointer to type atomic64_t
268 *
269 * Atomically adds @i to @v and returns true
270 * if the result is negative, or false when
271 * result is greater than or equal to zero.
272 */
273static inline int arch_atomic64_add_negative(long long i, atomic64_t *v)
274{
275 return arch_atomic64_add_return(i, v) < 0;
276}
277
278/**
279 * arch_atomic64_add_unless - add unless the number is a given value 229 * arch_atomic64_add_unless - add unless the number is a given value
280 * @v: pointer of type atomic64_t 230 * @v: pointer of type atomic64_t
281 * @a: the amount to add to v... 231 * @a: the amount to add to v...
@@ -295,7 +245,7 @@ static inline int arch_atomic64_add_unless(atomic64_t *v, long long a,
295 return (int)a; 245 return (int)a;
296} 246}
297 247
298 248#define arch_atomic64_inc_not_zero arch_atomic64_inc_not_zero
299static inline int arch_atomic64_inc_not_zero(atomic64_t *v) 249static inline int arch_atomic64_inc_not_zero(atomic64_t *v)
300{ 250{
301 int r; 251 int r;
@@ -304,6 +254,7 @@ static inline int arch_atomic64_inc_not_zero(atomic64_t *v)
304 return r; 254 return r;
305} 255}
306 256
257#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive
307static inline long long arch_atomic64_dec_if_positive(atomic64_t *v) 258static inline long long arch_atomic64_dec_if_positive(atomic64_t *v)
308{ 259{
309 long long r; 260 long long r;
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index 6106b59d3260..4343d9b4f30e 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -71,6 +71,7 @@ static inline void arch_atomic64_sub(long i, atomic64_t *v)
71 * true if the result is zero, or false for all 71 * true if the result is zero, or false for all
72 * other cases. 72 * other cases.
73 */ 73 */
74#define arch_atomic64_sub_and_test arch_atomic64_sub_and_test
74static inline bool arch_atomic64_sub_and_test(long i, atomic64_t *v) 75static inline bool arch_atomic64_sub_and_test(long i, atomic64_t *v)
75{ 76{
76 GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, "er", i, "%0", e); 77 GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, "er", i, "%0", e);
@@ -82,6 +83,7 @@ static inline bool arch_atomic64_sub_and_test(long i, atomic64_t *v)
82 * 83 *
83 * Atomically increments @v by 1. 84 * Atomically increments @v by 1.
84 */ 85 */
86#define arch_atomic64_inc arch_atomic64_inc
85static __always_inline void arch_atomic64_inc(atomic64_t *v) 87static __always_inline void arch_atomic64_inc(atomic64_t *v)
86{ 88{
87 asm volatile(LOCK_PREFIX "incq %0" 89 asm volatile(LOCK_PREFIX "incq %0"
@@ -95,6 +97,7 @@ static __always_inline void arch_atomic64_inc(atomic64_t *v)
95 * 97 *
96 * Atomically decrements @v by 1. 98 * Atomically decrements @v by 1.
97 */ 99 */
100#define arch_atomic64_dec arch_atomic64_dec
98static __always_inline void arch_atomic64_dec(atomic64_t *v) 101static __always_inline void arch_atomic64_dec(atomic64_t *v)
99{ 102{
100 asm volatile(LOCK_PREFIX "decq %0" 103 asm volatile(LOCK_PREFIX "decq %0"
@@ -110,6 +113,7 @@ static __always_inline void arch_atomic64_dec(atomic64_t *v)
110 * returns true if the result is 0, or false for all other 113 * returns true if the result is 0, or false for all other
111 * cases. 114 * cases.
112 */ 115 */
116#define arch_atomic64_dec_and_test arch_atomic64_dec_and_test
113static inline bool arch_atomic64_dec_and_test(atomic64_t *v) 117static inline bool arch_atomic64_dec_and_test(atomic64_t *v)
114{ 118{
115 GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, "%0", e); 119 GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, "%0", e);
@@ -123,6 +127,7 @@ static inline bool arch_atomic64_dec_and_test(atomic64_t *v)
123 * and returns true if the result is zero, or false for all 127 * and returns true if the result is zero, or false for all
124 * other cases. 128 * other cases.
125 */ 129 */
130#define arch_atomic64_inc_and_test arch_atomic64_inc_and_test
126static inline bool arch_atomic64_inc_and_test(atomic64_t *v) 131static inline bool arch_atomic64_inc_and_test(atomic64_t *v)
127{ 132{
128 GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, "%0", e); 133 GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, "%0", e);
@@ -137,6 +142,7 @@ static inline bool arch_atomic64_inc_and_test(atomic64_t *v)
137 * if the result is negative, or false when 142 * if the result is negative, or false when
138 * result is greater than or equal to zero. 143 * result is greater than or equal to zero.
139 */ 144 */
145#define arch_atomic64_add_negative arch_atomic64_add_negative
140static inline bool arch_atomic64_add_negative(long i, atomic64_t *v) 146static inline bool arch_atomic64_add_negative(long i, atomic64_t *v)
141{ 147{
142 GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, "er", i, "%0", s); 148 GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, "er", i, "%0", s);
@@ -169,9 +175,6 @@ static inline long arch_atomic64_fetch_sub(long i, atomic64_t *v)
169 return xadd(&v->counter, -i); 175 return xadd(&v->counter, -i);
170} 176}
171 177
172#define arch_atomic64_inc_return(v) (arch_atomic64_add_return(1, (v)))
173#define arch_atomic64_dec_return(v) (arch_atomic64_sub_return(1, (v)))
174
175static inline long arch_atomic64_cmpxchg(atomic64_t *v, long old, long new) 178static inline long arch_atomic64_cmpxchg(atomic64_t *v, long old, long new)
176{ 179{
177 return arch_cmpxchg(&v->counter, old, new); 180 return arch_cmpxchg(&v->counter, old, new);
@@ -185,46 +188,7 @@ static __always_inline bool arch_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, l
185 188
186static inline long arch_atomic64_xchg(atomic64_t *v, long new) 189static inline long arch_atomic64_xchg(atomic64_t *v, long new)
187{ 190{
188 return xchg(&v->counter, new); 191 return arch_xchg(&v->counter, new);
189}
190
191/**
192 * arch_atomic64_add_unless - add unless the number is a given value
193 * @v: pointer of type atomic64_t
194 * @a: the amount to add to v...
195 * @u: ...unless v is equal to u.
196 *
197 * Atomically adds @a to @v, so long as it was not @u.
198 * Returns the old value of @v.
199 */
200static inline bool arch_atomic64_add_unless(atomic64_t *v, long a, long u)
201{
202 s64 c = arch_atomic64_read(v);
203 do {
204 if (unlikely(c == u))
205 return false;
206 } while (!arch_atomic64_try_cmpxchg(v, &c, c + a));
207 return true;
208}
209
210#define arch_atomic64_inc_not_zero(v) arch_atomic64_add_unless((v), 1, 0)
211
212/*
213 * arch_atomic64_dec_if_positive - decrement by 1 if old value positive
214 * @v: pointer of type atomic_t
215 *
216 * The function returns the old value of *v minus 1, even if
217 * the atomic variable, v, was not decremented.
218 */
219static inline long arch_atomic64_dec_if_positive(atomic64_t *v)
220{
221 s64 dec, c = arch_atomic64_read(v);
222 do {
223 dec = c - 1;
224 if (unlikely(dec < 0))
225 break;
226 } while (!arch_atomic64_try_cmpxchg(v, &c, dec));
227 return dec;
228} 192}
229 193
230static inline void arch_atomic64_and(long i, atomic64_t *v) 194static inline void arch_atomic64_and(long i, atomic64_t *v)
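
The removed arch_atomic64_add_unless()/arch_atomic64_dec_if_positive() bodies are not lost functionality: generic code can rebuild them from the same compare-and-exchange loop they already used. A user-space sketch of that loop with C11 atomics rather than the kernel primitives (fetch_add_unless is an illustrative name):

	#include <stdatomic.h>
	#include <stdio.h>

	/* Add @a to *@v unless it currently equals @u; return the old value. */
	static long long fetch_add_unless(_Atomic long long *v, long long a,
					  long long u)
	{
		long long c = atomic_load(v);

		do {
			if (c == u)
				break;
		} while (!atomic_compare_exchange_weak(v, &c, c + a));

		return c;
	}

	int main(void)
	{
		_Atomic long long counter = 0;

		printf("%lld\n", fetch_add_unless(&counter, 1, 5));	/* old: 0 */
		printf("%lld\n", (long long)atomic_load(&counter));	/* now: 1 */
		return 0;
	}
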
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index e3efd8a06066..a55d79b233d3 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -75,7 +75,7 @@ extern void __add_wrong_size(void)
75 * use "asm volatile" and "memory" clobbers to prevent gcc from moving 75 * use "asm volatile" and "memory" clobbers to prevent gcc from moving
76 * information around. 76 * information around.
77 */ 77 */
78#define xchg(ptr, v) __xchg_op((ptr), (v), xchg, "") 78#define arch_xchg(ptr, v) __xchg_op((ptr), (v), xchg, "")
79 79
80/* 80/*
81 * Atomic compare and exchange. Compare OLD with MEM, if identical, 81 * Atomic compare and exchange. Compare OLD with MEM, if identical,
diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h
index bfca3b346c74..072e5459fe2f 100644
--- a/arch/x86/include/asm/cmpxchg_64.h
+++ b/arch/x86/include/asm/cmpxchg_64.h
@@ -10,13 +10,13 @@ static inline void set_64bit(volatile u64 *ptr, u64 val)
10#define arch_cmpxchg64(ptr, o, n) \ 10#define arch_cmpxchg64(ptr, o, n) \
11({ \ 11({ \
12 BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ 12 BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
13 cmpxchg((ptr), (o), (n)); \ 13 arch_cmpxchg((ptr), (o), (n)); \
14}) 14})
15 15
16#define arch_cmpxchg64_local(ptr, o, n) \ 16#define arch_cmpxchg64_local(ptr, o, n) \
17({ \ 17({ \
18 BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ 18 BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
19 cmpxchg_local((ptr), (o), (n)); \ 19 arch_cmpxchg_local((ptr), (o), (n)); \
20}) 20})
21 21
22#define system_has_cmpxchg_double() boot_cpu_has(X86_FEATURE_CX16) 22#define system_has_cmpxchg_double() boot_cpu_has(X86_FEATURE_CX16)
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 5701f5cecd31..b5c60faf8429 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -219,6 +219,7 @@
219#define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */ 219#define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */
220#define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */ 220#define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */
221#define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */ 221#define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */
222#define X86_FEATURE_IBRS_ENHANCED ( 7*32+29) /* Enhanced IBRS */
222 223
223/* Virtualization flags: Linux defined, word 8 */ 224/* Virtualization flags: Linux defined, word 8 */
224#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ 225#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
@@ -229,7 +230,7 @@
229 230
230#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer VMMCALL to VMCALL */ 231#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer VMMCALL to VMCALL */
231#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */ 232#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */
232 233#define X86_FEATURE_EPT_AD ( 8*32+17) /* Intel Extended Page Table access-dirty bit */
233 234
234/* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */ 235/* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */
235#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ 236#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index cf090e584202..7ed08a7c3398 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -76,4 +76,17 @@
76#define INTEL_FAM6_XEON_PHI_KNL 0x57 /* Knights Landing */ 76#define INTEL_FAM6_XEON_PHI_KNL 0x57 /* Knights Landing */
77#define INTEL_FAM6_XEON_PHI_KNM 0x85 /* Knights Mill */ 77#define INTEL_FAM6_XEON_PHI_KNM 0x85 /* Knights Mill */
78 78
79/* Useful macros */
80#define INTEL_CPU_FAM_ANY(_family, _model, _driver_data) \
81{ \
82 .vendor = X86_VENDOR_INTEL, \
83 .family = _family, \
84 .model = _model, \
85 .feature = X86_FEATURE_ANY, \
86 .driver_data = (kernel_ulong_t)&_driver_data \
87}
88
89#define INTEL_CPU_FAM6(_model, _driver_data) \
90 INTEL_CPU_FAM_ANY(6, INTEL_FAM6_##_model, _driver_data)
91
79#endif /* _ASM_X86_INTEL_FAMILY_H */ 92#endif /* _ASM_X86_INTEL_FAMILY_H */
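
The typical consumer of INTEL_CPU_FAM6() is an x86_cpu_id table that a driver matches with x86_match_cpu(); because the macro stores the address of its second argument in driver_data, that argument is usually a static per-model data structure. A hedged kernel-style sketch (my_quirk, skl_quirk, hsw_quirk and my_probe are made-up names):

	#include <linux/module.h>
	#include <asm/cpu_device_id.h>
	#include <asm/intel-family.h>

	struct my_quirk {
		unsigned int msr_offset;
	};

	static const struct my_quirk skl_quirk = { .msr_offset = 0x620 };
	static const struct my_quirk hsw_quirk = { .msr_offset = 0x618 };

	static const struct x86_cpu_id my_cpu_ids[] = {
		/* expands to vendor=INTEL, family=6, model=INTEL_FAM6_... */
		INTEL_CPU_FAM6(SKYLAKE_MOBILE, skl_quirk),
		INTEL_CPU_FAM6(HASWELL_CORE, hsw_quirk),
		{}
	};

	static int __init my_probe(void)
	{
		const struct x86_cpu_id *id = x86_match_cpu(my_cpu_ids);

		if (!id)
			return -ENODEV;

		pr_info("quirk MSR offset %#x\n",
			((const struct my_quirk *)id->driver_data)->msr_offset);
		return 0;
	}
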
diff --git a/arch/x86/include/asm/intel-mid.h b/arch/x86/include/asm/intel-mid.h
index fe04491130ae..52f815a80539 100644
--- a/arch/x86/include/asm/intel-mid.h
+++ b/arch/x86/include/asm/intel-mid.h
@@ -80,35 +80,6 @@ enum intel_mid_cpu_type {
80 80
81extern enum intel_mid_cpu_type __intel_mid_cpu_chip; 81extern enum intel_mid_cpu_type __intel_mid_cpu_chip;
82 82
83/**
84 * struct intel_mid_ops - Interface between intel-mid & sub archs
85 * @arch_setup: arch_setup function to re-initialize platform
86 * structures (x86_init, x86_platform_init)
87 *
88 * This structure can be extended if any new interface is required
89 * between intel-mid & its sub arch files.
90 */
91struct intel_mid_ops {
92 void (*arch_setup)(void);
93};
94
95/* Helper API's for INTEL_MID_OPS_INIT */
96#define DECLARE_INTEL_MID_OPS_INIT(cpuname, cpuid) \
97 [cpuid] = get_##cpuname##_ops
98
99/* Maximum number of CPU ops */
100#define MAX_CPU_OPS(a) (sizeof(a)/sizeof(void *))
101
102/*
103 * For every new cpu addition, a weak get_<cpuname>_ops() function needs be
104 * declared in arch/x86/platform/intel_mid/intel_mid_weak_decls.h.
105 */
106#define INTEL_MID_OPS_INIT { \
107 DECLARE_INTEL_MID_OPS_INIT(penwell, INTEL_MID_CPU_CHIP_PENWELL), \
108 DECLARE_INTEL_MID_OPS_INIT(cloverview, INTEL_MID_CPU_CHIP_CLOVERVIEW), \
109 DECLARE_INTEL_MID_OPS_INIT(tangier, INTEL_MID_CPU_CHIP_TANGIER) \
110};
111
112#ifdef CONFIG_X86_INTEL_MID 83#ifdef CONFIG_X86_INTEL_MID
113 84
114static inline enum intel_mid_cpu_type intel_mid_identify_cpu(void) 85static inline enum intel_mid_cpu_type intel_mid_identify_cpu(void)
@@ -136,20 +107,6 @@ enum intel_mid_timer_options {
136 107
137extern enum intel_mid_timer_options intel_mid_timer_options; 108extern enum intel_mid_timer_options intel_mid_timer_options;
138 109
139/*
140 * Penwell uses spread spectrum clock, so the freq number is not exactly
141 * the same as reported by MSR based on SDM.
142 */
143#define FSB_FREQ_83SKU 83200
144#define FSB_FREQ_100SKU 99840
145#define FSB_FREQ_133SKU 133000
146
147#define FSB_FREQ_167SKU 167000
148#define FSB_FREQ_200SKU 200000
149#define FSB_FREQ_267SKU 267000
150#define FSB_FREQ_333SKU 333000
151#define FSB_FREQ_400SKU 400000
152
153/* Bus Select SoC Fuse value */ 110/* Bus Select SoC Fuse value */
154#define BSEL_SOC_FUSE_MASK 0x7 111#define BSEL_SOC_FUSE_MASK 0x7
155/* FSB 133MHz */ 112/* FSB 133MHz */
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index c4fc17220df9..c14f2a74b2be 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -13,6 +13,8 @@
13 * Interrupt control: 13 * Interrupt control:
14 */ 14 */
15 15
16/* Declaration required for gcc < 4.9 to prevent -Werror=missing-prototypes */
17extern inline unsigned long native_save_fl(void);
16extern inline unsigned long native_save_fl(void) 18extern inline unsigned long native_save_fl(void)
17{ 19{
18 unsigned long flags; 20 unsigned long flags;
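
The extra forward declaration works because -Wmissing-prototypes only fires for an externally visible definition that has no prior prototype; giving the extern inline definition one silences older gcc without changing code generation. A minimal user-space analogue under that assumption (my_save_flags is a made-up name; gnu89 inline semantics mirror the kernel's model, where the out-of-line copy lives in a separate file):

	/* build: gcc -std=gnu89 -O2 -Wmissing-prototypes -c demo.c */
	unsigned long my_save_flags(void);		/* the added prototype */

	extern inline unsigned long my_save_flags(void)
	{
		return 42;	/* stand-in for the pushf; pop sequence */
	}
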
diff --git a/arch/x86/include/asm/kvm_guest.h b/arch/x86/include/asm/kvm_guest.h
deleted file mode 100644
index 46185263d9c2..000000000000
--- a/arch/x86/include/asm/kvm_guest.h
+++ /dev/null
@@ -1,7 +0,0 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2#ifndef _ASM_X86_KVM_GUEST_H
3#define _ASM_X86_KVM_GUEST_H
4
5int kvm_setup_vsyscall_timeinfo(void);
6
7#endif /* _ASM_X86_KVM_GUEST_H */
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 3aea2658323a..4c723632c036 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -7,7 +7,6 @@
7#include <uapi/asm/kvm_para.h> 7#include <uapi/asm/kvm_para.h>
8 8
9extern void kvmclock_init(void); 9extern void kvmclock_init(void);
10extern int kvm_register_clock(char *txt);
11 10
12#ifdef CONFIG_KVM_GUEST 11#ifdef CONFIG_KVM_GUEST
13bool kvm_check_and_clear_guest_paused(void); 12bool kvm_check_and_clear_guest_paused(void);
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index bbc796eb0a3b..eeeb9289c764 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -71,12 +71,7 @@ struct ldt_struct {
71 71
72static inline void *ldt_slot_va(int slot) 72static inline void *ldt_slot_va(int slot)
73{ 73{
74#ifdef CONFIG_X86_64
75 return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot); 74 return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot);
76#else
77 BUG();
78 return (void *)fix_to_virt(FIX_HOLE);
79#endif
80} 75}
81 76
82/* 77/*
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 5a7375ed5f7c..19886fef1dfc 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -194,6 +194,40 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
194 return hv_status; 194 return hv_status;
195} 195}
196 196
197/* Fast hypercall with 16 bytes of input */
198static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
199{
200 u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
201
202#ifdef CONFIG_X86_64
203 {
204 __asm__ __volatile__("mov %4, %%r8\n"
205 CALL_NOSPEC
206 : "=a" (hv_status), ASM_CALL_CONSTRAINT,
207 "+c" (control), "+d" (input1)
208 : "r" (input2),
209 THUNK_TARGET(hv_hypercall_pg)
210 : "cc", "r8", "r9", "r10", "r11");
211 }
212#else
213 {
214 u32 input1_hi = upper_32_bits(input1);
215 u32 input1_lo = lower_32_bits(input1);
216 u32 input2_hi = upper_32_bits(input2);
217 u32 input2_lo = lower_32_bits(input2);
218
219 __asm__ __volatile__ (CALL_NOSPEC
220 : "=A"(hv_status),
221 "+c"(input1_lo), ASM_CALL_CONSTRAINT
222 : "A" (control), "b" (input1_hi),
223 "D"(input2_hi), "S"(input2_lo),
224 THUNK_TARGET(hv_hypercall_pg)
225 : "cc");
226 }
227#endif
228 return hv_status;
229}
230
197/* 231/*
198 * Rep hypercalls. Callers of this functions are supposed to ensure that 232 * Rep hypercalls. Callers of this functions are supposed to ensure that
199 * rep_count and varhead_size comply with Hyper-V hypercall definition. 233 * rep_count and varhead_size comply with Hyper-V hypercall definition.
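
hv_do_fast_hypercall16() passes both inputs in registers (control in %rcx, then %rdx and %r8 on 64-bit, split register pairs on 32-bit, per the asm above), so callers avoid the per-CPU hypercall input page entirely; that is what lets __send_ipi_mask() earlier in this series drop its local_irq_save()/restore() and the ipi_arg indirection. The call shape, as used in the hv_apic.c hunk:

	/* vector and cpu_mask each fit in a single 64-bit register */
	ret = hv_do_fast_hypercall16(HVCALL_SEND_IPI, ipi_arg.vector,
				     ipi_arg.cpu_mask);
	return ret == 0;
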
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index f6f6c63da62f..fd2a8c1b88bc 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -214,7 +214,7 @@ enum spectre_v2_mitigation {
214 SPECTRE_V2_RETPOLINE_MINIMAL_AMD, 214 SPECTRE_V2_RETPOLINE_MINIMAL_AMD,
215 SPECTRE_V2_RETPOLINE_GENERIC, 215 SPECTRE_V2_RETPOLINE_GENERIC,
216 SPECTRE_V2_RETPOLINE_AMD, 216 SPECTRE_V2_RETPOLINE_AMD,
217 SPECTRE_V2_IBRS, 217 SPECTRE_V2_IBRS_ENHANCED,
218}; 218};
219 219
220/* The Speculative Store Bypass disable variants */ 220/* The Speculative Store Bypass disable variants */
diff --git a/arch/x86/include/asm/orc_types.h b/arch/x86/include/asm/orc_types.h
index 9c9dc579bd7d..46f516dd80ce 100644
--- a/arch/x86/include/asm/orc_types.h
+++ b/arch/x86/include/asm/orc_types.h
@@ -88,6 +88,7 @@ struct orc_entry {
88 unsigned sp_reg:4; 88 unsigned sp_reg:4;
89 unsigned bp_reg:4; 89 unsigned bp_reg:4;
90 unsigned type:2; 90 unsigned type:2;
91 unsigned end:1;
91} __packed; 92} __packed;
92 93
93/* 94/*
@@ -101,6 +102,7 @@ struct unwind_hint {
101 s16 sp_offset; 102 s16 sp_offset;
102 u8 sp_reg; 103 u8 sp_reg;
103 u8 type; 104 u8 type;
105 u8 end;
104}; 106};
105#endif /* __ASSEMBLY__ */ 107#endif /* __ASSEMBLY__ */
106 108
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index a06b07399d17..e9202a0de8f0 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -450,9 +450,10 @@ do { \
450 bool __ret; \ 450 bool __ret; \
451 typeof(pcp1) __o1 = (o1), __n1 = (n1); \ 451 typeof(pcp1) __o1 = (o1), __n1 = (n1); \
452 typeof(pcp2) __o2 = (o2), __n2 = (n2); \ 452 typeof(pcp2) __o2 = (o2), __n2 = (n2); \
453 asm volatile("cmpxchg8b "__percpu_arg(1)"\n\tsetz %0\n\t" \ 453 asm volatile("cmpxchg8b "__percpu_arg(1) \
454 : "=a" (__ret), "+m" (pcp1), "+m" (pcp2), "+d" (__o2) \ 454 CC_SET(z) \
455 : "b" (__n1), "c" (__n2), "a" (__o1)); \ 455 : CC_OUT(z) (__ret), "+m" (pcp1), "+m" (pcp2), "+a" (__o1), "+d" (__o2) \
456 : "b" (__n1), "c" (__n2)); \
456 __ret; \ 457 __ret; \
457}) 458})
458 459
diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h
index 685ffe8a0eaf..c399ea5eea41 100644
--- a/arch/x86/include/asm/pgtable-2level.h
+++ b/arch/x86/include/asm/pgtable-2level.h
@@ -19,6 +19,9 @@ static inline void native_set_pte(pte_t *ptep , pte_t pte)
19 19
20static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd) 20static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd)
21{ 21{
22#ifdef CONFIG_PAGE_TABLE_ISOLATION
23 pmd.pud.p4d.pgd = pti_set_user_pgtbl(&pmdp->pud.p4d.pgd, pmd.pud.p4d.pgd);
24#endif
22 *pmdp = pmd; 25 *pmdp = pmd;
23} 26}
24 27
@@ -58,6 +61,9 @@ static inline pte_t native_ptep_get_and_clear(pte_t *xp)
58#ifdef CONFIG_SMP 61#ifdef CONFIG_SMP
59static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp) 62static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp)
60{ 63{
64#ifdef CONFIG_PAGE_TABLE_ISOLATION
65 pti_set_user_pgtbl(&xp->pud.p4d.pgd, __pgd(0));
66#endif
61 return __pmd(xchg((pmdval_t *)xp, 0)); 67 return __pmd(xchg((pmdval_t *)xp, 0));
62} 68}
63#else 69#else
@@ -67,6 +73,9 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp)
67#ifdef CONFIG_SMP 73#ifdef CONFIG_SMP
68static inline pud_t native_pudp_get_and_clear(pud_t *xp) 74static inline pud_t native_pudp_get_and_clear(pud_t *xp)
69{ 75{
76#ifdef CONFIG_PAGE_TABLE_ISOLATION
77 pti_set_user_pgtbl(&xp->p4d.pgd, __pgd(0));
78#endif
70 return __pud(xchg((pudval_t *)xp, 0)); 79 return __pud(xchg((pudval_t *)xp, 0));
71} 80}
72#else 81#else
diff --git a/arch/x86/include/asm/pgtable-2level_types.h b/arch/x86/include/asm/pgtable-2level_types.h
index f982ef808e7e..6deb6cd236e3 100644
--- a/arch/x86/include/asm/pgtable-2level_types.h
+++ b/arch/x86/include/asm/pgtable-2level_types.h
@@ -35,4 +35,7 @@ typedef union {
35 35
36#define PTRS_PER_PTE 1024 36#define PTRS_PER_PTE 1024
37 37
38/* This covers all VMSPLIT_* and VMSPLIT_*_OPT variants */
39#define PGD_KERNEL_START (CONFIG_PAGE_OFFSET >> PGDIR_SHIFT)
40
38#endif /* _ASM_X86_PGTABLE_2LEVEL_DEFS_H */ 41#endif /* _ASM_X86_PGTABLE_2LEVEL_DEFS_H */
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index f24df59c40b2..f2ca3139ca22 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -98,6 +98,9 @@ static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd)
98 98
99static inline void native_set_pud(pud_t *pudp, pud_t pud) 99static inline void native_set_pud(pud_t *pudp, pud_t pud)
100{ 100{
101#ifdef CONFIG_PAGE_TABLE_ISOLATION
102 pud.p4d.pgd = pti_set_user_pgtbl(&pudp->p4d.pgd, pud.p4d.pgd);
103#endif
101 set_64bit((unsigned long long *)(pudp), native_pud_val(pud)); 104 set_64bit((unsigned long long *)(pudp), native_pud_val(pud));
102} 105}
103 106
@@ -229,6 +232,10 @@ static inline pud_t native_pudp_get_and_clear(pud_t *pudp)
229{ 232{
230 union split_pud res, *orig = (union split_pud *)pudp; 233 union split_pud res, *orig = (union split_pud *)pudp;
231 234
235#ifdef CONFIG_PAGE_TABLE_ISOLATION
236 pti_set_user_pgtbl(&pudp->p4d.pgd, __pgd(0));
237#endif
238
232 /* xchg acts as a barrier before setting of the high bits */ 239 /* xchg acts as a barrier before setting of the high bits */
233 res.pud_low = xchg(&orig->pud_low, 0); 240 res.pud_low = xchg(&orig->pud_low, 0);
234 res.pud_high = orig->pud_high; 241 res.pud_high = orig->pud_high;
diff --git a/arch/x86/include/asm/pgtable-3level_types.h b/arch/x86/include/asm/pgtable-3level_types.h
index 6a59a6d0cc50..858358a82b14 100644
--- a/arch/x86/include/asm/pgtable-3level_types.h
+++ b/arch/x86/include/asm/pgtable-3level_types.h
@@ -21,9 +21,10 @@ typedef union {
21#endif /* !__ASSEMBLY__ */ 21#endif /* !__ASSEMBLY__ */
22 22
23#ifdef CONFIG_PARAVIRT 23#ifdef CONFIG_PARAVIRT
24#define SHARED_KERNEL_PMD (pv_info.shared_kernel_pmd) 24#define SHARED_KERNEL_PMD ((!static_cpu_has(X86_FEATURE_PTI) && \
25 (pv_info.shared_kernel_pmd)))
25#else 26#else
26#define SHARED_KERNEL_PMD 1 27#define SHARED_KERNEL_PMD (!static_cpu_has(X86_FEATURE_PTI))
27#endif 28#endif
28 29
29/* 30/*
@@ -45,5 +46,6 @@ typedef union {
45#define PTRS_PER_PTE 512 46#define PTRS_PER_PTE 512
46 47
47#define MAX_POSSIBLE_PHYSMEM_BITS 36 48#define MAX_POSSIBLE_PHYSMEM_BITS 36
49#define PGD_KERNEL_START (CONFIG_PAGE_OFFSET >> PGDIR_SHIFT)
48 50
49#endif /* _ASM_X86_PGTABLE_3LEVEL_DEFS_H */ 51#endif /* _ASM_X86_PGTABLE_3LEVEL_DEFS_H */
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 5715647fc4fe..a1cb3339da8d 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -30,11 +30,14 @@ int __init __early_make_pgtable(unsigned long address, pmdval_t pmd);
30void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd); 30void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd);
31void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user); 31void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user);
32void ptdump_walk_pgd_level_checkwx(void); 32void ptdump_walk_pgd_level_checkwx(void);
33void ptdump_walk_user_pgd_level_checkwx(void);
33 34
34#ifdef CONFIG_DEBUG_WX 35#ifdef CONFIG_DEBUG_WX
35#define debug_checkwx() ptdump_walk_pgd_level_checkwx() 36#define debug_checkwx() ptdump_walk_pgd_level_checkwx()
37#define debug_checkwx_user() ptdump_walk_user_pgd_level_checkwx()
36#else 38#else
37#define debug_checkwx() do { } while (0) 39#define debug_checkwx() do { } while (0)
40#define debug_checkwx_user() do { } while (0)
38#endif 41#endif
39 42
40/* 43/*
@@ -640,8 +643,31 @@ static inline int is_new_memtype_allowed(u64 paddr, unsigned long size,
640 643
641pmd_t *populate_extra_pmd(unsigned long vaddr); 644pmd_t *populate_extra_pmd(unsigned long vaddr);
642pte_t *populate_extra_pte(unsigned long vaddr); 645pte_t *populate_extra_pte(unsigned long vaddr);
646
647#ifdef CONFIG_PAGE_TABLE_ISOLATION
648pgd_t __pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd);
649
650/*
651 * Take a PGD location (pgdp) and a pgd value that needs to be set there.
652 * Populates the user and returns the resulting PGD that must be set in
653 * the kernel copy of the page tables.
654 */
655static inline pgd_t pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd)
656{
657 if (!static_cpu_has(X86_FEATURE_PTI))
658 return pgd;
659 return __pti_set_user_pgtbl(pgdp, pgd);
660}
661#else /* CONFIG_PAGE_TABLE_ISOLATION */
662static inline pgd_t pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd)
663{
664 return pgd;
665}
666#endif /* CONFIG_PAGE_TABLE_ISOLATION */
667
643#endif /* __ASSEMBLY__ */ 668#endif /* __ASSEMBLY__ */
644 669
670
645#ifdef CONFIG_X86_32 671#ifdef CONFIG_X86_32
646# include <asm/pgtable_32.h> 672# include <asm/pgtable_32.h>
647#else 673#else
@@ -1154,6 +1180,70 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
1154 } 1180 }
1155} 1181}
1156#endif 1182#endif
1183/*
1184 * Page table pages are page-aligned. The lower half of the top
1185 * level is used for userspace and the top half for the kernel.
1186 *
1187 * Returns true for parts of the PGD that map userspace and
1188 * false for the parts that map the kernel.
1189 */
1190static inline bool pgdp_maps_userspace(void *__ptr)
1191{
1192 unsigned long ptr = (unsigned long)__ptr;
1193
1194 return (((ptr & ~PAGE_MASK) / sizeof(pgd_t)) < PGD_KERNEL_START);
1195}
1196
1197static inline int pgd_large(pgd_t pgd) { return 0; }
1198
1199#ifdef CONFIG_PAGE_TABLE_ISOLATION
1200/*
1201 * All top-level PAGE_TABLE_ISOLATION page tables are order-1 pages
1202 * (8k-aligned and 8k in size). The kernel one is at the beginning 4k and
1203 * the user one is in the last 4k. To switch between them, you
1204 * just need to flip the 12th bit in their addresses.
1205 */
1206#define PTI_PGTABLE_SWITCH_BIT PAGE_SHIFT
1207
1208/*
1209 * This generates better code than the inline assembly in
1210 * __set_bit().
1211 */
1212static inline void *ptr_set_bit(void *ptr, int bit)
1213{
1214 unsigned long __ptr = (unsigned long)ptr;
1215
1216 __ptr |= BIT(bit);
1217 return (void *)__ptr;
1218}
1219static inline void *ptr_clear_bit(void *ptr, int bit)
1220{
1221 unsigned long __ptr = (unsigned long)ptr;
1222
1223 __ptr &= ~BIT(bit);
1224 return (void *)__ptr;
1225}
1226
1227static inline pgd_t *kernel_to_user_pgdp(pgd_t *pgdp)
1228{
1229 return ptr_set_bit(pgdp, PTI_PGTABLE_SWITCH_BIT);
1230}
1231
1232static inline pgd_t *user_to_kernel_pgdp(pgd_t *pgdp)
1233{
1234 return ptr_clear_bit(pgdp, PTI_PGTABLE_SWITCH_BIT);
1235}
1236
1237static inline p4d_t *kernel_to_user_p4dp(p4d_t *p4dp)
1238{
1239 return ptr_set_bit(p4dp, PTI_PGTABLE_SWITCH_BIT);
1240}
1241
1242static inline p4d_t *user_to_kernel_p4dp(p4d_t *p4dp)
1243{
1244 return ptr_clear_bit(p4dp, PTI_PGTABLE_SWITCH_BIT);
1245}
1246#endif /* CONFIG_PAGE_TABLE_ISOLATION */
1157 1247
1158/* 1248/*
1159 * clone_pgd_range(pgd_t *dst, pgd_t *src, int count); 1249 * clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
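
The helpers moved in above encode the PAGE_TABLE_ISOLATION layout: the top-level table is an order-1 (8 KiB-aligned, 8 KiB) allocation whose first 4 KiB holds the kernel PGD and whose last 4 KiB holds the user PGD, so switching between them is just setting or clearing bit PAGE_SHIFT of the pointer. A small user-space model of that trick (to_user_half/to_kernel_half are illustrative, mirroring ptr_set_bit()/ptr_clear_bit()):

	#include <stdint.h>
	#include <stdio.h>
	#include <stdlib.h>

	#define PG_SHIFT	12			/* stand-in for PAGE_SHIFT */
	#define PG_SIZE		(1UL << PG_SHIFT)

	static void *to_user_half(void *ptr)
	{
		return (void *)((uintptr_t)ptr | PG_SIZE);	/* set bit 12 */
	}

	static void *to_kernel_half(void *ptr)
	{
		return (void *)((uintptr_t)ptr & ~(uintptr_t)PG_SIZE);
	}

	int main(void)
	{
		/* An 8 KiB-aligned, 8 KiB block models the kernel+user PGD pair. */
		void *kernel_pgd = aligned_alloc(2 * PG_SIZE, 2 * PG_SIZE);
		void *user_pgd;

		if (!kernel_pgd)
			return 1;

		user_pgd = to_user_half(kernel_pgd);
		printf("kernel half: %p\n", kernel_pgd);
		printf("user half:   %p\n", user_pgd);
		printf("and back:    %p\n", to_kernel_half(user_pgd));

		free(kernel_pgd);
		return 0;
	}
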
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index 88a056b01db4..b3ec519e3982 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -34,8 +34,6 @@ static inline void check_pgt_cache(void) { }
34void paging_init(void); 34void paging_init(void);
35void sync_initial_page_table(void); 35void sync_initial_page_table(void);
36 36
37static inline int pgd_large(pgd_t pgd) { return 0; }
38
39/* 37/*
40 * Define this if things work differently on an i386 and an i486: 38 * Define this if things work differently on an i386 and an i486:
41 * it will (on an i486) warn about kernel memory accesses that are 39 * it will (on an i486) warn about kernel memory accesses that are
diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h
index d9a001a4a872..b0bc0fff5f1f 100644
--- a/arch/x86/include/asm/pgtable_32_types.h
+++ b/arch/x86/include/asm/pgtable_32_types.h
@@ -50,13 +50,18 @@ extern bool __vmalloc_start_set; /* set once high_memory is set */
50 ((FIXADDR_TOT_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) \ 50 ((FIXADDR_TOT_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) \
51 & PMD_MASK) 51 & PMD_MASK)
52 52
53#define PKMAP_BASE \ 53#define LDT_BASE_ADDR \
54 ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK) 54 ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK)
55 55
56#define LDT_END_ADDR (LDT_BASE_ADDR + PMD_SIZE)
57
58#define PKMAP_BASE \
59 ((LDT_BASE_ADDR - PAGE_SIZE) & PMD_MASK)
60
56#ifdef CONFIG_HIGHMEM 61#ifdef CONFIG_HIGHMEM
57# define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE) 62# define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE)
58#else 63#else
59# define VMALLOC_END (CPU_ENTRY_AREA_BASE - 2 * PAGE_SIZE) 64# define VMALLOC_END (LDT_BASE_ADDR - 2 * PAGE_SIZE)
60#endif 65#endif
61 66
62#define MODULES_VADDR VMALLOC_START 67#define MODULES_VADDR VMALLOC_START
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 3c5385f9a88f..acb6970e7bcf 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -132,90 +132,6 @@ static inline pud_t native_pudp_get_and_clear(pud_t *xp)
132#endif 132#endif
133} 133}
134 134
135#ifdef CONFIG_PAGE_TABLE_ISOLATION
136/*
137 * All top-level PAGE_TABLE_ISOLATION page tables are order-1 pages
138 * (8k-aligned and 8k in size). The kernel one is at the beginning 4k and
139 * the user one is in the last 4k. To switch between them, you
140 * just need to flip the 12th bit in their addresses.
141 */
142#define PTI_PGTABLE_SWITCH_BIT PAGE_SHIFT
143
144/*
145 * This generates better code than the inline assembly in
146 * __set_bit().
147 */
148static inline void *ptr_set_bit(void *ptr, int bit)
149{
150 unsigned long __ptr = (unsigned long)ptr;
151
152 __ptr |= BIT(bit);
153 return (void *)__ptr;
154}
155static inline void *ptr_clear_bit(void *ptr, int bit)
156{
157 unsigned long __ptr = (unsigned long)ptr;
158
159 __ptr &= ~BIT(bit);
160 return (void *)__ptr;
161}
162
163static inline pgd_t *kernel_to_user_pgdp(pgd_t *pgdp)
164{
165 return ptr_set_bit(pgdp, PTI_PGTABLE_SWITCH_BIT);
166}
167
168static inline pgd_t *user_to_kernel_pgdp(pgd_t *pgdp)
169{
170 return ptr_clear_bit(pgdp, PTI_PGTABLE_SWITCH_BIT);
171}
172
173static inline p4d_t *kernel_to_user_p4dp(p4d_t *p4dp)
174{
175 return ptr_set_bit(p4dp, PTI_PGTABLE_SWITCH_BIT);
176}
177
178static inline p4d_t *user_to_kernel_p4dp(p4d_t *p4dp)
179{
180 return ptr_clear_bit(p4dp, PTI_PGTABLE_SWITCH_BIT);
181}
182#endif /* CONFIG_PAGE_TABLE_ISOLATION */
183
184/*
185 * Page table pages are page-aligned. The lower half of the top
186 * level is used for userspace and the top half for the kernel.
187 *
188 * Returns true for parts of the PGD that map userspace and
189 * false for the parts that map the kernel.
190 */
191static inline bool pgdp_maps_userspace(void *__ptr)
192{
193 unsigned long ptr = (unsigned long)__ptr;
194
195 return (ptr & ~PAGE_MASK) < (PAGE_SIZE / 2);
196}
197
198#ifdef CONFIG_PAGE_TABLE_ISOLATION
199pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd);
200
201/*
202 * Take a PGD location (pgdp) and a pgd value that needs to be set there.
203 * Populates the user and returns the resulting PGD that must be set in
204 * the kernel copy of the page tables.
205 */
206static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
207{
208 if (!static_cpu_has(X86_FEATURE_PTI))
209 return pgd;
210 return __pti_set_user_pgd(pgdp, pgd);
211}
212#else
213static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
214{
215 return pgd;
216}
217#endif
218
219static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d) 135static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d)
220{ 136{
221 pgd_t pgd; 137 pgd_t pgd;
@@ -226,7 +142,7 @@ static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d)
226 } 142 }
227 143
228 pgd = native_make_pgd(native_p4d_val(p4d)); 144 pgd = native_make_pgd(native_p4d_val(p4d));
229 pgd = pti_set_user_pgd((pgd_t *)p4dp, pgd); 145 pgd = pti_set_user_pgtbl((pgd_t *)p4dp, pgd);
230 *p4dp = native_make_p4d(native_pgd_val(pgd)); 146 *p4dp = native_make_p4d(native_pgd_val(pgd));
231} 147}
232 148
@@ -237,7 +153,7 @@ static inline void native_p4d_clear(p4d_t *p4d)
237 153
238static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd) 154static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
239{ 155{
240 *pgdp = pti_set_user_pgd(pgdp, pgd); 156 *pgdp = pti_set_user_pgtbl(pgdp, pgd);
241} 157}
242 158
243static inline void native_pgd_clear(pgd_t *pgd) 159static inline void native_pgd_clear(pgd_t *pgd)
@@ -255,7 +171,6 @@ extern void sync_global_pgds(unsigned long start, unsigned long end);
255/* 171/*
256 * Level 4 access. 172 * Level 4 access.
257 */ 173 */
258static inline int pgd_large(pgd_t pgd) { return 0; }
259#define mk_kernel_pgd(address) __pgd((address) | _KERNPG_TABLE) 174#define mk_kernel_pgd(address) __pgd((address) | _KERNPG_TABLE)
260 175
261/* PUD - Level3 access */ 176/* PUD - Level3 access */
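The helpers removed from this header encode PTI's core trick: the top-level page table is an order-1 (8 KiB, 8 KiB-aligned) allocation, the kernel copy occupies the first 4 KiB, the user copy the last 4 KiB, and switching between them is a single flip of bit PAGE_SHIFT in the pointer. A user-space sketch of that pointer arithmetic, with aligned_alloc() standing in for the kernel's page allocator (all names below are illustrative, not kernel API):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

/* Model of kernel_to_user_pgdp()/user_to_kernel_pgdp(): flip bit PAGE_SHIFT. */
static void *ptr_set_bit(void *ptr, int bit)
{
	return (void *)((uintptr_t)ptr | ((uintptr_t)1 << bit));
}

static void *ptr_clear_bit(void *ptr, int bit)
{
	return (void *)((uintptr_t)ptr & ~((uintptr_t)1 << bit));
}

int main(void)
{
	/* Stand-in for the order-1 page: 8 KiB, 8 KiB-aligned. */
	void *kernel_pgd = aligned_alloc(2 * PAGE_SIZE, 2 * PAGE_SIZE);
	void *user_pgd   = ptr_set_bit(kernel_pgd, PAGE_SHIFT);

	printf("kernel half: %p\n", kernel_pgd);
	printf("user half:   %p\n", user_pgd);
	printf("back again:  %p\n", ptr_clear_bit(user_pgd, PAGE_SHIFT));

	free(kernel_pgd);
	return 0;
}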
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 054765ab2da2..04edd2d58211 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -115,6 +115,7 @@ extern unsigned int ptrs_per_p4d;
115#define LDT_PGD_ENTRY_L5 -112UL 115#define LDT_PGD_ENTRY_L5 -112UL
116#define LDT_PGD_ENTRY (pgtable_l5_enabled() ? LDT_PGD_ENTRY_L5 : LDT_PGD_ENTRY_L4) 116#define LDT_PGD_ENTRY (pgtable_l5_enabled() ? LDT_PGD_ENTRY_L5 : LDT_PGD_ENTRY_L4)
117#define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT) 117#define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT)
118#define LDT_END_ADDR (LDT_BASE_ADDR + PGDIR_SIZE)
118 119
119#define __VMALLOC_BASE_L4 0xffffc90000000000UL 120#define __VMALLOC_BASE_L4 0xffffc90000000000UL
120#define __VMALLOC_BASE_L5 0xffa0000000000000UL 121#define __VMALLOC_BASE_L5 0xffa0000000000000UL
@@ -153,4 +154,6 @@ extern unsigned int ptrs_per_p4d;
153 154
154#define EARLY_DYNAMIC_PAGE_TABLES 64 155#define EARLY_DYNAMIC_PAGE_TABLES 64
155 156
157#define PGD_KERNEL_START ((PAGE_SIZE / 2) / sizeof(pgd_t))
158
156#endif /* _ASM_X86_PGTABLE_64_DEFS_H */ 159#endif /* _ASM_X86_PGTABLE_64_DEFS_H */
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 99fff853c944..b64acb08a62b 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -50,6 +50,7 @@
50#define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL) 50#define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL)
51#define _PAGE_SOFTW1 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW1) 51#define _PAGE_SOFTW1 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW1)
52#define _PAGE_SOFTW2 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW2) 52#define _PAGE_SOFTW2 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW2)
53#define _PAGE_SOFTW3 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW3)
53#define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT) 54#define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT)
54#define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE) 55#define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE)
55#define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL) 56#define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL)
@@ -266,14 +267,37 @@ typedef struct pgprot { pgprotval_t pgprot; } pgprot_t;
266 267
267typedef struct { pgdval_t pgd; } pgd_t; 268typedef struct { pgdval_t pgd; } pgd_t;
268 269
270#ifdef CONFIG_X86_PAE
271
272/*
273 * PHYSICAL_PAGE_MASK might be non-constant when SME is compiled in, so we can't
274 * use it here.
275 */
276
277#define PGD_PAE_PAGE_MASK ((signed long)PAGE_MASK)
278#define PGD_PAE_PHYS_MASK (((1ULL << __PHYSICAL_MASK_SHIFT)-1) & PGD_PAE_PAGE_MASK)
279
280/*
281 * PAE allows Base Address, P, PWT, PCD and AVL bits to be set in PGD entries.
282 * All other bits are Reserved MBZ
283 */
284#define PGD_ALLOWED_BITS (PGD_PAE_PHYS_MASK | _PAGE_PRESENT | \
285 _PAGE_PWT | _PAGE_PCD | \
286 _PAGE_SOFTW1 | _PAGE_SOFTW2 | _PAGE_SOFTW3)
287
288#else
289/* No need to mask any bits for !PAE */
290#define PGD_ALLOWED_BITS (~0ULL)
291#endif
292
269static inline pgd_t native_make_pgd(pgdval_t val) 293static inline pgd_t native_make_pgd(pgdval_t val)
270{ 294{
271 return (pgd_t) { val }; 295 return (pgd_t) { val & PGD_ALLOWED_BITS };
272} 296}
273 297
274static inline pgdval_t native_pgd_val(pgd_t pgd) 298static inline pgdval_t native_pgd_val(pgd_t pgd)
275{ 299{
276 return pgd.pgd; 300 return pgd.pgd & PGD_ALLOWED_BITS;
277} 301}
278 302
279static inline pgdval_t pgd_flags(pgd_t pgd) 303static inline pgdval_t pgd_flags(pgd_t pgd)
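With PAE, the hardware treats most bits of a top-level entry as reserved-MBZ, so native_make_pgd()/native_pgd_val() now filter values through PGD_ALLOWED_BITS in both directions. A stand-alone sketch of the same filtering idea, with simplified mask values that only illustrate the shape of the real definitions:

#include <stdint.h>
#include <stdio.h>

/* Illustrative masks only; the real values come from pgtable_types.h. */
#define PGD_PHYS_MASK   0x000ffffffffff000ULL  /* base address bits */
#define PGD_PRESENT     (1ULL << 0)
#define PGD_PWT         (1ULL << 3)
#define PGD_PCD         (1ULL << 4)
#define PGD_ALLOWED     (PGD_PHYS_MASK | PGD_PRESENT | PGD_PWT | PGD_PCD)

typedef struct { uint64_t pgd; } pgd_t;

/* Filter on the way in ... */
static pgd_t make_pgd(uint64_t val)  { return (pgd_t){ val & PGD_ALLOWED }; }
/* ... and on the way out, so stray bits never reach the hardware. */
static uint64_t pgd_val(pgd_t pgd)   { return pgd.pgd & PGD_ALLOWED; }

int main(void)
{
	/* A value with a reserved high bit set, as software might store it. */
	uint64_t raw = 0x1234000ULL | PGD_PRESENT | (1ULL << 63);

	printf("raw:      %#llx\n", (unsigned long long)raw);
	printf("filtered: %#llx\n", (unsigned long long)pgd_val(make_pgd(raw)));
	return 0;
}

Filtering in both helpers guarantees that whatever extra bits software keeps in its in-memory copy of a pgd are dropped before the value is handed to the MMU.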
diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h
index 625a52a5594f..02c2cbda4a74 100644
--- a/arch/x86/include/asm/processor-flags.h
+++ b/arch/x86/include/asm/processor-flags.h
@@ -39,10 +39,6 @@
39#define CR3_PCID_MASK 0xFFFull 39#define CR3_PCID_MASK 0xFFFull
40#define CR3_NOFLUSH BIT_ULL(63) 40#define CR3_NOFLUSH BIT_ULL(63)
41 41
42#ifdef CONFIG_PAGE_TABLE_ISOLATION
43# define X86_CR3_PTI_PCID_USER_BIT 11
44#endif
45
46#else 42#else
47/* 43/*
48 * CR3_ADDR_MASK needs at least bits 31:5 set on PAE systems, and we save 44 * CR3_ADDR_MASK needs at least bits 31:5 set on PAE systems, and we save
@@ -53,4 +49,8 @@
53#define CR3_NOFLUSH 0 49#define CR3_NOFLUSH 0
54#endif 50#endif
55 51
52#ifdef CONFIG_PAGE_TABLE_ISOLATION
53# define X86_CR3_PTI_PCID_USER_BIT 11
54#endif
55
56#endif /* _ASM_X86_PROCESSOR_FLAGS_H */ 56#endif /* _ASM_X86_PROCESSOR_FLAGS_H */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index cfd29ee8c3da..59663c08c949 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -966,6 +966,7 @@ static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves)
966 966
967extern unsigned long arch_align_stack(unsigned long sp); 967extern unsigned long arch_align_stack(unsigned long sp);
968extern void free_init_pages(char *what, unsigned long begin, unsigned long end); 968extern void free_init_pages(char *what, unsigned long begin, unsigned long end);
969extern void free_kernel_image_pages(void *begin, void *end);
969 970
970void default_idle(void); 971void default_idle(void);
971#ifdef CONFIG_XEN 972#ifdef CONFIG_XEN
diff --git a/arch/x86/include/asm/pti.h b/arch/x86/include/asm/pti.h
index 38a17f1d5c9d..5df09a0b80b8 100644
--- a/arch/x86/include/asm/pti.h
+++ b/arch/x86/include/asm/pti.h
@@ -6,10 +6,9 @@
6#ifdef CONFIG_PAGE_TABLE_ISOLATION 6#ifdef CONFIG_PAGE_TABLE_ISOLATION
7extern void pti_init(void); 7extern void pti_init(void);
8extern void pti_check_boottime_disable(void); 8extern void pti_check_boottime_disable(void);
9extern void pti_clone_kernel_text(void); 9extern void pti_finalize(void);
10#else 10#else
11static inline void pti_check_boottime_disable(void) { } 11static inline void pti_check_boottime_disable(void) { }
12static inline void pti_clone_kernel_text(void) { }
13#endif 12#endif
14 13
15#endif /* __ASSEMBLY__ */ 14#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/refcount.h b/arch/x86/include/asm/refcount.h
index 4cf11d88d3b3..19b90521954c 100644
--- a/arch/x86/include/asm/refcount.h
+++ b/arch/x86/include/asm/refcount.h
@@ -5,6 +5,7 @@
5 * PaX/grsecurity. 5 * PaX/grsecurity.
6 */ 6 */
7#include <linux/refcount.h> 7#include <linux/refcount.h>
8#include <asm/bug.h>
8 9
9/* 10/*
10 * This is the first portion of the refcount error handling, which lives in 11 * This is the first portion of the refcount error handling, which lives in
diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h
index 5c019d23d06b..4a911a382ade 100644
--- a/arch/x86/include/asm/sections.h
+++ b/arch/x86/include/asm/sections.h
@@ -7,6 +7,7 @@
7 7
8extern char __brk_base[], __brk_limit[]; 8extern char __brk_base[], __brk_limit[];
9extern struct exception_table_entry __stop___ex_table[]; 9extern struct exception_table_entry __stop___ex_table[];
10extern char __end_rodata_aligned[];
10 11
11#if defined(CONFIG_X86_64) 12#if defined(CONFIG_X86_64)
12extern char __end_rodata_hpage_align[]; 13extern char __end_rodata_hpage_align[];
diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h
index bd090367236c..34cffcef7375 100644
--- a/arch/x86/include/asm/set_memory.h
+++ b/arch/x86/include/asm/set_memory.h
@@ -46,6 +46,7 @@ int set_memory_np(unsigned long addr, int numpages);
46int set_memory_4k(unsigned long addr, int numpages); 46int set_memory_4k(unsigned long addr, int numpages);
47int set_memory_encrypted(unsigned long addr, int numpages); 47int set_memory_encrypted(unsigned long addr, int numpages);
48int set_memory_decrypted(unsigned long addr, int numpages); 48int set_memory_decrypted(unsigned long addr, int numpages);
49int set_memory_np_noalias(unsigned long addr, int numpages);
49 50
50int set_memory_array_uc(unsigned long *addr, int addrinarray); 51int set_memory_array_uc(unsigned long *addr, int addrinarray);
51int set_memory_array_wc(unsigned long *addr, int addrinarray); 52int set_memory_array_wc(unsigned long *addr, int addrinarray);
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index eb5f7999a893..36bd243843d6 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -87,15 +87,25 @@ static inline void refresh_sysenter_cs(struct thread_struct *thread)
87#endif 87#endif
88 88
89/* This is used when switching tasks or entering/exiting vm86 mode. */ 89/* This is used when switching tasks or entering/exiting vm86 mode. */
90static inline void update_sp0(struct task_struct *task) 90static inline void update_task_stack(struct task_struct *task)
91{ 91{
92 /* On x86_64, sp0 always points to the entry trampoline stack, which is constant: */ 92 /* sp0 always points to the entry trampoline stack, which is constant: */
93#ifdef CONFIG_X86_32 93#ifdef CONFIG_X86_32
94 load_sp0(task->thread.sp0); 94 if (static_cpu_has(X86_FEATURE_XENPV))
95 load_sp0(task->thread.sp0);
96 else
97 this_cpu_write(cpu_tss_rw.x86_tss.sp1, task->thread.sp0);
95#else 98#else
99 /*
100 * x86-64 updates x86_tss.sp1 via cpu_current_top_of_stack. That
101 * doesn't work on x86-32 because sp1 and
102 * cpu_current_top_of_stack have different values (because of
103 * the non-zero stack-padding on 32bit).
104 */
96 if (static_cpu_has(X86_FEATURE_XENPV)) 105 if (static_cpu_has(X86_FEATURE_XENPV))
97 load_sp0(task_top_of_stack(task)); 106 load_sp0(task_top_of_stack(task));
98#endif 107#endif
108
99} 109}
100 110
101#endif /* _ASM_X86_SWITCH_TO_H */ 111#endif /* _ASM_X86_SWITCH_TO_H */
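On native 32-bit, the update above stops reloading the hardware TSS sp0 on every task switch and instead stashes task->thread.sp0 in the software-only x86_tss.sp1 slot, where the entry assembly picks it up; only Xen PV still needs the real load_sp0(). A rough user-space model of that choice, with the per-CPU TSS and load_sp0() reduced to stubs (every name here is a stand-in, not kernel API):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct tss  { uint64_t sp0, sp1; };   /* stand-in for cpu_tss_rw.x86_tss */
struct task { uint64_t sp0; };        /* stand-in for task->thread.sp0   */

static struct tss cpu_tss;
static bool running_on_xen_pv;        /* stand-in for X86_FEATURE_XENPV  */

static void load_sp0(uint64_t sp0)    /* would touch the hardware TSS    */
{
	cpu_tss.sp0 = sp0;
}

/* Model of the 32-bit side of update_task_stack(). */
static void update_task_stack(struct task *task)
{
	if (running_on_xen_pv)
		load_sp0(task->sp0);       /* Xen PV still wants the real sp0 */
	else
		cpu_tss.sp1 = task->sp0;   /* cheap store; entry asm reads sp1 */
}

int main(void)
{
	struct task t = { .sp0 = 0xffffc000 };

	update_task_stack(&t);
	printf("sp0=%#llx sp1=%#llx\n",
	       (unsigned long long)cpu_tss.sp0, (unsigned long long)cpu_tss.sp1);
	return 0;
}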
diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h
index 2ecd34e2d46c..e85ff65c43c3 100644
--- a/arch/x86/include/asm/text-patching.h
+++ b/arch/x86/include/asm/text-patching.h
@@ -37,5 +37,6 @@ extern void *text_poke_early(void *addr, const void *opcode, size_t len);
37extern void *text_poke(void *addr, const void *opcode, size_t len); 37extern void *text_poke(void *addr, const void *opcode, size_t len);
38extern int poke_int3_handler(struct pt_regs *regs); 38extern int poke_int3_handler(struct pt_regs *regs);
39extern void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler); 39extern void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler);
40extern int after_bootmem;
40 41
41#endif /* _ASM_X86_TEXT_PATCHING_H */ 42#endif /* _ASM_X86_TEXT_PATCHING_H */
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 6690cd3fc8b1..511bf5fae8b8 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -148,22 +148,6 @@ static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
148#define __flush_tlb_one_user(addr) __native_flush_tlb_one_user(addr) 148#define __flush_tlb_one_user(addr) __native_flush_tlb_one_user(addr)
149#endif 149#endif
150 150
151static inline bool tlb_defer_switch_to_init_mm(void)
152{
153 /*
154 * If we have PCID, then switching to init_mm is reasonably
155 * fast. If we don't have PCID, then switching to init_mm is
156 * quite slow, so we try to defer it in the hopes that we can
157 * avoid it entirely. The latter approach runs the risk of
158 * receiving otherwise unnecessary IPIs.
159 *
160 * This choice is just a heuristic. The tlb code can handle this
161 * function returning true or false regardless of whether we have
162 * PCID.
163 */
164 return !static_cpu_has(X86_FEATURE_PCID);
165}
166
167struct tlb_context { 151struct tlb_context {
168 u64 ctx_id; 152 u64 ctx_id;
169 u64 tlb_gen; 153 u64 tlb_gen;
@@ -554,4 +538,9 @@ extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch);
554 native_flush_tlb_others(mask, info) 538 native_flush_tlb_others(mask, info)
555#endif 539#endif
556 540
541extern void tlb_flush_remove_tables(struct mm_struct *mm);
542extern void tlb_flush_remove_tables_local(void *arg);
543
544#define HAVE_TLB_FLUSH_REMOVE_TABLES
545
557#endif /* _ASM_X86_TLBFLUSH_H */ 546#endif /* _ASM_X86_TLBFLUSH_H */
diff --git a/arch/x86/include/asm/trace/hyperv.h b/arch/x86/include/asm/trace/hyperv.h
index 4253bca99989..9c0d4b588e3f 100644
--- a/arch/x86/include/asm/trace/hyperv.h
+++ b/arch/x86/include/asm/trace/hyperv.h
@@ -28,6 +28,21 @@ TRACE_EVENT(hyperv_mmu_flush_tlb_others,
28 __entry->addr, __entry->end) 28 __entry->addr, __entry->end)
29 ); 29 );
30 30
31TRACE_EVENT(hyperv_send_ipi_mask,
32 TP_PROTO(const struct cpumask *cpus,
33 int vector),
34 TP_ARGS(cpus, vector),
35 TP_STRUCT__entry(
36 __field(unsigned int, ncpus)
37 __field(int, vector)
38 ),
39 TP_fast_assign(__entry->ncpus = cpumask_weight(cpus);
40 __entry->vector = vector;
41 ),
42 TP_printk("ncpus %d vector %x",
43 __entry->ncpus, __entry->vector)
44 );
45
31#endif /* CONFIG_HYPERV */ 46#endif /* CONFIG_HYPERV */
32 47
33#undef TRACE_INCLUDE_PATH 48#undef TRACE_INCLUDE_PATH
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index 2701d221583a..eb5bbfeccb66 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -33,13 +33,13 @@ static inline cycles_t get_cycles(void)
33extern struct system_counterval_t convert_art_to_tsc(u64 art); 33extern struct system_counterval_t convert_art_to_tsc(u64 art);
34extern struct system_counterval_t convert_art_ns_to_tsc(u64 art_ns); 34extern struct system_counterval_t convert_art_ns_to_tsc(u64 art_ns);
35 35
36extern void tsc_early_delay_calibrate(void); 36extern void tsc_early_init(void);
37extern void tsc_init(void); 37extern void tsc_init(void);
38extern void mark_tsc_unstable(char *reason); 38extern void mark_tsc_unstable(char *reason);
39extern int unsynchronized_tsc(void); 39extern int unsynchronized_tsc(void);
40extern int check_tsc_unstable(void); 40extern int check_tsc_unstable(void);
41extern void mark_tsc_async_resets(char *reason); 41extern void mark_tsc_async_resets(char *reason);
42extern unsigned long native_calibrate_cpu(void); 42extern unsigned long native_calibrate_cpu_early(void);
43extern unsigned long native_calibrate_tsc(void); 43extern unsigned long native_calibrate_tsc(void);
44extern unsigned long long native_sched_clock_from_tsc(u64 tsc); 44extern unsigned long long native_sched_clock_from_tsc(u64 tsc);
45 45
diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h
index bae46fc6b9de..0bcdb1279361 100644
--- a/arch/x86/include/asm/unwind_hints.h
+++ b/arch/x86/include/asm/unwind_hints.h
@@ -26,7 +26,7 @@
26 * the debuginfo as necessary. It will also warn if it sees any 26 * the debuginfo as necessary. It will also warn if it sees any
27 * inconsistencies. 27 * inconsistencies.
28 */ 28 */
29.macro UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=0 type=ORC_TYPE_CALL 29.macro UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=0 type=ORC_TYPE_CALL end=0
30#ifdef CONFIG_STACK_VALIDATION 30#ifdef CONFIG_STACK_VALIDATION
31.Lunwind_hint_ip_\@: 31.Lunwind_hint_ip_\@:
32 .pushsection .discard.unwind_hints 32 .pushsection .discard.unwind_hints
@@ -35,12 +35,14 @@
35 .short \sp_offset 35 .short \sp_offset
36 .byte \sp_reg 36 .byte \sp_reg
37 .byte \type 37 .byte \type
38 .byte \end
39 .balign 4
38 .popsection 40 .popsection
39#endif 41#endif
40.endm 42.endm
41 43
42.macro UNWIND_HINT_EMPTY 44.macro UNWIND_HINT_EMPTY
43 UNWIND_HINT sp_reg=ORC_REG_UNDEFINED 45 UNWIND_HINT sp_reg=ORC_REG_UNDEFINED end=1
44.endm 46.endm
45 47
46.macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 iret=0 48.macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 iret=0
@@ -86,19 +88,21 @@
86 88
87#else /* !__ASSEMBLY__ */ 89#else /* !__ASSEMBLY__ */
88 90
89#define UNWIND_HINT(sp_reg, sp_offset, type) \ 91#define UNWIND_HINT(sp_reg, sp_offset, type, end) \
90 "987: \n\t" \ 92 "987: \n\t" \
91 ".pushsection .discard.unwind_hints\n\t" \ 93 ".pushsection .discard.unwind_hints\n\t" \
92 /* struct unwind_hint */ \ 94 /* struct unwind_hint */ \
93 ".long 987b - .\n\t" \ 95 ".long 987b - .\n\t" \
94 ".short " __stringify(sp_offset) "\n\t" \ 96 ".short " __stringify(sp_offset) "\n\t" \
95 ".byte " __stringify(sp_reg) "\n\t" \ 97 ".byte " __stringify(sp_reg) "\n\t" \
96 ".byte " __stringify(type) "\n\t" \ 98 ".byte " __stringify(type) "\n\t" \
99 ".byte " __stringify(end) "\n\t" \
100 ".balign 4 \n\t" \
97 ".popsection\n\t" 101 ".popsection\n\t"
98 102
99#define UNWIND_HINT_SAVE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_SAVE) 103#define UNWIND_HINT_SAVE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_SAVE, 0)
100 104
101#define UNWIND_HINT_RESTORE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_RESTORE) 105#define UNWIND_HINT_RESTORE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_RESTORE, 0)
102 106
103#endif /* __ASSEMBLY__ */ 107#endif /* __ASSEMBLY__ */
104 108
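The extra ".byte \end" plus ".balign 4" keeps every record in .discard.unwind_hints the same size and 4-byte aligned, so objtool can still walk the section as an array after the new field is added. A rough C view of one record and its padded size, assuming the field widths shown in the macro (this mirrors, but is not copied from, objtool's own struct):

#include <stdint.h>
#include <stdio.h>

struct unwind_hint {
	int32_t ip;        /* .long 987b - .     */
	int16_t sp_offset; /* .short \sp_offset  */
	uint8_t sp_reg;    /* .byte  \sp_reg     */
	uint8_t type;      /* .byte  \type       */
	uint8_t end;       /* .byte  \end  (new) */
};

int main(void)
{
	/* Nine payload bytes rounded up to a multiple of four, as .balign does. */
	printf("sizeof(struct unwind_hint) = %zu\n", sizeof(struct unwind_hint));
	return 0;
}

On a typical ABI this prints 12, matching the record size the assembler emits after the .balign directive.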
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index a481763a3776..014f214da581 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -668,6 +668,7 @@ void *__init_or_module text_poke_early(void *addr, const void *opcode,
668 local_irq_save(flags); 668 local_irq_save(flags);
669 memcpy(addr, opcode, len); 669 memcpy(addr, opcode, len);
670 local_irq_restore(flags); 670 local_irq_restore(flags);
671 sync_core();
671 /* Could also do a CLFLUSH here to speed up CPU recovery; but 672 /* Could also do a CLFLUSH here to speed up CPU recovery; but
672 that causes hangs on some VIA CPUs. */ 673 that causes hangs on some VIA CPUs. */
673 return addr; 674 return addr;
@@ -693,6 +694,12 @@ void *text_poke(void *addr, const void *opcode, size_t len)
693 struct page *pages[2]; 694 struct page *pages[2];
694 int i; 695 int i;
695 696
697 /*
698 * While boot memory allocator is runnig we cannot use struct
699 * pages as they are not yet initialized.
700 */
701 BUG_ON(!after_bootmem);
702
696 if (!core_kernel_text((unsigned long)addr)) { 703 if (!core_kernel_text((unsigned long)addr)) {
697 pages[0] = vmalloc_to_page(addr); 704 pages[0] = vmalloc_to_page(addr);
698 pages[1] = vmalloc_to_page(addr + PAGE_SIZE); 705 pages[1] = vmalloc_to_page(addr + PAGE_SIZE);
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index adbda5847b14..07fa222f0c52 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -940,7 +940,7 @@ static int __init calibrate_APIC_clock(void)
940 940
941 if (levt->features & CLOCK_EVT_FEAT_DUMMY) { 941 if (levt->features & CLOCK_EVT_FEAT_DUMMY) {
942 pr_warning("APIC timer disabled due to verification failure\n"); 942 pr_warning("APIC timer disabled due to verification failure\n");
943 return -1; 943 return -1;
944 } 944 }
945 945
946 return 0; 946 return 0;
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 35aaee4fc028..0954315842c0 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -218,7 +218,8 @@ static int reserve_irq_vector(struct irq_data *irqd)
218 return 0; 218 return 0;
219} 219}
220 220
221static int allocate_vector(struct irq_data *irqd, const struct cpumask *dest) 221static int
222assign_vector_locked(struct irq_data *irqd, const struct cpumask *dest)
222{ 223{
223 struct apic_chip_data *apicd = apic_chip_data(irqd); 224 struct apic_chip_data *apicd = apic_chip_data(irqd);
224 bool resvd = apicd->has_reserved; 225 bool resvd = apicd->has_reserved;
@@ -245,22 +246,12 @@ static int allocate_vector(struct irq_data *irqd, const struct cpumask *dest)
245 return -EBUSY; 246 return -EBUSY;
246 247
247 vector = irq_matrix_alloc(vector_matrix, dest, resvd, &cpu); 248 vector = irq_matrix_alloc(vector_matrix, dest, resvd, &cpu);
248 if (vector > 0)
249 apic_update_vector(irqd, vector, cpu);
250 trace_vector_alloc(irqd->irq, vector, resvd, vector); 249 trace_vector_alloc(irqd->irq, vector, resvd, vector);
251 return vector;
252}
253
254static int assign_vector_locked(struct irq_data *irqd,
255 const struct cpumask *dest)
256{
257 struct apic_chip_data *apicd = apic_chip_data(irqd);
258 int vector = allocate_vector(irqd, dest);
259
260 if (vector < 0) 250 if (vector < 0)
261 return vector; 251 return vector;
252 apic_update_vector(irqd, vector, cpu);
253 apic_update_irq_cfg(irqd, vector, cpu);
262 254
263 apic_update_irq_cfg(irqd, apicd->vector, apicd->cpu);
264 return 0; 255 return 0;
265} 256}
266 257
@@ -433,7 +424,7 @@ static int activate_managed(struct irq_data *irqd)
433 pr_err("Managed startup irq %u, no vector available\n", 424 pr_err("Managed startup irq %u, no vector available\n",
434 irqd->irq); 425 irqd->irq);
435 } 426 }
436 return ret; 427 return ret;
437} 428}
438 429
439static int x86_vector_activate(struct irq_domain *dom, struct irq_data *irqd, 430static int x86_vector_activate(struct irq_domain *dom, struct irq_data *irqd,
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index d492752f79e1..391f358ebb4c 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -394,10 +394,10 @@ extern int uv_hub_info_version(void)
394EXPORT_SYMBOL(uv_hub_info_version); 394EXPORT_SYMBOL(uv_hub_info_version);
395 395
396/* Default UV memory block size is 2GB */ 396/* Default UV memory block size is 2GB */
397static unsigned long mem_block_size = (2UL << 30); 397static unsigned long mem_block_size __initdata = (2UL << 30);
398 398
399/* Kernel parameter to specify UV mem block size */ 399/* Kernel parameter to specify UV mem block size */
400static int parse_mem_block_size(char *ptr) 400static int __init parse_mem_block_size(char *ptr)
401{ 401{
402 unsigned long size = memparse(ptr, NULL); 402 unsigned long size = memparse(ptr, NULL);
403 403
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index dcb008c320fe..01de31db300d 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -103,4 +103,9 @@ void common(void) {
103 OFFSET(CPU_ENTRY_AREA_entry_trampoline, cpu_entry_area, entry_trampoline); 103 OFFSET(CPU_ENTRY_AREA_entry_trampoline, cpu_entry_area, entry_trampoline);
104 OFFSET(CPU_ENTRY_AREA_entry_stack, cpu_entry_area, entry_stack_page); 104 OFFSET(CPU_ENTRY_AREA_entry_stack, cpu_entry_area, entry_stack_page);
105 DEFINE(SIZEOF_entry_stack, sizeof(struct entry_stack)); 105 DEFINE(SIZEOF_entry_stack, sizeof(struct entry_stack));
106 DEFINE(MASK_entry_stack, (~(sizeof(struct entry_stack) - 1)));
107
108 /* Offset for sp0 and sp1 into the tss_struct */
109 OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
110 OFFSET(TSS_sp1, tss_struct, x86_tss.sp1);
106} 111}
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index a4a3be399f4b..82826f2275cc 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -46,8 +46,14 @@ void foo(void)
46 OFFSET(saved_context_gdt_desc, saved_context, gdt_desc); 46 OFFSET(saved_context_gdt_desc, saved_context, gdt_desc);
47 BLANK(); 47 BLANK();
48 48
49 /* Offset from the sysenter stack to tss.sp0 */ 49 /*
50 DEFINE(TSS_sysenter_sp0, offsetof(struct cpu_entry_area, tss.x86_tss.sp0) - 50 * Offset from the entry stack to task stack stored in TSS. Kernel entry
51 * happens on the per-cpu entry-stack, and the asm code switches to the
52 * task-stack pointer stored in x86_tss.sp1, which is a copy of
53 * task->thread.sp0 where entry code can find it.
54 */
55 DEFINE(TSS_entry2task_stack,
56 offsetof(struct cpu_entry_area, tss.x86_tss.sp1) -
51 offsetofend(struct cpu_entry_area, entry_stack_page.stack)); 57 offsetofend(struct cpu_entry_area, entry_stack_page.stack));
52 58
53#ifdef CONFIG_STACKPROTECTOR 59#ifdef CONFIG_STACKPROTECTOR
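TSS_entry2task_stack is computed at build time from offsetof()/offsetofend() so the 32-bit entry assembly can find x86_tss.sp1 relative to the top of the entry stack without knowing the C layout. A toy reconstruction of that arithmetic with made-up structure sizes (the real cpu_entry_area layout is larger and defined elsewhere):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* offsetofend(): one past the last byte of a member, as the kernel defines it. */
#define offsetofend(TYPE, MEMBER) \
	(offsetof(TYPE, MEMBER) + sizeof(((TYPE *)0)->MEMBER))

/* Cut-down stand-ins for the real cpu_entry_area layout. */
struct entry_stack      { unsigned long words[64]; };
struct entry_stack_page { struct entry_stack stack; };
struct x86_hw_tss       { uint64_t sp0, sp1; };
struct tss              { struct x86_hw_tss x86_tss; };
struct cpu_entry_area   { struct entry_stack_page entry_stack_page; struct tss tss; };

int main(void)
{
	/* Distance from the top of the entry stack to tss.x86_tss.sp1. */
	size_t d = offsetof(struct cpu_entry_area, tss.x86_tss.sp1) -
		   offsetofend(struct cpu_entry_area, entry_stack_page.stack);

	printf("TSS_entry2task_stack (model) = %zu\n", d);
	return 0;
}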
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index b2dcd161f514..3b9405e7ba2b 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -65,8 +65,6 @@ int main(void)
65#undef ENTRY 65#undef ENTRY
66 66
67 OFFSET(TSS_ist, tss_struct, x86_tss.ist); 67 OFFSET(TSS_ist, tss_struct, x86_tss.ist);
68 OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
69 OFFSET(TSS_sp1, tss_struct, x86_tss.sp1);
70 BLANK(); 68 BLANK();
71 69
72#ifdef CONFIG_STACKPROTECTOR 70#ifdef CONFIG_STACKPROTECTOR
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 7a40196967cb..347137e80bf5 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -35,7 +35,9 @@ obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o
35obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o 35obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o
36obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o 36obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o
37 37
38obj-$(CONFIG_INTEL_RDT) += intel_rdt.o intel_rdt_rdtgroup.o intel_rdt_monitor.o intel_rdt_ctrlmondata.o 38obj-$(CONFIG_INTEL_RDT) += intel_rdt.o intel_rdt_rdtgroup.o intel_rdt_monitor.o
39obj-$(CONFIG_INTEL_RDT) += intel_rdt_ctrlmondata.o intel_rdt_pseudo_lock.o
40CFLAGS_intel_rdt_pseudo_lock.o = -I$(src)
39 41
40obj-$(CONFIG_X86_MCE) += mcheck/ 42obj-$(CONFIG_X86_MCE) += mcheck/
41obj-$(CONFIG_MTRR) += mtrr/ 43obj-$(CONFIG_MTRR) += mtrr/
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 38915fbfae73..b732438c1a1e 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -232,8 +232,6 @@ static void init_amd_k7(struct cpuinfo_x86 *c)
232 } 232 }
233 } 233 }
234 234
235 set_cpu_cap(c, X86_FEATURE_K7);
236
237 /* calling is from identify_secondary_cpu() ? */ 235 /* calling is from identify_secondary_cpu() ? */
238 if (!c->cpu_index) 236 if (!c->cpu_index)
239 return; 237 return;
@@ -617,6 +615,14 @@ static void early_init_amd(struct cpuinfo_x86 *c)
617 615
618 early_init_amd_mc(c); 616 early_init_amd_mc(c);
619 617
618#ifdef CONFIG_X86_32
619 if (c->x86 == 6)
620 set_cpu_cap(c, X86_FEATURE_K7);
621#endif
622
623 if (c->x86 >= 0xf)
624 set_cpu_cap(c, X86_FEATURE_K8);
625
620 rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy); 626 rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy);
621 627
622 /* 628 /*
@@ -863,9 +869,6 @@ static void init_amd(struct cpuinfo_x86 *c)
863 869
864 init_amd_cacheinfo(c); 870 init_amd_cacheinfo(c);
865 871
866 if (c->x86 >= 0xf)
867 set_cpu_cap(c, X86_FEATURE_K8);
868
869 if (cpu_has(c, X86_FEATURE_XMM2)) { 872 if (cpu_has(c, X86_FEATURE_XMM2)) {
870 unsigned long long val; 873 unsigned long long val;
871 int ret; 874 int ret;
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 5c0ea39311fe..405a9a61bb89 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -130,6 +130,7 @@ static const char *spectre_v2_strings[] = {
130 [SPECTRE_V2_RETPOLINE_MINIMAL_AMD] = "Vulnerable: Minimal AMD ASM retpoline", 130 [SPECTRE_V2_RETPOLINE_MINIMAL_AMD] = "Vulnerable: Minimal AMD ASM retpoline",
131 [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline", 131 [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline",
132 [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline", 132 [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline",
133 [SPECTRE_V2_IBRS_ENHANCED] = "Mitigation: Enhanced IBRS",
133}; 134};
134 135
135#undef pr_fmt 136#undef pr_fmt
@@ -313,23 +314,6 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
313 return cmd; 314 return cmd;
314} 315}
315 316
316/* Check for Skylake-like CPUs (for RSB handling) */
317static bool __init is_skylake_era(void)
318{
319 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
320 boot_cpu_data.x86 == 6) {
321 switch (boot_cpu_data.x86_model) {
322 case INTEL_FAM6_SKYLAKE_MOBILE:
323 case INTEL_FAM6_SKYLAKE_DESKTOP:
324 case INTEL_FAM6_SKYLAKE_X:
325 case INTEL_FAM6_KABYLAKE_MOBILE:
326 case INTEL_FAM6_KABYLAKE_DESKTOP:
327 return true;
328 }
329 }
330 return false;
331}
332
333static void __init spectre_v2_select_mitigation(void) 317static void __init spectre_v2_select_mitigation(void)
334{ 318{
335 enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); 319 enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
@@ -349,6 +333,13 @@ static void __init spectre_v2_select_mitigation(void)
349 333
350 case SPECTRE_V2_CMD_FORCE: 334 case SPECTRE_V2_CMD_FORCE:
351 case SPECTRE_V2_CMD_AUTO: 335 case SPECTRE_V2_CMD_AUTO:
336 if (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED)) {
337 mode = SPECTRE_V2_IBRS_ENHANCED;
338 /* Force it so VMEXIT will restore correctly */
339 x86_spec_ctrl_base |= SPEC_CTRL_IBRS;
340 wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
341 goto specv2_set_mode;
342 }
352 if (IS_ENABLED(CONFIG_RETPOLINE)) 343 if (IS_ENABLED(CONFIG_RETPOLINE))
353 goto retpoline_auto; 344 goto retpoline_auto;
354 break; 345 break;
@@ -386,26 +377,20 @@ retpoline_auto:
386 setup_force_cpu_cap(X86_FEATURE_RETPOLINE); 377 setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
387 } 378 }
388 379
380specv2_set_mode:
389 spectre_v2_enabled = mode; 381 spectre_v2_enabled = mode;
390 pr_info("%s\n", spectre_v2_strings[mode]); 382 pr_info("%s\n", spectre_v2_strings[mode]);
391 383
392 /* 384 /*
393 * If neither SMEP nor PTI are available, there is a risk of 385 * If spectre v2 protection has been enabled, unconditionally fill
394 * hitting userspace addresses in the RSB after a context switch 386 * RSB during a context switch; this protects against two independent
395 * from a shallow call stack to a deeper one. To prevent this fill 387 * issues:
396 * the entire RSB, even when using IBRS.
397 * 388 *
398 * Skylake era CPUs have a separate issue with *underflow* of the 389 * - RSB underflow (and switch to BTB) on Skylake+
399 * RSB, when they will predict 'ret' targets from the generic BTB. 390 * - SpectreRSB variant of spectre v2 on X86_BUG_SPECTRE_V2 CPUs
400 * The proper mitigation for this is IBRS. If IBRS is not supported
401 * or deactivated in favour of retpolines the RSB fill on context
402 * switch is required.
403 */ 391 */
404 if ((!boot_cpu_has(X86_FEATURE_PTI) && 392 setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
405 !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) { 393 pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n");
406 setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
407 pr_info("Spectre v2 mitigation: Filling RSB on context switch\n");
408 }
409 394
410 /* Initialize Indirect Branch Prediction Barrier if supported */ 395 /* Initialize Indirect Branch Prediction Barrier if supported */
411 if (boot_cpu_has(X86_FEATURE_IBPB)) { 396 if (boot_cpu_has(X86_FEATURE_IBPB)) {
@@ -415,9 +400,16 @@ retpoline_auto:
415 400
416 /* 401 /*
417 * Retpoline means the kernel is safe because it has no indirect 402 * Retpoline means the kernel is safe because it has no indirect
 418 * branches. But firmware isn't, so use IBRS to protect that. 403 branches. Enhanced IBRS protects firmware too, so enable restricted
404 * speculation around firmware calls only when Enhanced IBRS isn't
405 * supported.
406 *
407 * Use "mode" to check Enhanced IBRS instead of boot_cpu_has(), because
408 * the user might select retpoline on the kernel command line and if
 409 * the CPU supports Enhanced IBRS, the kernel might unintentionally not
410 * enable IBRS around firmware calls.
419 */ 411 */
420 if (boot_cpu_has(X86_FEATURE_IBRS)) { 412 if (boot_cpu_has(X86_FEATURE_IBRS) && mode != SPECTRE_V2_IBRS_ENHANCED) {
421 setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW); 413 setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW);
422 pr_info("Enabling Restricted Speculation for firmware calls\n"); 414 pr_info("Enabling Restricted Speculation for firmware calls\n");
423 } 415 }
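The new logic in spectre_v2_select_mitigation() prefers Enhanced IBRS when the CPU advertises it, keeps retpolines as the fallback, always fills the RSB on context switch once any Spectre v2 mitigation is active, and only arms IBRS around firmware calls when Enhanced IBRS is not in use. A condensed model of those decisions (the SPEC_CTRL MSR write, command-line handling and the AMD/minimal retpoline variants are omitted):

#include <stdbool.h>
#include <stdio.h>

enum v2_mode { V2_NONE, V2_RETPOLINE, V2_IBRS_ENHANCED };

/* Inputs the real code reads from CPUID / boot_cpu_has(). */
struct cpu { bool ibrs_enhanced, ibrs, retpoline_built; };

struct mitigation {
	enum v2_mode mode;
	bool rsb_fill_ctxsw;   /* X86_FEATURE_RSB_CTXSW   */
	bool ibrs_firmware;    /* X86_FEATURE_USE_IBRS_FW */
};

/* Simplified model of the auto path in spectre_v2_select_mitigation(). */
static struct mitigation select_v2(const struct cpu *c)
{
	struct mitigation m = { .mode = V2_NONE };

	if (c->ibrs_enhanced)
		m.mode = V2_IBRS_ENHANCED;
	else if (c->retpoline_built)
		m.mode = V2_RETPOLINE;

	/* After this patch: fill the RSB on context switch unconditionally. */
	m.rsb_fill_ctxsw = true;

	/* Enhanced IBRS already covers firmware calls; plain IBRS does not. */
	m.ibrs_firmware = c->ibrs && m.mode != V2_IBRS_ENHANCED;
	return m;
}

int main(void)
{
	struct cpu c = { .ibrs_enhanced = true, .ibrs = true, .retpoline_built = true };
	struct mitigation m = select_v2(&c);

	printf("mode=%d rsb=%d fw_ibrs=%d\n", m.mode, m.rsb_fill_ctxsw, m.ibrs_firmware);
	return 0;
}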
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index eb4cb3efd20e..ba6b8bb1c036 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1005,6 +1005,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
1005 !cpu_has(c, X86_FEATURE_AMD_SSB_NO)) 1005 !cpu_has(c, X86_FEATURE_AMD_SSB_NO))
1006 setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); 1006 setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS);
1007 1007
1008 if (ia32_cap & ARCH_CAP_IBRS_ALL)
1009 setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED);
1010
1008 if (x86_match_cpu(cpu_no_meltdown)) 1011 if (x86_match_cpu(cpu_no_meltdown))
1009 return; 1012 return;
1010 1013
@@ -1016,6 +1019,24 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
1016} 1019}
1017 1020
1018/* 1021/*
1022 * The NOPL instruction is supposed to exist on all CPUs of family >= 6;
1023 * unfortunately, that's not true in practice because of early VIA
1024 * chips and (more importantly) broken virtualizers that are not easy
1025 * to detect. In the latter case it doesn't even *fail* reliably, so
1026 * probing for it doesn't even work. Disable it completely on 32-bit
1027 * unless we can find a reliable way to detect all the broken cases.
1028 * Enable it explicitly on 64-bit for non-constant inputs of cpu_has().
1029 */
1030static void detect_nopl(void)
1031{
1032#ifdef CONFIG_X86_32
1033 setup_clear_cpu_cap(X86_FEATURE_NOPL);
1034#else
1035 setup_force_cpu_cap(X86_FEATURE_NOPL);
1036#endif
1037}
1038
1039/*
1019 * Do minimum CPU detection early. 1040 * Do minimum CPU detection early.
1020 * Fields really needed: vendor, cpuid_level, family, model, mask, 1041 * Fields really needed: vendor, cpuid_level, family, model, mask,
1021 * cache alignment. 1042 * cache alignment.
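The comment explains why the kernel no longer probes for NOPL at runtime: broken virtualizers may neither execute it nor fault on it reliably, so the decision is made purely by architecture. For reference, this is roughly what such a runtime probe would look like in user space, i.e. exactly the kind of test the comment says cannot be trusted (x86-only, GCC inline asm):

#include <setjmp.h>
#include <signal.h>
#include <stdio.h>

static sigjmp_buf env;

static void sigill(int sig)
{
	(void)sig;
	siglongjmp(env, 1);
}

int main(void)
{
	signal(SIGILL, sigill);

	if (sigsetjmp(env, 1) == 0) {
		/* 3-byte NOPL: nopl (%eax) / nopl (%rax). */
		__asm__ volatile(".byte 0x0f, 0x1f, 0x00");
		puts("NOPL executed");
	} else {
		puts("NOPL raised SIGILL");
	}
	return 0;
}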
@@ -1089,6 +1110,8 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
1089 */ 1110 */
1090 if (!pgtable_l5_enabled()) 1111 if (!pgtable_l5_enabled())
1091 setup_clear_cpu_cap(X86_FEATURE_LA57); 1112 setup_clear_cpu_cap(X86_FEATURE_LA57);
1113
1114 detect_nopl();
1092} 1115}
1093 1116
1094void __init early_cpu_init(void) 1117void __init early_cpu_init(void)
@@ -1124,24 +1147,6 @@ void __init early_cpu_init(void)
1124 early_identify_cpu(&boot_cpu_data); 1147 early_identify_cpu(&boot_cpu_data);
1125} 1148}
1126 1149
1127/*
1128 * The NOPL instruction is supposed to exist on all CPUs of family >= 6;
1129 * unfortunately, that's not true in practice because of early VIA
1130 * chips and (more importantly) broken virtualizers that are not easy
1131 * to detect. In the latter case it doesn't even *fail* reliably, so
1132 * probing for it doesn't even work. Disable it completely on 32-bit
1133 * unless we can find a reliable way to detect all the broken cases.
1134 * Enable it explicitly on 64-bit for non-constant inputs of cpu_has().
1135 */
1136static void detect_nopl(struct cpuinfo_x86 *c)
1137{
1138#ifdef CONFIG_X86_32
1139 clear_cpu_cap(c, X86_FEATURE_NOPL);
1140#else
1141 set_cpu_cap(c, X86_FEATURE_NOPL);
1142#endif
1143}
1144
1145static void detect_null_seg_behavior(struct cpuinfo_x86 *c) 1150static void detect_null_seg_behavior(struct cpuinfo_x86 *c)
1146{ 1151{
1147#ifdef CONFIG_X86_64 1152#ifdef CONFIG_X86_64
@@ -1204,8 +1209,6 @@ static void generic_identify(struct cpuinfo_x86 *c)
1204 1209
1205 get_model_name(c); /* Default name */ 1210 get_model_name(c); /* Default name */
1206 1211
1207 detect_nopl(c);
1208
1209 detect_null_seg_behavior(c); 1212 detect_null_seg_behavior(c);
1210 1213
1211 /* 1214 /*
@@ -1804,11 +1807,12 @@ void cpu_init(void)
1804 enter_lazy_tlb(&init_mm, curr); 1807 enter_lazy_tlb(&init_mm, curr);
1805 1808
1806 /* 1809 /*
1807 * Initialize the TSS. Don't bother initializing sp0, as the initial 1810 * Initialize the TSS. sp0 points to the entry trampoline stack
1808 * task never enters user mode. 1811 * regardless of what task is running.
1809 */ 1812 */
1810 set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss); 1813 set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
1811 load_TR_desc(); 1814 load_TR_desc();
1815 load_sp0((unsigned long)(cpu_entry_stack(cpu) + 1));
1812 1816
1813 load_mm_ldt(&init_mm); 1817 load_mm_ldt(&init_mm);
1814 1818
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index eb75564f2d25..c050cd6066af 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -465,14 +465,17 @@ static void detect_vmx_virtcap(struct cpuinfo_x86 *c)
465#define X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC 0x00000001 465#define X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC 0x00000001
466#define X86_VMX_FEATURE_PROC_CTLS2_EPT 0x00000002 466#define X86_VMX_FEATURE_PROC_CTLS2_EPT 0x00000002
467#define X86_VMX_FEATURE_PROC_CTLS2_VPID 0x00000020 467#define X86_VMX_FEATURE_PROC_CTLS2_VPID 0x00000020
468#define x86_VMX_FEATURE_EPT_CAP_AD 0x00200000
468 469
469 u32 vmx_msr_low, vmx_msr_high, msr_ctl, msr_ctl2; 470 u32 vmx_msr_low, vmx_msr_high, msr_ctl, msr_ctl2;
471 u32 msr_vpid_cap, msr_ept_cap;
470 472
471 clear_cpu_cap(c, X86_FEATURE_TPR_SHADOW); 473 clear_cpu_cap(c, X86_FEATURE_TPR_SHADOW);
472 clear_cpu_cap(c, X86_FEATURE_VNMI); 474 clear_cpu_cap(c, X86_FEATURE_VNMI);
473 clear_cpu_cap(c, X86_FEATURE_FLEXPRIORITY); 475 clear_cpu_cap(c, X86_FEATURE_FLEXPRIORITY);
474 clear_cpu_cap(c, X86_FEATURE_EPT); 476 clear_cpu_cap(c, X86_FEATURE_EPT);
475 clear_cpu_cap(c, X86_FEATURE_VPID); 477 clear_cpu_cap(c, X86_FEATURE_VPID);
478 clear_cpu_cap(c, X86_FEATURE_EPT_AD);
476 479
477 rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, vmx_msr_low, vmx_msr_high); 480 rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, vmx_msr_low, vmx_msr_high);
478 msr_ctl = vmx_msr_high | vmx_msr_low; 481 msr_ctl = vmx_msr_high | vmx_msr_low;
@@ -487,8 +490,13 @@ static void detect_vmx_virtcap(struct cpuinfo_x86 *c)
487 if ((msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC) && 490 if ((msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC) &&
488 (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW)) 491 (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW))
489 set_cpu_cap(c, X86_FEATURE_FLEXPRIORITY); 492 set_cpu_cap(c, X86_FEATURE_FLEXPRIORITY);
490 if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_EPT) 493 if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_EPT) {
491 set_cpu_cap(c, X86_FEATURE_EPT); 494 set_cpu_cap(c, X86_FEATURE_EPT);
495 rdmsr(MSR_IA32_VMX_EPT_VPID_CAP,
496 msr_ept_cap, msr_vpid_cap);
497 if (msr_ept_cap & x86_VMX_FEATURE_EPT_CAP_AD)
498 set_cpu_cap(c, X86_FEATURE_EPT_AD);
499 }
492 if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VPID) 500 if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VPID)
493 set_cpu_cap(c, X86_FEATURE_VPID); 501 set_cpu_cap(c, X86_FEATURE_VPID);
494 } 502 }
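X86_FEATURE_EPT_AD is now derived from bit 21 of MSR_IA32_VMX_EPT_VPID_CAP (the new x86_VMX_FEATURE_EPT_CAP_AD mask). Outside the kernel the same bit can be peeked at through the msr driver; a sketch, assuming /dev/cpu/0/msr exists (msr module loaded) and the process has the required privileges:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

#define MSR_IA32_VMX_EPT_VPID_CAP 0x48c
#define EPT_CAP_AD_BIT            (1ULL << 21)   /* x86_VMX_FEATURE_EPT_CAP_AD */

int main(void)
{
	uint64_t cap;
	int fd = open("/dev/cpu/0/msr", O_RDONLY);

	/* The msr driver reads 8 bytes at file offset == MSR index. */
	if (fd < 0 || pread(fd, &cap, sizeof(cap), MSR_IA32_VMX_EPT_VPID_CAP) != sizeof(cap)) {
		perror("msr");
		return 1;
	}
	printf("EPT A/D bits %ssupported\n", (cap & EPT_CAP_AD_BIT) ? "" : "not ");
	close(fd);
	return 0;
}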
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index ec4754f81cbd..abb71ac70443 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -859,6 +859,8 @@ static __init bool get_rdt_resources(void)
859 return (rdt_mon_capable || rdt_alloc_capable); 859 return (rdt_mon_capable || rdt_alloc_capable);
860} 860}
861 861
862static enum cpuhp_state rdt_online;
863
862static int __init intel_rdt_late_init(void) 864static int __init intel_rdt_late_init(void)
863{ 865{
864 struct rdt_resource *r; 866 struct rdt_resource *r;
@@ -880,6 +882,7 @@ static int __init intel_rdt_late_init(void)
880 cpuhp_remove_state(state); 882 cpuhp_remove_state(state);
881 return ret; 883 return ret;
882 } 884 }
885 rdt_online = state;
883 886
884 for_each_alloc_capable_rdt_resource(r) 887 for_each_alloc_capable_rdt_resource(r)
885 pr_info("Intel RDT %s allocation detected\n", r->name); 888 pr_info("Intel RDT %s allocation detected\n", r->name);
@@ -891,3 +894,11 @@ static int __init intel_rdt_late_init(void)
891} 894}
892 895
893late_initcall(intel_rdt_late_init); 896late_initcall(intel_rdt_late_init);
897
898static void __exit intel_rdt_exit(void)
899{
900 cpuhp_remove_state(rdt_online);
901 rdtgroup_exit();
902}
903
904__exitcall(intel_rdt_exit);
diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/intel_rdt.h
index 39752825e376..4e588f36228f 100644
--- a/arch/x86/kernel/cpu/intel_rdt.h
+++ b/arch/x86/kernel/cpu/intel_rdt.h
@@ -81,6 +81,34 @@ enum rdt_group_type {
81}; 81};
82 82
83/** 83/**
84 * enum rdtgrp_mode - Mode of a RDT resource group
85 * @RDT_MODE_SHAREABLE: This resource group allows sharing of its allocations
86 * @RDT_MODE_EXCLUSIVE: No sharing of this resource group's allocations allowed
87 * @RDT_MODE_PSEUDO_LOCKSETUP: Resource group will be used for Pseudo-Locking
88 * @RDT_MODE_PSEUDO_LOCKED: No sharing of this resource group's allocations
89 * allowed AND the allocations are Cache Pseudo-Locked
90 *
91 * The mode of a resource group enables control over the allowed overlap
92 * between allocations associated with different resource groups (classes
93 * of service). User is able to modify the mode of a resource group by
94 * writing to the "mode" resctrl file associated with the resource group.
95 *
96 * The "shareable", "exclusive", and "pseudo-locksetup" modes are set by
97 * writing the appropriate text to the "mode" file. A resource group enters
98 * "pseudo-locked" mode after the schemata is written while the resource
99 * group is in "pseudo-locksetup" mode.
100 */
101enum rdtgrp_mode {
102 RDT_MODE_SHAREABLE = 0,
103 RDT_MODE_EXCLUSIVE,
104 RDT_MODE_PSEUDO_LOCKSETUP,
105 RDT_MODE_PSEUDO_LOCKED,
106
107 /* Must be last */
108 RDT_NUM_MODES,
109};
110
111/**
84 * struct mongroup - store mon group's data in resctrl fs. 112 * struct mongroup - store mon group's data in resctrl fs.
85 * @mon_data_kn kernlfs node for the mon_data directory 113 * @mon_data_kn kernlfs node for the mon_data directory
86 * @parent: parent rdtgrp 114 * @parent: parent rdtgrp
@@ -95,6 +123,43 @@ struct mongroup {
95}; 123};
96 124
97/** 125/**
126 * struct pseudo_lock_region - pseudo-lock region information
127 * @r: RDT resource to which this pseudo-locked region
128 * belongs
129 * @d: RDT domain to which this pseudo-locked region
130 * belongs
131 * @cbm: bitmask of the pseudo-locked region
132 * @lock_thread_wq: waitqueue used to wait on the pseudo-locking thread
133 * completion
134 * @thread_done: variable used by waitqueue to test if pseudo-locking
135 * thread completed
136 * @cpu: core associated with the cache on which the setup code
137 * will be run
138 * @line_size: size of the cache lines
139 * @size: size of pseudo-locked region in bytes
140 * @kmem: the kernel memory associated with pseudo-locked region
141 * @minor: minor number of character device associated with this
142 * region
143 * @debugfs_dir: pointer to this region's directory in the debugfs
144 * filesystem
145 * @pm_reqs: Power management QoS requests related to this region
146 */
147struct pseudo_lock_region {
148 struct rdt_resource *r;
149 struct rdt_domain *d;
150 u32 cbm;
151 wait_queue_head_t lock_thread_wq;
152 int thread_done;
153 int cpu;
154 unsigned int line_size;
155 unsigned int size;
156 void *kmem;
157 unsigned int minor;
158 struct dentry *debugfs_dir;
159 struct list_head pm_reqs;
160};
161
162/**
98 * struct rdtgroup - store rdtgroup's data in resctrl file system. 163 * struct rdtgroup - store rdtgroup's data in resctrl file system.
99 * @kn: kernfs node 164 * @kn: kernfs node
100 * @rdtgroup_list: linked list for all rdtgroups 165 * @rdtgroup_list: linked list for all rdtgroups
@@ -106,16 +171,20 @@ struct mongroup {
106 * @type: indicates type of this rdtgroup - either 171 * @type: indicates type of this rdtgroup - either
107 * monitor only or ctrl_mon group 172 * monitor only or ctrl_mon group
108 * @mon: mongroup related data 173 * @mon: mongroup related data
174 * @mode: mode of resource group
175 * @plr: pseudo-locked region
109 */ 176 */
110struct rdtgroup { 177struct rdtgroup {
111 struct kernfs_node *kn; 178 struct kernfs_node *kn;
112 struct list_head rdtgroup_list; 179 struct list_head rdtgroup_list;
113 u32 closid; 180 u32 closid;
114 struct cpumask cpu_mask; 181 struct cpumask cpu_mask;
115 int flags; 182 int flags;
116 atomic_t waitcount; 183 atomic_t waitcount;
117 enum rdt_group_type type; 184 enum rdt_group_type type;
118 struct mongroup mon; 185 struct mongroup mon;
186 enum rdtgrp_mode mode;
187 struct pseudo_lock_region *plr;
119}; 188};
120 189
121/* rdtgroup.flags */ 190/* rdtgroup.flags */
@@ -148,6 +217,7 @@ extern struct list_head rdt_all_groups;
148extern int max_name_width, max_data_width; 217extern int max_name_width, max_data_width;
149 218
150int __init rdtgroup_init(void); 219int __init rdtgroup_init(void);
220void __exit rdtgroup_exit(void);
151 221
152/** 222/**
153 * struct rftype - describe each file in the resctrl file system 223 * struct rftype - describe each file in the resctrl file system
@@ -216,22 +286,24 @@ struct mbm_state {
216 * @mbps_val: When mba_sc is enabled, this holds the bandwidth in MBps 286 * @mbps_val: When mba_sc is enabled, this holds the bandwidth in MBps
217 * @new_ctrl: new ctrl value to be loaded 287 * @new_ctrl: new ctrl value to be loaded
218 * @have_new_ctrl: did user provide new_ctrl for this domain 288 * @have_new_ctrl: did user provide new_ctrl for this domain
289 * @plr: pseudo-locked region (if any) associated with domain
219 */ 290 */
220struct rdt_domain { 291struct rdt_domain {
221 struct list_head list; 292 struct list_head list;
222 int id; 293 int id;
223 struct cpumask cpu_mask; 294 struct cpumask cpu_mask;
224 unsigned long *rmid_busy_llc; 295 unsigned long *rmid_busy_llc;
225 struct mbm_state *mbm_total; 296 struct mbm_state *mbm_total;
226 struct mbm_state *mbm_local; 297 struct mbm_state *mbm_local;
227 struct delayed_work mbm_over; 298 struct delayed_work mbm_over;
228 struct delayed_work cqm_limbo; 299 struct delayed_work cqm_limbo;
229 int mbm_work_cpu; 300 int mbm_work_cpu;
230 int cqm_work_cpu; 301 int cqm_work_cpu;
231 u32 *ctrl_val; 302 u32 *ctrl_val;
232 u32 *mbps_val; 303 u32 *mbps_val;
233 u32 new_ctrl; 304 u32 new_ctrl;
234 bool have_new_ctrl; 305 bool have_new_ctrl;
306 struct pseudo_lock_region *plr;
235}; 307};
236 308
237/** 309/**
@@ -351,7 +423,7 @@ struct rdt_resource {
351 struct rdt_cache cache; 423 struct rdt_cache cache;
352 struct rdt_membw membw; 424 struct rdt_membw membw;
353 const char *format_str; 425 const char *format_str;
354 int (*parse_ctrlval) (char *buf, struct rdt_resource *r, 426 int (*parse_ctrlval) (void *data, struct rdt_resource *r,
355 struct rdt_domain *d); 427 struct rdt_domain *d);
356 struct list_head evt_list; 428 struct list_head evt_list;
357 int num_rmid; 429 int num_rmid;
@@ -359,8 +431,8 @@ struct rdt_resource {
359 unsigned long fflags; 431 unsigned long fflags;
360}; 432};
361 433
362int parse_cbm(char *buf, struct rdt_resource *r, struct rdt_domain *d); 434int parse_cbm(void *_data, struct rdt_resource *r, struct rdt_domain *d);
363int parse_bw(char *buf, struct rdt_resource *r, struct rdt_domain *d); 435int parse_bw(void *_buf, struct rdt_resource *r, struct rdt_domain *d);
364 436
365extern struct mutex rdtgroup_mutex; 437extern struct mutex rdtgroup_mutex;
366 438
@@ -368,7 +440,7 @@ extern struct rdt_resource rdt_resources_all[];
368extern struct rdtgroup rdtgroup_default; 440extern struct rdtgroup rdtgroup_default;
369DECLARE_STATIC_KEY_FALSE(rdt_alloc_enable_key); 441DECLARE_STATIC_KEY_FALSE(rdt_alloc_enable_key);
370 442
371int __init rdtgroup_init(void); 443extern struct dentry *debugfs_resctrl;
372 444
373enum { 445enum {
374 RDT_RESOURCE_L3, 446 RDT_RESOURCE_L3,
@@ -439,13 +511,32 @@ void rdt_last_cmd_printf(const char *fmt, ...);
439void rdt_ctrl_update(void *arg); 511void rdt_ctrl_update(void *arg);
440struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn); 512struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn);
441void rdtgroup_kn_unlock(struct kernfs_node *kn); 513void rdtgroup_kn_unlock(struct kernfs_node *kn);
514int rdtgroup_kn_mode_restrict(struct rdtgroup *r, const char *name);
515int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name,
516 umode_t mask);
442struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id, 517struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
443 struct list_head **pos); 518 struct list_head **pos);
444ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of, 519ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
445 char *buf, size_t nbytes, loff_t off); 520 char *buf, size_t nbytes, loff_t off);
446int rdtgroup_schemata_show(struct kernfs_open_file *of, 521int rdtgroup_schemata_show(struct kernfs_open_file *of,
447 struct seq_file *s, void *v); 522 struct seq_file *s, void *v);
523bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
524 u32 _cbm, int closid, bool exclusive);
525unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, struct rdt_domain *d,
526 u32 cbm);
527enum rdtgrp_mode rdtgroup_mode_by_closid(int closid);
528int rdtgroup_tasks_assigned(struct rdtgroup *r);
529int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp);
530int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp);
531bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, u32 _cbm);
532bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d);
533int rdt_pseudo_lock_init(void);
534void rdt_pseudo_lock_release(void);
535int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp);
536void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp);
448struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r); 537struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r);
538int update_domains(struct rdt_resource *r, int closid);
539void closid_free(int closid);
449int alloc_rmid(void); 540int alloc_rmid(void);
450void free_rmid(u32 rmid); 541void free_rmid(u32 rmid);
451int rdt_get_mon_l3_config(struct rdt_resource *r); 542int rdt_get_mon_l3_config(struct rdt_resource *r);
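Per the new rdtgrp_mode documentation, a group's mode is driven from user space through resctrl files: writing "pseudo-locksetup" to the group's mode file and then writing a schemata line is what ends in RDT_MODE_PSEUDO_LOCKED. A sketch of that sequence, with a hypothetical group path and an illustrative cache mask (resctrl must already be mounted and the group created):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* Hypothetical resource group; adjust to an existing one. */
#define GROUP "/sys/fs/resctrl/p0"

static int write_file(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);
	ssize_t n;

	if (fd < 0) {
		perror(path);
		return -1;
	}
	n = write(fd, val, strlen(val));
	close(fd);
	return n < 0 ? -1 : 0;
}

int main(void)
{
	/* 1. Put the group into pseudo-lock setup mode ... */
	if (write_file(GROUP "/mode", "pseudo-locksetup\n"))
		return 1;
	/* 2. ... then writing the schemata triggers the locking and the
	 *    group transitions to "pseudo-locked". Cache id and mask are
	 *    illustrative only. */
	return write_file(GROUP "/schemata", "L2:0=0xf\n") ? 1 : 0;
}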
diff --git a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
index 116d57b248d3..af358ca05160 100644
--- a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
+++ b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
@@ -64,9 +64,10 @@ static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r)
64 return true; 64 return true;
65} 65}
66 66
67int parse_bw(char *buf, struct rdt_resource *r, struct rdt_domain *d) 67int parse_bw(void *_buf, struct rdt_resource *r, struct rdt_domain *d)
68{ 68{
69 unsigned long data; 69 unsigned long data;
70 char *buf = _buf;
70 71
71 if (d->have_new_ctrl) { 72 if (d->have_new_ctrl) {
72 rdt_last_cmd_printf("duplicate domain %d\n", d->id); 73 rdt_last_cmd_printf("duplicate domain %d\n", d->id);
@@ -87,7 +88,7 @@ int parse_bw(char *buf, struct rdt_resource *r, struct rdt_domain *d)
87 * are allowed (e.g. FFFFH, 0FF0H, 003CH, etc.). 88 * are allowed (e.g. FFFFH, 0FF0H, 003CH, etc.).
88 * Additionally Haswell requires at least two bits set. 89 * Additionally Haswell requires at least two bits set.
89 */ 90 */
90static bool cbm_validate(char *buf, unsigned long *data, struct rdt_resource *r) 91static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r)
91{ 92{
92 unsigned long first_bit, zero_bit, val; 93 unsigned long first_bit, zero_bit, val;
93 unsigned int cbm_len = r->cache.cbm_len; 94 unsigned int cbm_len = r->cache.cbm_len;
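cbm_validate() enforces the rule quoted in the comment: a capacity bit mask must be a single contiguous run of set bits (0xffff or 0x0ff0 are fine, 0x0f0f is not), within the cache's cbm_len. The contiguity test reduces to two bit tricks; a stand-alone sketch (not the kernel's bitmap-based implementation):

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

static bool cbm_is_contiguous(unsigned long val, unsigned int cbm_len)
{
	unsigned long x;

	if (val == 0 || val >> cbm_len)
		return false;
	x = val >> __builtin_ctzl(val);    /* strip trailing zeros */
	return (x & (x + 1)) == 0;         /* remaining bits must be all-ones */
}

int main(int argc, char **argv)
{
	unsigned long val = strtoul(argc > 1 ? argv[1] : "ff0", NULL, 16);

	printf("%#lx -> %s\n", val, cbm_is_contiguous(val, 20) ? "ok" : "not contiguous");
	return 0;
}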
@@ -122,22 +123,64 @@ static bool cbm_validate(char *buf, unsigned long *data, struct rdt_resource *r)
122 return true; 123 return true;
123} 124}
124 125
126struct rdt_cbm_parse_data {
127 struct rdtgroup *rdtgrp;
128 char *buf;
129};
130
125/* 131/*
126 * Read one cache bit mask (hex). Check that it is valid for the current 132 * Read one cache bit mask (hex). Check that it is valid for the current
127 * resource type. 133 * resource type.
128 */ 134 */
129int parse_cbm(char *buf, struct rdt_resource *r, struct rdt_domain *d) 135int parse_cbm(void *_data, struct rdt_resource *r, struct rdt_domain *d)
130{ 136{
131 unsigned long data; 137 struct rdt_cbm_parse_data *data = _data;
138 struct rdtgroup *rdtgrp = data->rdtgrp;
139 u32 cbm_val;
132 140
133 if (d->have_new_ctrl) { 141 if (d->have_new_ctrl) {
134 rdt_last_cmd_printf("duplicate domain %d\n", d->id); 142 rdt_last_cmd_printf("duplicate domain %d\n", d->id);
135 return -EINVAL; 143 return -EINVAL;
136 } 144 }
137 145
138 if(!cbm_validate(buf, &data, r)) 146 /*
147 * Cannot set up more than one pseudo-locked region in a cache
148 * hierarchy.
149 */
150 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP &&
151 rdtgroup_pseudo_locked_in_hierarchy(d)) {
152 rdt_last_cmd_printf("pseudo-locked region in hierarchy\n");
139 return -EINVAL; 153 return -EINVAL;
140 d->new_ctrl = data; 154 }
155
156 if (!cbm_validate(data->buf, &cbm_val, r))
157 return -EINVAL;
158
159 if ((rdtgrp->mode == RDT_MODE_EXCLUSIVE ||
160 rdtgrp->mode == RDT_MODE_SHAREABLE) &&
161 rdtgroup_cbm_overlaps_pseudo_locked(d, cbm_val)) {
162 rdt_last_cmd_printf("CBM overlaps with pseudo-locked region\n");
163 return -EINVAL;
164 }
165
166 /*
167 * The CBM may not overlap with the CBM of another closid if
168 * either is exclusive.
169 */
170 if (rdtgroup_cbm_overlaps(r, d, cbm_val, rdtgrp->closid, true)) {
171 rdt_last_cmd_printf("overlaps with exclusive group\n");
172 return -EINVAL;
173 }
174
175 if (rdtgroup_cbm_overlaps(r, d, cbm_val, rdtgrp->closid, false)) {
176 if (rdtgrp->mode == RDT_MODE_EXCLUSIVE ||
177 rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
178 rdt_last_cmd_printf("overlaps with other group\n");
179 return -EINVAL;
180 }
181 }
182
183 d->new_ctrl = cbm_val;
141 d->have_new_ctrl = true; 184 d->have_new_ctrl = true;
142 185
143 return 0; 186 return 0;
@@ -149,8 +192,10 @@ int parse_cbm(char *buf, struct rdt_resource *r, struct rdt_domain *d)
149 * separated by ";". The "id" is in decimal, and must match one of 192 * separated by ";". The "id" is in decimal, and must match one of
150 * the "id"s for this resource. 193 * the "id"s for this resource.
151 */ 194 */
152static int parse_line(char *line, struct rdt_resource *r) 195static int parse_line(char *line, struct rdt_resource *r,
196 struct rdtgroup *rdtgrp)
153{ 197{
198 struct rdt_cbm_parse_data data;
154 char *dom = NULL, *id; 199 char *dom = NULL, *id;
155 struct rdt_domain *d; 200 struct rdt_domain *d;
156 unsigned long dom_id; 201 unsigned long dom_id;
@@ -167,15 +212,32 @@ next:
167 dom = strim(dom); 212 dom = strim(dom);
168 list_for_each_entry(d, &r->domains, list) { 213 list_for_each_entry(d, &r->domains, list) {
169 if (d->id == dom_id) { 214 if (d->id == dom_id) {
170 if (r->parse_ctrlval(dom, r, d)) 215 data.buf = dom;
216 data.rdtgrp = rdtgrp;
217 if (r->parse_ctrlval(&data, r, d))
171 return -EINVAL; 218 return -EINVAL;
219 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
220 /*
221 * In pseudo-locking setup mode and just
222 * parsed a valid CBM that should be
223 * pseudo-locked. Only one locked region per
224 * resource group and domain so just do
225 * the required initialization for single
226 * region and return.
227 */
228 rdtgrp->plr->r = r;
229 rdtgrp->plr->d = d;
230 rdtgrp->plr->cbm = d->new_ctrl;
231 d->plr = rdtgrp->plr;
232 return 0;
233 }
172 goto next; 234 goto next;
173 } 235 }
174 } 236 }
175 return -EINVAL; 237 return -EINVAL;
176} 238}
177 239
178static int update_domains(struct rdt_resource *r, int closid) 240int update_domains(struct rdt_resource *r, int closid)
179{ 241{
180 struct msr_param msr_param; 242 struct msr_param msr_param;
181 cpumask_var_t cpu_mask; 243 cpumask_var_t cpu_mask;
@@ -220,13 +282,14 @@ done:
220 return 0; 282 return 0;
221} 283}
222 284
223static int rdtgroup_parse_resource(char *resname, char *tok, int closid) 285static int rdtgroup_parse_resource(char *resname, char *tok,
286 struct rdtgroup *rdtgrp)
224{ 287{
225 struct rdt_resource *r; 288 struct rdt_resource *r;
226 289
227 for_each_alloc_enabled_rdt_resource(r) { 290 for_each_alloc_enabled_rdt_resource(r) {
228 if (!strcmp(resname, r->name) && closid < r->num_closid) 291 if (!strcmp(resname, r->name) && rdtgrp->closid < r->num_closid)
229 return parse_line(tok, r); 292 return parse_line(tok, r, rdtgrp);
230 } 293 }
231 rdt_last_cmd_printf("unknown/unsupported resource name '%s'\n", resname); 294 rdt_last_cmd_printf("unknown/unsupported resource name '%s'\n", resname);
232 return -EINVAL; 295 return -EINVAL;
@@ -239,7 +302,7 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
239 struct rdt_domain *dom; 302 struct rdt_domain *dom;
240 struct rdt_resource *r; 303 struct rdt_resource *r;
241 char *tok, *resname; 304 char *tok, *resname;
242 int closid, ret = 0; 305 int ret = 0;
243 306
244 /* Valid input requires a trailing newline */ 307 /* Valid input requires a trailing newline */
245 if (nbytes == 0 || buf[nbytes - 1] != '\n') 308 if (nbytes == 0 || buf[nbytes - 1] != '\n')
@@ -253,7 +316,15 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
253 } 316 }
254 rdt_last_cmd_clear(); 317 rdt_last_cmd_clear();
255 318
256 closid = rdtgrp->closid; 319 /*
320 * No changes to a pseudo-locked region are allowed. It has to be removed
321 * and re-created instead.
322 */
323 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
324 ret = -EINVAL;
325 rdt_last_cmd_puts("resource group is pseudo-locked\n");
326 goto out;
327 }
257 328
258 for_each_alloc_enabled_rdt_resource(r) { 329 for_each_alloc_enabled_rdt_resource(r) {
259 list_for_each_entry(dom, &r->domains, list) 330 list_for_each_entry(dom, &r->domains, list)
@@ -272,17 +343,27 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
272 ret = -EINVAL; 343 ret = -EINVAL;
273 goto out; 344 goto out;
274 } 345 }
275 ret = rdtgroup_parse_resource(resname, tok, closid); 346 ret = rdtgroup_parse_resource(resname, tok, rdtgrp);
276 if (ret) 347 if (ret)
277 goto out; 348 goto out;
278 } 349 }
279 350
280 for_each_alloc_enabled_rdt_resource(r) { 351 for_each_alloc_enabled_rdt_resource(r) {
281 ret = update_domains(r, closid); 352 ret = update_domains(r, rdtgrp->closid);
282 if (ret) 353 if (ret)
283 goto out; 354 goto out;
284 } 355 }
285 356
357 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
358 /*
359 * If pseudo-locking fails we keep the resource group in
360 * mode RDT_MODE_PSEUDO_LOCKSETUP with its class of service
361 * active and updated for just the domain the pseudo-locked
362 * region was requested for.
363 */
364 ret = rdtgroup_pseudo_lock_create(rdtgrp);
365 }
366
286out: 367out:
287 rdtgroup_kn_unlock(of->kn); 368 rdtgroup_kn_unlock(of->kn);
288 return ret ?: nbytes; 369 return ret ?: nbytes;
@@ -318,10 +399,18 @@ int rdtgroup_schemata_show(struct kernfs_open_file *of,
318 399
319 rdtgrp = rdtgroup_kn_lock_live(of->kn); 400 rdtgrp = rdtgroup_kn_lock_live(of->kn);
320 if (rdtgrp) { 401 if (rdtgrp) {
321 closid = rdtgrp->closid; 402 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
322 for_each_alloc_enabled_rdt_resource(r) { 403 for_each_alloc_enabled_rdt_resource(r)
323 if (closid < r->num_closid) 404 seq_printf(s, "%s:uninitialized\n", r->name);
324 show_doms(s, r, closid); 405 } else if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
406 seq_printf(s, "%s:%d=%x\n", rdtgrp->plr->r->name,
407 rdtgrp->plr->d->id, rdtgrp->plr->cbm);
408 } else {
409 closid = rdtgrp->closid;
410 for_each_alloc_enabled_rdt_resource(r) {
411 if (closid < r->num_closid)
412 show_doms(s, r, closid);
413 }
325 } 414 }
326 } else { 415 } else {
327 ret = -ENOENT; 416 ret = -ENOENT;
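The schemata write path above is what ultimately drives pseudo-lock creation: once a resource group is in pseudo-locksetup mode, a valid schemata line for the target cache domain ends in a call to rdtgroup_pseudo_lock_create(). The following is a hedged user-space sketch of that flow, assuming resctrl is mounted at /sys/fs/resctrl; the group name "newlock", the "mode" file, and the "L2:0=0x3" schemata string are illustrative assumptions, not taken from the patch.

	#include <stdio.h>
	#include <sys/stat.h>
	#include <sys/types.h>

	/* Write a short string to a resctrl control file. */
	static int write_str(const char *path, const char *val)
	{
		FILE *f = fopen(path, "w");

		if (!f)
			return -1;
		if (fputs(val, f) == EOF) {
			fclose(f);
			return -1;
		}
		return fclose(f);
	}

	int main(void)
	{
		/* Create the resource group that will own the locked region. */
		if (mkdir("/sys/fs/resctrl/newlock", 0755))
			return 1;
		/* Assumed "mode" file: request pseudo-locksetup mode first. */
		if (write_str("/sys/fs/resctrl/newlock/mode", "pseudo-locksetup\n"))
			return 1;
		/*
		 * Writing the schemata (trailing newline required, see
		 * rdtgroup_schemata_write() above) triggers
		 * rdtgroup_pseudo_lock_create() for the requested domain.
		 */
		return write_str("/sys/fs/resctrl/newlock/schemata", "L2:0=0x3\n") ? 1 : 0;
	}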
diff --git a/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c b/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c
new file mode 100644
index 000000000000..40f3903ae5d9
--- /dev/null
+++ b/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c
@@ -0,0 +1,1522 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Resource Director Technology (RDT)
4 *
5 * Pseudo-locking support built on top of Cache Allocation Technology (CAT)
6 *
7 * Copyright (C) 2018 Intel Corporation
8 *
9 * Author: Reinette Chatre <reinette.chatre@intel.com>
10 */
11
12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13
14#include <linux/cacheinfo.h>
15#include <linux/cpu.h>
16#include <linux/cpumask.h>
17#include <linux/debugfs.h>
18#include <linux/kthread.h>
19#include <linux/mman.h>
20#include <linux/pm_qos.h>
21#include <linux/slab.h>
22#include <linux/uaccess.h>
23
24#include <asm/cacheflush.h>
25#include <asm/intel-family.h>
26#include <asm/intel_rdt_sched.h>
27#include <asm/perf_event.h>
28
29#include "intel_rdt.h"
30
31#define CREATE_TRACE_POINTS
32#include "intel_rdt_pseudo_lock_event.h"
33
34/*
35 * MSR_MISC_FEATURE_CONTROL register enables the modification of hardware
36 * prefetcher state. Details about this register can be found in the MSR
37 * tables for specific platforms found in Intel's SDM.
38 */
39#define MSR_MISC_FEATURE_CONTROL 0x000001a4
40
41/*
42 * The bits needed to disable hardware prefetching varies based on the
43 * platform. During initialization we will discover which bits to use.
44 */
45static u64 prefetch_disable_bits;
46
47/*
48 * Major number assigned to and shared by all devices exposing
49 * pseudo-locked regions.
50 */
51static unsigned int pseudo_lock_major;
52static unsigned long pseudo_lock_minor_avail = GENMASK(MINORBITS, 0);
53static struct class *pseudo_lock_class;
54
55/**
56 * get_prefetch_disable_bits - prefetch disable bits of supported platforms
57 *
58 * Capture the list of platforms that have been validated to support
59 * pseudo-locking. This includes testing to ensure pseudo-locked regions
60 * with low cache miss rates can be created under a variety of load conditions
61 * as well as that these pseudo-locked regions can maintain their low cache
62 * miss rates under a variety of load conditions for significant lengths of time.
63 *
64 * After a platform has been validated to support pseudo-locking its
65 * hardware prefetch disable bits are included here as they are documented
66 * in the SDM.
67 *
68 * When adding a platform here, also add support for its cache events to
69 * measure_cycles_perf_fn()
70 *
71 * Return:
72 * If platform is supported, the bits to disable hardware prefetchers, 0
73 * if platform is not supported.
74 */
75static u64 get_prefetch_disable_bits(void)
76{
77 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
78 boot_cpu_data.x86 != 6)
79 return 0;
80
81 switch (boot_cpu_data.x86_model) {
82 case INTEL_FAM6_BROADWELL_X:
83 /*
84 * SDM defines bits of MSR_MISC_FEATURE_CONTROL register
85 * as:
86 * 0 L2 Hardware Prefetcher Disable (R/W)
87 * 1 L2 Adjacent Cache Line Prefetcher Disable (R/W)
88 * 2 DCU Hardware Prefetcher Disable (R/W)
89 * 3 DCU IP Prefetcher Disable (R/W)
90 * 63:4 Reserved
91 */
92 return 0xF;
93 case INTEL_FAM6_ATOM_GOLDMONT:
94 case INTEL_FAM6_ATOM_GEMINI_LAKE:
95 /*
96 * SDM defines bits of MSR_MISC_FEATURE_CONTROL register
97 * as:
98 * 0 L2 Hardware Prefetcher Disable (R/W)
99 * 1 Reserved
100 * 2 DCU Hardware Prefetcher Disable (R/W)
101 * 63:3 Reserved
102 */
103 return 0x5;
104 }
105
106 return 0;
107}
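/*
 * Hedged illustration, not part of the file above: the prefetcher disable
 * bits returned by get_prefetch_disable_bits() live in
 * MSR_MISC_FEATURE_CONTROL (0x1a4) and can be inspected from user space
 * via the msr driver, assuming /dev/cpu/0/msr exists and the caller has
 * sufficient privileges.
 */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	uint64_t val;
	int fd = open("/dev/cpu/0/msr", O_RDONLY);

	if (fd < 0 || pread(fd, &val, sizeof(val), 0x1a4) != sizeof(val)) {
		perror("MSR 0x1a4");
		return 1;
	}
	/* Bit 0: L2 HW prefetcher disable, bit 2: DCU HW prefetcher disable. */
	printf("MISC_FEATURE_CONTROL = %#llx\n", (unsigned long long)val);
	close(fd);
	return 0;
}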
108
109/*
110 * Helper to write 64bit value to MSR without tracing. Used when
111 * use of the cache should be restricted and use of registers used
112 * for local variables avoided.
113 */
114static inline void pseudo_wrmsrl_notrace(unsigned int msr, u64 val)
115{
116 __wrmsr(msr, (u32)(val & 0xffffffffULL), (u32)(val >> 32));
117}
118
119/**
120 * pseudo_lock_minor_get - Obtain available minor number
121 * @minor: Pointer to where new minor number will be stored
122 *
123 * A bitmask is used to track available minor numbers. Here the next free
124 * minor number is marked as unavailable and returned.
125 *
126 * Return: 0 on success, <0 on failure.
127 */
128static int pseudo_lock_minor_get(unsigned int *minor)
129{
130 unsigned long first_bit;
131
132 first_bit = find_first_bit(&pseudo_lock_minor_avail, MINORBITS);
133
134 if (first_bit == MINORBITS)
135 return -ENOSPC;
136
137 __clear_bit(first_bit, &pseudo_lock_minor_avail);
138 *minor = first_bit;
139
140 return 0;
141}
142
143/**
144 * pseudo_lock_minor_release - Return minor number to the available pool
145 * @minor: The minor number made available
146 */
147static void pseudo_lock_minor_release(unsigned int minor)
148{
149 __set_bit(minor, &pseudo_lock_minor_avail);
150}
151
152/**
153 * region_find_by_minor - Locate a pseudo-lock region by inode minor number
154 * @minor: The minor number of the device representing pseudo-locked region
155 *
156 * When the character device is accessed we need to determine which
157 * pseudo-locked region it belongs to. This is done by matching the minor
158 * number of the device to the pseudo-locked region to which it belongs.
159 *
160 * Minor numbers are assigned at the time a pseudo-locked region is associated
161 * with a cache instance.
162 *
163 * Return: On success return pointer to resource group owning the pseudo-locked
164 * region, NULL on failure.
165 */
166static struct rdtgroup *region_find_by_minor(unsigned int minor)
167{
168 struct rdtgroup *rdtgrp, *rdtgrp_match = NULL;
169
170 list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
171 if (rdtgrp->plr && rdtgrp->plr->minor == minor) {
172 rdtgrp_match = rdtgrp;
173 break;
174 }
175 }
176 return rdtgrp_match;
177}
178
179/**
180 * pseudo_lock_pm_req - A power management QoS request list entry
181 * @list: Entry within the @pm_reqs list for a pseudo-locked region
182 * @req: PM QoS request
183 */
184struct pseudo_lock_pm_req {
185 struct list_head list;
186 struct dev_pm_qos_request req;
187};
188
189static void pseudo_lock_cstates_relax(struct pseudo_lock_region *plr)
190{
191 struct pseudo_lock_pm_req *pm_req, *next;
192
193 list_for_each_entry_safe(pm_req, next, &plr->pm_reqs, list) {
194 dev_pm_qos_remove_request(&pm_req->req);
195 list_del(&pm_req->list);
196 kfree(pm_req);
197 }
198}
199
200/**
201 * pseudo_lock_cstates_constrain - Restrict cores from entering C6
202 *
203 * To prevent the cache from being affected by power management, entering
204 * C6 has to be avoided. This is accomplished by requesting a latency
205 * requirement lower than lowest C6 exit latency of all supported
206 * platforms as found in the cpuidle state tables in the intel_idle driver.
207 * At this time it is possible to do so with a single latency requirement
208 * for all supported platforms.
209 *
210 * Since Goldmont is supported, which is affected by X86_BUG_MONITOR,
211 * the ACPI latencies need to be considered while keeping in mind that C2
212 * may be set to map to deeper sleep states. In this case the latency
213 * requirement needs to prevent entering C2 also.
214 */
215static int pseudo_lock_cstates_constrain(struct pseudo_lock_region *plr)
216{
217 struct pseudo_lock_pm_req *pm_req;
218 int cpu;
219 int ret;
220
221 for_each_cpu(cpu, &plr->d->cpu_mask) {
222 pm_req = kzalloc(sizeof(*pm_req), GFP_KERNEL);
223 if (!pm_req) {
224 rdt_last_cmd_puts("fail allocating mem for PM QoS\n");
225 ret = -ENOMEM;
226 goto out_err;
227 }
228 ret = dev_pm_qos_add_request(get_cpu_device(cpu),
229 &pm_req->req,
230 DEV_PM_QOS_RESUME_LATENCY,
231 30);
232 if (ret < 0) {
233 rdt_last_cmd_printf("fail to add latency req cpu%d\n",
234 cpu);
235 kfree(pm_req);
236 ret = -1;
237 goto out_err;
238 }
239 list_add(&pm_req->list, &plr->pm_reqs);
240 }
241
242 return 0;
243
244out_err:
245 pseudo_lock_cstates_relax(plr);
246 return ret;
247}
248
249/**
250 * pseudo_lock_region_clear - Reset pseudo-lock region data
251 * @plr: pseudo-lock region
252 *
253 * All content of the pseudo-locked region is reset - any allocated memory
254 * is freed.
255 *
256 * Return: void
257 */
258static void pseudo_lock_region_clear(struct pseudo_lock_region *plr)
259{
260 plr->size = 0;
261 plr->line_size = 0;
262 kfree(plr->kmem);
263 plr->kmem = NULL;
264 plr->r = NULL;
265 if (plr->d)
266 plr->d->plr = NULL;
267 plr->d = NULL;
268 plr->cbm = 0;
269 plr->debugfs_dir = NULL;
270}
271
272/**
273 * pseudo_lock_region_init - Initialize pseudo-lock region information
274 * @plr: pseudo-lock region
275 *
276 * Called after user provided a schemata to be pseudo-locked. From the
277 * schemata the &struct pseudo_lock_region is on entry already initialized
278 * with the resource, domain, and capacity bitmask. Here the information
279 * required for pseudo-locking is deduced from this data and &struct
280 * pseudo_lock_region initialized further. This information includes:
281 * - size in bytes of the region to be pseudo-locked
282 * - cache line size to know the stride with which data needs to be accessed
283 * to be pseudo-locked
284 * - a cpu associated with the cache instance on which the pseudo-locking
285 * flow can be executed
286 *
287 * Return: 0 on success, <0 on failure. Descriptive error will be written
288 * to last_cmd_status buffer.
289 */
290static int pseudo_lock_region_init(struct pseudo_lock_region *plr)
291{
292 struct cpu_cacheinfo *ci;
293 int ret;
294 int i;
295
296 /* Pick the first cpu we find that is associated with the cache. */
297 plr->cpu = cpumask_first(&plr->d->cpu_mask);
298
299 if (!cpu_online(plr->cpu)) {
300 rdt_last_cmd_printf("cpu %u associated with cache not online\n",
301 plr->cpu);
302 ret = -ENODEV;
303 goto out_region;
304 }
305
306 ci = get_cpu_cacheinfo(plr->cpu);
307
308 plr->size = rdtgroup_cbm_to_size(plr->r, plr->d, plr->cbm);
309
310 for (i = 0; i < ci->num_leaves; i++) {
311 if (ci->info_list[i].level == plr->r->cache_level) {
312 plr->line_size = ci->info_list[i].coherency_line_size;
313 return 0;
314 }
315 }
316
317 ret = -1;
318 rdt_last_cmd_puts("unable to determine cache line size\n");
319out_region:
320 pseudo_lock_region_clear(plr);
321 return ret;
322}
323
324/**
325 * pseudo_lock_init - Initialize a pseudo-lock region
326 * @rdtgrp: resource group to which new pseudo-locked region will belong
327 *
328 * A pseudo-locked region is associated with a resource group. When this
329 * association is created the pseudo-locked region is initialized. The
330 * details of the pseudo-locked region are not known at this time so only
331 * allocation is done and association established.
332 *
333 * Return: 0 on success, <0 on failure
334 */
335static int pseudo_lock_init(struct rdtgroup *rdtgrp)
336{
337 struct pseudo_lock_region *plr;
338
339 plr = kzalloc(sizeof(*plr), GFP_KERNEL);
340 if (!plr)
341 return -ENOMEM;
342
343 init_waitqueue_head(&plr->lock_thread_wq);
344 INIT_LIST_HEAD(&plr->pm_reqs);
345 rdtgrp->plr = plr;
346 return 0;
347}
348
349/**
350 * pseudo_lock_region_alloc - Allocate kernel memory that will be pseudo-locked
351 * @plr: pseudo-lock region
352 *
353 * Initialize the details required to set up the pseudo-locked region and
354 * allocate the contiguous memory that will be pseudo-locked to the cache.
355 *
356 * Return: 0 on success, <0 on failure. Descriptive error will be written
357 * to last_cmd_status buffer.
358 */
359static int pseudo_lock_region_alloc(struct pseudo_lock_region *plr)
360{
361 int ret;
362
363 ret = pseudo_lock_region_init(plr);
364 if (ret < 0)
365 return ret;
366
367 /*
368 * We do not yet support contiguous regions larger than
369 * KMALLOC_MAX_SIZE.
370 */
371 if (plr->size > KMALLOC_MAX_SIZE) {
372 rdt_last_cmd_puts("requested region exceeds maximum size\n");
373 ret = -E2BIG;
374 goto out_region;
375 }
376
377 plr->kmem = kzalloc(plr->size, GFP_KERNEL);
378 if (!plr->kmem) {
379 rdt_last_cmd_puts("unable to allocate memory\n");
380 ret = -ENOMEM;
381 goto out_region;
382 }
383
384 ret = 0;
385 goto out;
386out_region:
387 pseudo_lock_region_clear(plr);
388out:
389 return ret;
390}
391
392/**
393 * pseudo_lock_free - Free a pseudo-locked region
394 * @rdtgrp: resource group to which pseudo-locked region belonged
395 *
396 * The pseudo-locked region's resources have already been released, or not
397 * yet created at this point. Now it can be freed and disassociated from the
398 * resource group.
399 *
400 * Return: void
401 */
402static void pseudo_lock_free(struct rdtgroup *rdtgrp)
403{
404 pseudo_lock_region_clear(rdtgrp->plr);
405 kfree(rdtgrp->plr);
406 rdtgrp->plr = NULL;
407}
408
409/**
410 * pseudo_lock_fn - Load kernel memory into cache
411 * @_rdtgrp: resource group to which pseudo-lock region belongs
412 *
413 * This is the core pseudo-locking flow.
414 *
415 * First we ensure that the kernel memory cannot be found in the cache.
416 * Then, while taking care that there will be as little interference as
417 * possible, the memory to be loaded is accessed while core is running
418 * with class of service set to the bitmask of the pseudo-locked region.
419 * After this is complete no future CAT allocations will be allowed to
420 * overlap with this bitmask.
421 *
422 * Local register variables are utilized to ensure that the memory region
423 * to be locked is the only memory access made during the critical locking
424 * loop.
425 *
426 * Return: 0. Waiter on waitqueue will be woken on completion.
427 */
428static int pseudo_lock_fn(void *_rdtgrp)
429{
430 struct rdtgroup *rdtgrp = _rdtgrp;
431 struct pseudo_lock_region *plr = rdtgrp->plr;
432 u32 rmid_p, closid_p;
433 unsigned long i;
434#ifdef CONFIG_KASAN
435 /*
436 * The registers used for local register variables are also used
437 * when KASAN is active. When KASAN is active we use a regular
438 * variable to ensure we always use a valid pointer, but the cost
439 * is that this variable will enter the cache through evicting the
440 * memory we are trying to lock into the cache. Thus expect lower
441 * pseudo-locking success rate when KASAN is active.
442 */
443 unsigned int line_size;
444 unsigned int size;
445 void *mem_r;
446#else
447 register unsigned int line_size asm("esi");
448 register unsigned int size asm("edi");
449#ifdef CONFIG_X86_64
450 register void *mem_r asm("rbx");
451#else
452 register void *mem_r asm("ebx");
453#endif /* CONFIG_X86_64 */
454#endif /* CONFIG_KASAN */
455
456 /*
457 * Make sure none of the allocated memory is cached. If it is we
458 * will get a cache hit in the loop below from outside of the pseudo-locked
459 * region.
460 * wbinvd (as opposed to clflush/clflushopt) is required to
461 * increase likelihood that allocated cache portion will be filled
462 * with associated memory.
463 */
464 native_wbinvd();
465
466 /*
467 * Always called with interrupts enabled. By disabling interrupts
468 * we ensure that we will not be preempted during this critical section.
469 */
470 local_irq_disable();
471
472 /*
473 * Call wrmsr and rdmsr as directly as possible to avoid tracing
474 * clobbering local register variables or affecting cache accesses.
475 *
476 * Disable the hardware prefetcher so that when the end of the memory
477 * being pseudo-locked is reached the hardware will not read beyond
478 * the buffer and evict pseudo-locked memory read earlier from the
479 * cache.
480 */
481 __wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0);
482 closid_p = this_cpu_read(pqr_state.cur_closid);
483 rmid_p = this_cpu_read(pqr_state.cur_rmid);
484 mem_r = plr->kmem;
485 size = plr->size;
486 line_size = plr->line_size;
487 /*
488 * Critical section begin: start by writing the closid associated
489 * with the capacity bitmask of the cache region being
490 * pseudo-locked followed by reading of kernel memory to load it
491 * into the cache.
492 */
493 __wrmsr(IA32_PQR_ASSOC, rmid_p, rdtgrp->closid);
494 /*
495 * Cache was flushed earlier. Now access kernel memory to read it
496 * into cache region associated with just activated plr->closid.
497 * Loop over data twice:
498 * - In first loop the cache region is shared with the page walker
499 * as it populates the paging structure caches (including TLB).
500 * - In the second loop the paging structure caches are used and
501 * cache region is populated with the memory being referenced.
502 */
503 for (i = 0; i < size; i += PAGE_SIZE) {
504 /*
505 * Add a barrier to prevent speculative execution of this
506 * loop reading beyond the end of the buffer.
507 */
508 rmb();
509 asm volatile("mov (%0,%1,1), %%eax\n\t"
510 :
511 : "r" (mem_r), "r" (i)
512 : "%eax", "memory");
513 }
514 for (i = 0; i < size; i += line_size) {
515 /*
516 * Add a barrier to prevent speculative execution of this
517 * loop reading beyond the end of the buffer.
518 */
519 rmb();
520 asm volatile("mov (%0,%1,1), %%eax\n\t"
521 :
522 : "r" (mem_r), "r" (i)
523 : "%eax", "memory");
524 }
525 /*
526 * Critical section end: restore closid with capacity bitmask that
527 * does not overlap with pseudo-locked region.
528 */
529 __wrmsr(IA32_PQR_ASSOC, rmid_p, closid_p);
530
531 /* Re-enable the hardware prefetcher(s) */
532 wrmsr(MSR_MISC_FEATURE_CONTROL, 0x0, 0x0);
533 local_irq_enable();
534
535 plr->thread_done = 1;
536 wake_up_interruptible(&plr->lock_thread_wq);
537 return 0;
538}
539
540/**
541 * rdtgroup_monitor_in_progress - Test if monitoring in progress
542 * @rdtgrp: resource group being queried
543 *
544 * Return: 1 if monitor groups have been created for this resource
545 * group, 0 otherwise.
546 */
547static int rdtgroup_monitor_in_progress(struct rdtgroup *rdtgrp)
548{
549 return !list_empty(&rdtgrp->mon.crdtgrp_list);
550}
551
552/**
553 * rdtgroup_locksetup_user_restrict - Restrict user access to group
554 * @rdtgrp: resource group needing access restricted
555 *
556 * A resource group used for cache pseudo-locking cannot have cpus or tasks
557 * assigned to it. This is communicated to the user by restricting access
558 * to all the files that can be used to make such changes.
559 *
560 * Permissions restored with rdtgroup_locksetup_user_restore()
561 *
562 * Return: 0 on success, <0 on failure. If a failure occurs during the
563 * restriction of access an attempt will be made to restore permissions but
564 * the state of the mode of these files will be uncertain when a failure
565 * occurs.
566 */
567static int rdtgroup_locksetup_user_restrict(struct rdtgroup *rdtgrp)
568{
569 int ret;
570
571 ret = rdtgroup_kn_mode_restrict(rdtgrp, "tasks");
572 if (ret)
573 return ret;
574
575 ret = rdtgroup_kn_mode_restrict(rdtgrp, "cpus");
576 if (ret)
577 goto err_tasks;
578
579 ret = rdtgroup_kn_mode_restrict(rdtgrp, "cpus_list");
580 if (ret)
581 goto err_cpus;
582
583 if (rdt_mon_capable) {
584 ret = rdtgroup_kn_mode_restrict(rdtgrp, "mon_groups");
585 if (ret)
586 goto err_cpus_list;
587 }
588
589 ret = 0;
590 goto out;
591
592err_cpus_list:
593 rdtgroup_kn_mode_restore(rdtgrp, "cpus_list", 0777);
594err_cpus:
595 rdtgroup_kn_mode_restore(rdtgrp, "cpus", 0777);
596err_tasks:
597 rdtgroup_kn_mode_restore(rdtgrp, "tasks", 0777);
598out:
599 return ret;
600}
601
602/**
603 * rdtgroup_locksetup_user_restore - Restore user access to group
604 * @rdtgrp: resource group needing access restored
605 *
606 * Restore all file access previously removed using
607 * rdtgroup_locksetup_user_restrict()
608 *
609 * Return: 0 on success, <0 on failure. If a failure occurs during the
610 * restoration of access an attempt will be made to restrict permissions
611 * again but the state of the mode of these files will be uncertain when
612 * a failure occurs.
613 */
614static int rdtgroup_locksetup_user_restore(struct rdtgroup *rdtgrp)
615{
616 int ret;
617
618 ret = rdtgroup_kn_mode_restore(rdtgrp, "tasks", 0777);
619 if (ret)
620 return ret;
621
622 ret = rdtgroup_kn_mode_restore(rdtgrp, "cpus", 0777);
623 if (ret)
624 goto err_tasks;
625
626 ret = rdtgroup_kn_mode_restore(rdtgrp, "cpus_list", 0777);
627 if (ret)
628 goto err_cpus;
629
630 if (rdt_mon_capable) {
631 ret = rdtgroup_kn_mode_restore(rdtgrp, "mon_groups", 0777);
632 if (ret)
633 goto err_cpus_list;
634 }
635
636 ret = 0;
637 goto out;
638
639err_cpus_list:
640 rdtgroup_kn_mode_restrict(rdtgrp, "cpus_list");
641err_cpus:
642 rdtgroup_kn_mode_restrict(rdtgrp, "cpus");
643err_tasks:
644 rdtgroup_kn_mode_restrict(rdtgrp, "tasks");
645out:
646 return ret;
647}
648
649/**
650 * rdtgroup_locksetup_enter - Resource group enters locksetup mode
651 * @rdtgrp: resource group requested to enter locksetup mode
652 *
653 * A resource group enters locksetup mode to reflect that it would be used
654 * to represent a pseudo-locked region and is in the process of being set
655 * up to do so. A resource group used for a pseudo-locked region would
656 * lose the closid associated with it so we cannot allow it to have any
657 * tasks or cpus assigned nor permit tasks or cpus to be assigned in the
658 * future. Monitoring of a pseudo-locked region is not allowed either.
659 *
660 * The above and more restrictions on a pseudo-locked region are checked
661 * for and enforced before the resource group enters the locksetup mode.
662 *
663 * Return: 0 if the resource group successfully entered locksetup mode, <0
664 * on failure. On failure the last_cmd_status buffer is updated with text to
665 * communicate details of failure to the user.
666 */
667int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp)
668{
669 int ret;
670
671 /*
672 * The default resource group can neither be removed nor lose the
673 * default closid associated with it.
674 */
675 if (rdtgrp == &rdtgroup_default) {
676 rdt_last_cmd_puts("cannot pseudo-lock default group\n");
677 return -EINVAL;
678 }
679
680 /*
681 * Cache Pseudo-locking not supported when CDP is enabled.
682 *
683 * Some things to consider if you would like to enable this
684 * support (using L3 CDP as example):
685 * - When CDP is enabled two separate resources are exposed,
686 * L3DATA and L3CODE, but they are actually on the same cache.
687 * The implication for pseudo-locking is that if a
688 * pseudo-locked region is created on a domain of one
689 * resource (eg. L3CODE), then a pseudo-locked region cannot
690 * be created on that same domain of the other resource
691 * (eg. L3DATA). This is because the creation of a
692 * pseudo-locked region involves a call to wbinvd that will
693 * affect all cache allocations on that particular domain.
694 * - Considering the previous, it may be possible to only
695 * expose one of the CDP resources to pseudo-locking and
696 * hide the other. For example, we could consider only
697 * exposing L3DATA and, since the L3 cache is unified, it is
698 * still possible to place instructions there and execute them.
699 * - If only one region is exposed to pseudo-locking we should
700 * still keep in mind that availability of a portion of cache
701 * for pseudo-locking should take into account both resources.
702 * Similarly, if a pseudo-locked region is created in one
703 * resource, the portion of cache used by it should be made
704 * unavailable to all future allocations from both resources.
705 */
706 if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled ||
707 rdt_resources_all[RDT_RESOURCE_L2DATA].alloc_enabled) {
708 rdt_last_cmd_puts("CDP enabled\n");
709 return -EINVAL;
710 }
711
712 /*
713 * Not knowing the bits to disable prefetching implies that this
714 * platform does not support Cache Pseudo-Locking.
715 */
716 prefetch_disable_bits = get_prefetch_disable_bits();
717 if (prefetch_disable_bits == 0) {
718 rdt_last_cmd_puts("pseudo-locking not supported\n");
719 return -EINVAL;
720 }
721
722 if (rdtgroup_monitor_in_progress(rdtgrp)) {
723 rdt_last_cmd_puts("monitoring in progress\n");
724 return -EINVAL;
725 }
726
727 if (rdtgroup_tasks_assigned(rdtgrp)) {
728 rdt_last_cmd_puts("tasks assigned to resource group\n");
729 return -EINVAL;
730 }
731
732 if (!cpumask_empty(&rdtgrp->cpu_mask)) {
733 rdt_last_cmd_puts("CPUs assigned to resource group\n");
734 return -EINVAL;
735 }
736
737 if (rdtgroup_locksetup_user_restrict(rdtgrp)) {
738 rdt_last_cmd_puts("unable to modify resctrl permissions\n");
739 return -EIO;
740 }
741
742 ret = pseudo_lock_init(rdtgrp);
743 if (ret) {
744 rdt_last_cmd_puts("unable to init pseudo-lock region\n");
745 goto out_release;
746 }
747
748 /*
749 * If this system is capable of monitoring, an rmid would have been
750 * allocated when the control group was created. This is not needed
751 * anymore when this group would be used for pseudo-locking. This
752 * is safe to call on platforms not capable of monitoring.
753 */
754 free_rmid(rdtgrp->mon.rmid);
755
756 ret = 0;
757 goto out;
758
759out_release:
760 rdtgroup_locksetup_user_restore(rdtgrp);
761out:
762 return ret;
763}
764
765/**
766 * rdtgroup_locksetup_exit - Resource group exits locksetup mode
767 * @rdtgrp: resource group
768 *
769 * When a resource group exits locksetup mode the earlier restrictions are
770 * lifted.
771 *
772 * Return: 0 on success, <0 on failure
773 */
774int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp)
775{
776 int ret;
777
778 if (rdt_mon_capable) {
779 ret = alloc_rmid();
780 if (ret < 0) {
781 rdt_last_cmd_puts("out of RMIDs\n");
782 return ret;
783 }
784 rdtgrp->mon.rmid = ret;
785 }
786
787 ret = rdtgroup_locksetup_user_restore(rdtgrp);
788 if (ret) {
789 free_rmid(rdtgrp->mon.rmid);
790 return ret;
791 }
792
793 pseudo_lock_free(rdtgrp);
794 return 0;
795}
796
797/**
798 * rdtgroup_cbm_overlaps_pseudo_locked - Test if CBM or portion is pseudo-locked
799 * @d: RDT domain
800 * @_cbm: CBM to test
801 *
802 * @d represents a cache instance and @_cbm a capacity bitmask that is
803 * considered for it. Determine if @_cbm overlaps with any existing
804 * pseudo-locked region on @d.
805 *
806 * Return: true if @_cbm overlaps with pseudo-locked region on @d, false
807 * otherwise.
808 */
809bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, u32 _cbm)
810{
811 unsigned long *cbm = (unsigned long *)&_cbm;
812 unsigned long *cbm_b;
813 unsigned int cbm_len;
814
815 if (d->plr) {
816 cbm_len = d->plr->r->cache.cbm_len;
817 cbm_b = (unsigned long *)&d->plr->cbm;
818 if (bitmap_intersects(cbm, cbm_b, cbm_len))
819 return true;
820 }
821 return false;
822}
823
824/**
825 * rdtgroup_pseudo_locked_in_hierarchy - Pseudo-locked region in cache hierarchy
826 * @d: RDT domain under test
827 *
828 * The setup of a pseudo-locked region affects all cache instances within
829 * the hierarchy of the region. It is thus essential to know if any
830 * pseudo-locked regions exist within a cache hierarchy to prevent any
831 * attempts to create new pseudo-locked regions in the same hierarchy.
832 *
833 * Return: true if a pseudo-locked region exists in the hierarchy of @d or
834 * if it is not possible to test due to memory allocation issue,
835 * false otherwise.
836 */
837bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d)
838{
839 cpumask_var_t cpu_with_psl;
840 struct rdt_resource *r;
841 struct rdt_domain *d_i;
842 bool ret = false;
843
844 if (!zalloc_cpumask_var(&cpu_with_psl, GFP_KERNEL))
845 return true;
846
847 /*
848 * First determine which cpus have pseudo-locked regions
849 * associated with them.
850 */
851 for_each_alloc_enabled_rdt_resource(r) {
852 list_for_each_entry(d_i, &r->domains, list) {
853 if (d_i->plr)
854 cpumask_or(cpu_with_psl, cpu_with_psl,
855 &d_i->cpu_mask);
856 }
857 }
858
859 /*
860 * Next test if new pseudo-locked region would intersect with
861 * existing region.
862 */
863 if (cpumask_intersects(&d->cpu_mask, cpu_with_psl))
864 ret = true;
865
866 free_cpumask_var(cpu_with_psl);
867 return ret;
868}
869
870/**
871 * measure_cycles_lat_fn - Measure cycle latency to read pseudo-locked memory
872 * @_plr: pseudo-lock region to measure
873 *
874 * There is no deterministic way to test if a memory region is cached. One
875 * way is to measure how long it takes to read the memory; the speed of
876 * access is a good way to learn how close to the cpu the data was. Even
877 * more, if the prefetcher is disabled and the memory is read at a stride
878 * of half the cache line, then a cache miss will be easy to spot since the
879 * read of the first half would be significantly slower than the read of
880 * the second half.
881 *
882 * Return: 0. Waiter on waitqueue will be woken on completion.
883 */
884static int measure_cycles_lat_fn(void *_plr)
885{
886 struct pseudo_lock_region *plr = _plr;
887 unsigned long i;
888 u64 start, end;
889#ifdef CONFIG_KASAN
890 /*
891 * The registers used for local register variables are also used
892 * when KASAN is active. When KASAN is active we use a regular
893 * variable to ensure we always use a valid pointer to access memory.
894 * The cost is that accessing this pointer, which could be in
895 * cache, will be included in the measurement of memory read latency.
896 */
897 void *mem_r;
898#else
899#ifdef CONFIG_X86_64
900 register void *mem_r asm("rbx");
901#else
902 register void *mem_r asm("ebx");
903#endif /* CONFIG_X86_64 */
904#endif /* CONFIG_KASAN */
905
906 local_irq_disable();
907 /*
908 * The wrmsr call may be reordered with the assignment below it.
909 * Call wrmsr as directly as possible to avoid tracing clobbering
910 * local register variable used for memory pointer.
911 */
912 __wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0);
913 mem_r = plr->kmem;
914 /*
915 * Dummy execute of the time measurement to load the needed
916 * instructions into the L1 instruction cache.
917 */
918 start = rdtsc_ordered();
919 for (i = 0; i < plr->size; i += 32) {
920 start = rdtsc_ordered();
921 asm volatile("mov (%0,%1,1), %%eax\n\t"
922 :
923 : "r" (mem_r), "r" (i)
924 : "%eax", "memory");
925 end = rdtsc_ordered();
926 trace_pseudo_lock_mem_latency((u32)(end - start));
927 }
928 wrmsr(MSR_MISC_FEATURE_CONTROL, 0x0, 0x0);
929 local_irq_enable();
930 plr->thread_done = 1;
931 wake_up_interruptible(&plr->lock_thread_wq);
932 return 0;
933}
934
935static int measure_cycles_perf_fn(void *_plr)
936{
937 unsigned long long l3_hits = 0, l3_miss = 0;
938 u64 l3_hit_bits = 0, l3_miss_bits = 0;
939 struct pseudo_lock_region *plr = _plr;
940 unsigned long long l2_hits, l2_miss;
941 u64 l2_hit_bits, l2_miss_bits;
942 unsigned long i;
943#ifdef CONFIG_KASAN
944 /*
945 * The registers used for local register variables are also used
946 * when KASAN is active. When KASAN is active we use regular variables
947 * at the cost of including cache access latency to these variables
948 * in the measurements.
949 */
950 unsigned int line_size;
951 unsigned int size;
952 void *mem_r;
953#else
954 register unsigned int line_size asm("esi");
955 register unsigned int size asm("edi");
956#ifdef CONFIG_X86_64
957 register void *mem_r asm("rbx");
958#else
959 register void *mem_r asm("ebx");
960#endif /* CONFIG_X86_64 */
961#endif /* CONFIG_KASAN */
962
963 /*
964 * Non-architectural event for the Goldmont Microarchitecture
965 * from Intel x86 Architecture Software Developer Manual (SDM):
966 * MEM_LOAD_UOPS_RETIRED D1H (event number)
967 * Umask values:
968 * L1_HIT 01H
969 * L2_HIT 02H
970 * L1_MISS 08H
971 * L2_MISS 10H
972 *
973 * On Broadwell Microarchitecture the MEM_LOAD_UOPS_RETIRED event
974 * has two "no fix" errata associated with it: BDM35 and BDM100. On
975 * this platform we use the following events instead:
976 * L2_RQSTS 24H (Documented in https://download.01.org/perfmon/BDW/)
977 * REFERENCES FFH
978 * MISS 3FH
979 * LONGEST_LAT_CACHE 2EH (Documented in SDM)
980 * REFERENCE 4FH
981 * MISS 41H
982 */
983
984 /*
985 * Start by setting flags for IA32_PERFEVTSELx:
986 * OS (Operating system mode) 0x2
987 * INT (APIC interrupt enable) 0x10
988 * EN (Enable counter) 0x40
989 *
990 * Then add the Umask value and event number to select performance
991 * event.
992 */
993
994 switch (boot_cpu_data.x86_model) {
995 case INTEL_FAM6_ATOM_GOLDMONT:
996 case INTEL_FAM6_ATOM_GEMINI_LAKE:
997 l2_hit_bits = (0x52ULL << 16) | (0x2 << 8) | 0xd1;
998 l2_miss_bits = (0x52ULL << 16) | (0x10 << 8) | 0xd1;
999 break;
1000 case INTEL_FAM6_BROADWELL_X:
1001 /* On BDW the l2_hit_bits count references, not hits */
1002 l2_hit_bits = (0x52ULL << 16) | (0xff << 8) | 0x24;
1003 l2_miss_bits = (0x52ULL << 16) | (0x3f << 8) | 0x24;
1004 /* On BDW the l3_hit_bits count references, not hits */
1005 l3_hit_bits = (0x52ULL << 16) | (0x4f << 8) | 0x2e;
1006 l3_miss_bits = (0x52ULL << 16) | (0x41 << 8) | 0x2e;
1007 break;
1008 default:
1009 goto out;
1010 }
1011
1012 local_irq_disable();
1013 /*
1014 * Call wrmsr directly to prevent the local register variables from
1015 * being overwritten due to reordering of their assignment with
1016 * the wrmsr calls.
1017 */
1018 __wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0);
1019 /* Disable events and reset counters */
1020 pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0, 0x0);
1021 pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 1, 0x0);
1022 pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_PERFCTR0, 0x0);
1023 pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_PERFCTR0 + 1, 0x0);
1024 if (l3_hit_bits > 0) {
1025 pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 2, 0x0);
1026 pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 3, 0x0);
1027 pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_PERFCTR0 + 2, 0x0);
1028 pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_PERFCTR0 + 3, 0x0);
1029 }
1030 /* Set and enable the L2 counters */
1031 pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0, l2_hit_bits);
1032 pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 1, l2_miss_bits);
1033 if (l3_hit_bits > 0) {
1034 pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 2,
1035 l3_hit_bits);
1036 pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 3,
1037 l3_miss_bits);
1038 }
1039 mem_r = plr->kmem;
1040 size = plr->size;
1041 line_size = plr->line_size;
1042 for (i = 0; i < size; i += line_size) {
1043 asm volatile("mov (%0,%1,1), %%eax\n\t"
1044 :
1045 : "r" (mem_r), "r" (i)
1046 : "%eax", "memory");
1047 }
1048 /*
1049 * Call wrmsr directly (no tracing) to not influence
1050 * the cache access counters as they are disabled.
1051 */
1052 pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0,
1053 l2_hit_bits & ~(0x40ULL << 16));
1054 pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 1,
1055 l2_miss_bits & ~(0x40ULL << 16));
1056 if (l3_hit_bits > 0) {
1057 pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 2,
1058 l3_hit_bits & ~(0x40ULL << 16));
1059 pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 3,
1060 l3_miss_bits & ~(0x40ULL << 16));
1061 }
1062 l2_hits = native_read_pmc(0);
1063 l2_miss = native_read_pmc(1);
1064 if (l3_hit_bits > 0) {
1065 l3_hits = native_read_pmc(2);
1066 l3_miss = native_read_pmc(3);
1067 }
1068 wrmsr(MSR_MISC_FEATURE_CONTROL, 0x0, 0x0);
1069 local_irq_enable();
1070 /*
1071 * On BDW we count references and misses, so we need to adjust. Sometimes
1072 * the "hits" counter is a bit more than the references, for
1073 * example, x references but x + 1 hits. To not report invalid
1074 * hit values in this case we treat that as misses equal to
1075 * references.
1076 */
1077 if (boot_cpu_data.x86_model == INTEL_FAM6_BROADWELL_X)
1078 l2_hits -= (l2_miss > l2_hits ? l2_hits : l2_miss);
1079 trace_pseudo_lock_l2(l2_hits, l2_miss);
1080 if (l3_hit_bits > 0) {
1081 if (boot_cpu_data.x86_model == INTEL_FAM6_BROADWELL_X)
1082 l3_hits -= (l3_miss > l3_hits ? l3_hits : l3_miss);
1083 trace_pseudo_lock_l3(l3_hits, l3_miss);
1084 }
1085
1086out:
1087 plr->thread_done = 1;
1088 wake_up_interruptible(&plr->lock_thread_wq);
1089 return 0;
1090}
1091
1092/**
1093 * pseudo_lock_measure_cycles - Trigger latency measurement of pseudo-locked region
1094 *
1095 * The measurement of latency to access a pseudo-locked region should be
1096 * done from a cpu that is associated with that pseudo-locked region.
1097 * Determine which cpu is associated with this region and start a thread on
1098 * that cpu to perform the measurement, then wait for that thread to complete.
1099 *
1100 * Return: 0 on success, <0 on failure
1101 */
1102static int pseudo_lock_measure_cycles(struct rdtgroup *rdtgrp, int sel)
1103{
1104 struct pseudo_lock_region *plr = rdtgrp->plr;
1105 struct task_struct *thread;
1106 unsigned int cpu;
1107 int ret = -1;
1108
1109 cpus_read_lock();
1110 mutex_lock(&rdtgroup_mutex);
1111
1112 if (rdtgrp->flags & RDT_DELETED) {
1113 ret = -ENODEV;
1114 goto out;
1115 }
1116
1117 plr->thread_done = 0;
1118 cpu = cpumask_first(&plr->d->cpu_mask);
1119 if (!cpu_online(cpu)) {
1120 ret = -ENODEV;
1121 goto out;
1122 }
1123
1124 if (sel == 1)
1125 thread = kthread_create_on_node(measure_cycles_lat_fn, plr,
1126 cpu_to_node(cpu),
1127 "pseudo_lock_measure/%u",
1128 cpu);
1129 else if (sel == 2)
1130 thread = kthread_create_on_node(measure_cycles_perf_fn, plr,
1131 cpu_to_node(cpu),
1132 "pseudo_lock_measure/%u",
1133 cpu);
1134 else
1135 goto out;
1136
1137 if (IS_ERR(thread)) {
1138 ret = PTR_ERR(thread);
1139 goto out;
1140 }
1141 kthread_bind(thread, cpu);
1142 wake_up_process(thread);
1143
1144 ret = wait_event_interruptible(plr->lock_thread_wq,
1145 plr->thread_done == 1);
1146 if (ret < 0)
1147 goto out;
1148
1149 ret = 0;
1150
1151out:
1152 mutex_unlock(&rdtgroup_mutex);
1153 cpus_read_unlock();
1154 return ret;
1155}
1156
1157static ssize_t pseudo_lock_measure_trigger(struct file *file,
1158 const char __user *user_buf,
1159 size_t count, loff_t *ppos)
1160{
1161 struct rdtgroup *rdtgrp = file->private_data;
1162 size_t buf_size;
1163 char buf[32];
1164 int ret;
1165 int sel;
1166
1167 buf_size = min(count, (sizeof(buf) - 1));
1168 if (copy_from_user(buf, user_buf, buf_size))
1169 return -EFAULT;
1170
1171 buf[buf_size] = '\0';
1172 ret = kstrtoint(buf, 10, &sel);
1173 if (ret == 0) {
1174 if (sel != 1)
1175 return -EINVAL;
1176 ret = debugfs_file_get(file->f_path.dentry);
1177 if (ret)
1178 return ret;
1179 ret = pseudo_lock_measure_cycles(rdtgrp, sel);
1180 if (ret == 0)
1181 ret = count;
1182 debugfs_file_put(file->f_path.dentry);
1183 }
1184
1185 return ret;
1186}
1187
1188static const struct file_operations pseudo_measure_fops = {
1189 .write = pseudo_lock_measure_trigger,
1190 .open = simple_open,
1191 .llseek = default_llseek,
1192};
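/*
 * Hedged user-space sketch, not part of the file above: triggering the
 * latency measurement through the per-group "pseudo_lock_measure" debugfs
 * file created in rdtgroup_pseudo_lock_create() below. The path assumes
 * debugfs is mounted at /sys/kernel/debug, that the resctrl debugfs
 * directory is named "resctrl", and that the resource group is called
 * "newlock"; all three are assumptions. Writing "1" selects
 * measure_cycles_lat_fn() and results are emitted as
 * resctrl:pseudo_lock_mem_latency trace events.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/sys/kernel/debug/resctrl/newlock/pseudo_lock_measure",
		      O_WRONLY);

	if (fd < 0 || write(fd, "1", 1) != 1) {
		perror("pseudo_lock_measure");
		return 1;
	}
	close(fd);
	return 0;
}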
1193
1194/**
1195 * rdtgroup_pseudo_lock_create - Create a pseudo-locked region
1196 * @rdtgrp: resource group to which pseudo-lock region belongs
1197 *
1198 * Called when a resource group in the pseudo-locksetup mode receives a
1199 * valid schemata that should be pseudo-locked. Since the resource group is
1200 * in pseudo-locksetup mode the &struct pseudo_lock_region has already been
1201 * allocated and initialized with the essential information. If a failure
1202 * occurs the resource group remains in the pseudo-locksetup mode with the
1203 * &struct pseudo_lock_region associated with it, but cleared from all
1204 * information and ready for the user to re-attempt pseudo-locking by
1205 * writing the schemata again.
1206 *
1207 * Return: 0 if the pseudo-locked region was successfully pseudo-locked, <0
1208 * on failure. Descriptive error will be written to last_cmd_status buffer.
1209 */
1210int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp)
1211{
1212 struct pseudo_lock_region *plr = rdtgrp->plr;
1213 struct task_struct *thread;
1214 unsigned int new_minor;
1215 struct device *dev;
1216 int ret;
1217
1218 ret = pseudo_lock_region_alloc(plr);
1219 if (ret < 0)
1220 return ret;
1221
1222 ret = pseudo_lock_cstates_constrain(plr);
1223 if (ret < 0) {
1224 ret = -EINVAL;
1225 goto out_region;
1226 }
1227
1228 plr->thread_done = 0;
1229
1230 thread = kthread_create_on_node(pseudo_lock_fn, rdtgrp,
1231 cpu_to_node(plr->cpu),
1232 "pseudo_lock/%u", plr->cpu);
1233 if (IS_ERR(thread)) {
1234 ret = PTR_ERR(thread);
1235 rdt_last_cmd_printf("locking thread returned error %d\n", ret);
1236 goto out_cstates;
1237 }
1238
1239 kthread_bind(thread, plr->cpu);
1240 wake_up_process(thread);
1241
1242 ret = wait_event_interruptible(plr->lock_thread_wq,
1243 plr->thread_done == 1);
1244 if (ret < 0) {
1245 /*
1246 * If the thread does not get on the CPU for whatever
1247 * reason and the process which sets up the region is
1248 * interrupted then this will leave the thread in runnable
1249 * state and once it gets on the CPU it will dereference
1250 * the cleared, but not freed, plr struct resulting in an
1251 * empty pseudo-locking loop.
1252 */
1253 rdt_last_cmd_puts("locking thread interrupted\n");
1254 goto out_cstates;
1255 }
1256
1257 ret = pseudo_lock_minor_get(&new_minor);
1258 if (ret < 0) {
1259 rdt_last_cmd_puts("unable to obtain a new minor number\n");
1260 goto out_cstates;
1261 }
1262
1263 /*
1264 * Unlock access but do not release the reference. The
1265 * pseudo-locked region will still be here on return.
1266 *
1267 * The mutex has to be released temporarily to avoid a potential
1268 * deadlock with the mm->mmap_sem semaphore which is obtained in
1269 * the device_create() and debugfs_create_dir() callpath below
1270 * as well as before the mmap() callback is called.
1271 */
1272 mutex_unlock(&rdtgroup_mutex);
1273
1274 if (!IS_ERR_OR_NULL(debugfs_resctrl)) {
1275 plr->debugfs_dir = debugfs_create_dir(rdtgrp->kn->name,
1276 debugfs_resctrl);
1277 if (!IS_ERR_OR_NULL(plr->debugfs_dir))
1278 debugfs_create_file("pseudo_lock_measure", 0200,
1279 plr->debugfs_dir, rdtgrp,
1280 &pseudo_measure_fops);
1281 }
1282
1283 dev = device_create(pseudo_lock_class, NULL,
1284 MKDEV(pseudo_lock_major, new_minor),
1285 rdtgrp, "%s", rdtgrp->kn->name);
1286
1287 mutex_lock(&rdtgroup_mutex);
1288
1289 if (IS_ERR(dev)) {
1290 ret = PTR_ERR(dev);
1291 rdt_last_cmd_printf("failed to create character device: %d\n",
1292 ret);
1293 goto out_debugfs;
1294 }
1295
1296 /* We released the mutex - check if group was removed while we did so */
1297 if (rdtgrp->flags & RDT_DELETED) {
1298 ret = -ENODEV;
1299 goto out_device;
1300 }
1301
1302 plr->minor = new_minor;
1303
1304 rdtgrp->mode = RDT_MODE_PSEUDO_LOCKED;
1305 closid_free(rdtgrp->closid);
1306 rdtgroup_kn_mode_restore(rdtgrp, "cpus", 0444);
1307 rdtgroup_kn_mode_restore(rdtgrp, "cpus_list", 0444);
1308
1309 ret = 0;
1310 goto out;
1311
1312out_device:
1313 device_destroy(pseudo_lock_class, MKDEV(pseudo_lock_major, new_minor));
1314out_debugfs:
1315 debugfs_remove_recursive(plr->debugfs_dir);
1316 pseudo_lock_minor_release(new_minor);
1317out_cstates:
1318 pseudo_lock_cstates_relax(plr);
1319out_region:
1320 pseudo_lock_region_clear(plr);
1321out:
1322 return ret;
1323}
1324
1325/**
1326 * rdtgroup_pseudo_lock_remove - Remove a pseudo-locked region
1327 * @rdtgrp: resource group to which the pseudo-locked region belongs
1328 *
1329 * The removal of a pseudo-locked region can be initiated when the resource
1330 * group is removed via a "rmdir" from user space or the
1331 * unmount of the resctrl filesystem. On removal the resource group does
1332 * not go back to pseudo-locksetup mode before it is removed, instead it is
1333 * removed directly. There is thus asymmetry with the creation where the
1334 * &struct pseudo_lock_region is removed here while it was not created in
1335 * rdtgroup_pseudo_lock_create().
1336 *
1337 * Return: void
1338 */
1339void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp)
1340{
1341 struct pseudo_lock_region *plr = rdtgrp->plr;
1342
1343 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
1344 /*
1345 * Default group cannot be a pseudo-locked region so we can
1346 * free closid here.
1347 */
1348 closid_free(rdtgrp->closid);
1349 goto free;
1350 }
1351
1352 pseudo_lock_cstates_relax(plr);
1353 debugfs_remove_recursive(rdtgrp->plr->debugfs_dir);
1354 device_destroy(pseudo_lock_class, MKDEV(pseudo_lock_major, plr->minor));
1355 pseudo_lock_minor_release(plr->minor);
1356
1357free:
1358 pseudo_lock_free(rdtgrp);
1359}
1360
1361static int pseudo_lock_dev_open(struct inode *inode, struct file *filp)
1362{
1363 struct rdtgroup *rdtgrp;
1364
1365 mutex_lock(&rdtgroup_mutex);
1366
1367 rdtgrp = region_find_by_minor(iminor(inode));
1368 if (!rdtgrp) {
1369 mutex_unlock(&rdtgroup_mutex);
1370 return -ENODEV;
1371 }
1372
1373 filp->private_data = rdtgrp;
1374 atomic_inc(&rdtgrp->waitcount);
1375 /* Perform a non-seekable open - llseek is not supported */
1376 filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);
1377
1378 mutex_unlock(&rdtgroup_mutex);
1379
1380 return 0;
1381}
1382
1383static int pseudo_lock_dev_release(struct inode *inode, struct file *filp)
1384{
1385 struct rdtgroup *rdtgrp;
1386
1387 mutex_lock(&rdtgroup_mutex);
1388 rdtgrp = filp->private_data;
1389 WARN_ON(!rdtgrp);
1390 if (!rdtgrp) {
1391 mutex_unlock(&rdtgroup_mutex);
1392 return -ENODEV;
1393 }
1394 filp->private_data = NULL;
1395 atomic_dec(&rdtgrp->waitcount);
1396 mutex_unlock(&rdtgroup_mutex);
1397 return 0;
1398}
1399
1400static int pseudo_lock_dev_mremap(struct vm_area_struct *area)
1401{
1402 /* Not supported */
1403 return -EINVAL;
1404}
1405
1406static const struct vm_operations_struct pseudo_mmap_ops = {
1407 .mremap = pseudo_lock_dev_mremap,
1408};
1409
1410static int pseudo_lock_dev_mmap(struct file *filp, struct vm_area_struct *vma)
1411{
1412 unsigned long vsize = vma->vm_end - vma->vm_start;
1413 unsigned long off = vma->vm_pgoff << PAGE_SHIFT;
1414 struct pseudo_lock_region *plr;
1415 struct rdtgroup *rdtgrp;
1416 unsigned long physical;
1417 unsigned long psize;
1418
1419 mutex_lock(&rdtgroup_mutex);
1420
1421 rdtgrp = filp->private_data;
1422 WARN_ON(!rdtgrp);
1423 if (!rdtgrp) {
1424 mutex_unlock(&rdtgroup_mutex);
1425 return -ENODEV;
1426 }
1427
1428 plr = rdtgrp->plr;
1429
1430 /*
1431 * Task is required to run with affinity to the cpus associated
1432 * with the pseudo-locked region. If this is not the case the task
1433 * may be scheduled elsewhere and invalidate entries in the
1434 * pseudo-locked region.
1435 */
1436 if (!cpumask_subset(&current->cpus_allowed, &plr->d->cpu_mask)) {
1437 mutex_unlock(&rdtgroup_mutex);
1438 return -EINVAL;
1439 }
1440
1441 physical = __pa(plr->kmem) >> PAGE_SHIFT;
1442 psize = plr->size - off;
1443
1444 if (off > plr->size) {
1445 mutex_unlock(&rdtgroup_mutex);
1446 return -ENOSPC;
1447 }
1448
1449 /*
1450 * Ensure changes are carried directly to the memory being mapped,
1451 * do not allow copy-on-write mapping.
1452 */
1453 if (!(vma->vm_flags & VM_SHARED)) {
1454 mutex_unlock(&rdtgroup_mutex);
1455 return -EINVAL;
1456 }
1457
1458 if (vsize > psize) {
1459 mutex_unlock(&rdtgroup_mutex);
1460 return -ENOSPC;
1461 }
1462
1463 memset(plr->kmem + off, 0, vsize);
1464
1465 if (remap_pfn_range(vma, vma->vm_start, physical + vma->vm_pgoff,
1466 vsize, vma->vm_page_prot)) {
1467 mutex_unlock(&rdtgroup_mutex);
1468 return -EAGAIN;
1469 }
1470 vma->vm_ops = &pseudo_mmap_ops;
1471 mutex_unlock(&rdtgroup_mutex);
1472 return 0;
1473}
1474
1475static const struct file_operations pseudo_lock_dev_fops = {
1476 .owner = THIS_MODULE,
1477 .llseek = no_llseek,
1478 .read = NULL,
1479 .write = NULL,
1480 .open = pseudo_lock_dev_open,
1481 .release = pseudo_lock_dev_release,
1482 .mmap = pseudo_lock_dev_mmap,
1483};
1484
1485static char *pseudo_lock_devnode(struct device *dev, umode_t *mode)
1486{
1487 struct rdtgroup *rdtgrp;
1488
1489 rdtgrp = dev_get_drvdata(dev);
1490 if (mode)
1491 *mode = 0600;
1492 return kasprintf(GFP_KERNEL, "pseudo_lock/%s", rdtgrp->kn->name);
1493}
1494
1495int rdt_pseudo_lock_init(void)
1496{
1497 int ret;
1498
1499 ret = register_chrdev(0, "pseudo_lock", &pseudo_lock_dev_fops);
1500 if (ret < 0)
1501 return ret;
1502
1503 pseudo_lock_major = ret;
1504
1505 pseudo_lock_class = class_create(THIS_MODULE, "pseudo_lock");
1506 if (IS_ERR(pseudo_lock_class)) {
1507 ret = PTR_ERR(pseudo_lock_class);
1508 unregister_chrdev(pseudo_lock_major, "pseudo_lock");
1509 return ret;
1510 }
1511
1512 pseudo_lock_class->devnode = pseudo_lock_devnode;
1513 return 0;
1514}
1515
1516void rdt_pseudo_lock_release(void)
1517{
1518 class_destroy(pseudo_lock_class);
1519 pseudo_lock_class = NULL;
1520 unregister_chrdev(pseudo_lock_major, "pseudo_lock");
1521 pseudo_lock_major = 0;
1522}
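For completeness, a hedged sketch of how an application would consume a pseudo-locked region exposed by the character device above: the device node name follows pseudo_lock_devnode() ("pseudo_lock/<group>" under /dev), the mapping must be shared, and pseudo_lock_dev_mmap() rejects callers whose CPU affinity is not a subset of the region's cache domain. The group name "newlock", the CPU number, and the 256 KiB size are illustrative assumptions.

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <sched.h>
	#include <string.h>
	#include <sys/mman.h>
	#include <unistd.h>

	int main(void)
	{
		size_t size = 256 * 1024;	/* must not exceed the locked region */
		cpu_set_t cpus;
		void *mem;
		int fd;

		/* Run only on a CPU that shares the pseudo-locked cache domain. */
		CPU_ZERO(&cpus);
		CPU_SET(2, &cpus);
		if (sched_setaffinity(0, sizeof(cpus), &cpus))
			return 1;

		fd = open("/dev/pseudo_lock/newlock", O_RDWR);
		if (fd < 0)
			return 1;

		/* Copy-on-write is refused; the mapping must be MAP_SHARED. */
		mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
		if (mem == MAP_FAILED)
			return 1;

		/* Data placed here is served from the pseudo-locked cache portion. */
		memset(mem, 0, size);

		munmap(mem, size);
		close(fd);
		return 0;
	}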
diff --git a/arch/x86/kernel/cpu/intel_rdt_pseudo_lock_event.h b/arch/x86/kernel/cpu/intel_rdt_pseudo_lock_event.h
new file mode 100644
index 000000000000..2c041e6d9f05
--- /dev/null
+++ b/arch/x86/kernel/cpu/intel_rdt_pseudo_lock_event.h
@@ -0,0 +1,43 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2#undef TRACE_SYSTEM
3#define TRACE_SYSTEM resctrl
4
5#if !defined(_TRACE_PSEUDO_LOCK_H) || defined(TRACE_HEADER_MULTI_READ)
6#define _TRACE_PSEUDO_LOCK_H
7
8#include <linux/tracepoint.h>
9
10TRACE_EVENT(pseudo_lock_mem_latency,
11 TP_PROTO(u32 latency),
12 TP_ARGS(latency),
13 TP_STRUCT__entry(__field(u32, latency)),
14 TP_fast_assign(__entry->latency = latency),
15 TP_printk("latency=%u", __entry->latency)
16 );
17
18TRACE_EVENT(pseudo_lock_l2,
19 TP_PROTO(u64 l2_hits, u64 l2_miss),
20 TP_ARGS(l2_hits, l2_miss),
21 TP_STRUCT__entry(__field(u64, l2_hits)
22 __field(u64, l2_miss)),
23 TP_fast_assign(__entry->l2_hits = l2_hits;
24 __entry->l2_miss = l2_miss;),
25 TP_printk("hits=%llu miss=%llu",
26 __entry->l2_hits, __entry->l2_miss));
27
28TRACE_EVENT(pseudo_lock_l3,
29 TP_PROTO(u64 l3_hits, u64 l3_miss),
30 TP_ARGS(l3_hits, l3_miss),
31 TP_STRUCT__entry(__field(u64, l3_hits)
32 __field(u64, l3_miss)),
33 TP_fast_assign(__entry->l3_hits = l3_hits;
34 __entry->l3_miss = l3_miss;),
35 TP_printk("hits=%llu miss=%llu",
36 __entry->l3_hits, __entry->l3_miss));
37
38#endif /* _TRACE_PSEUDO_LOCK_H */
39
40#undef TRACE_INCLUDE_PATH
41#define TRACE_INCLUDE_PATH .
42#define TRACE_INCLUDE_FILE intel_rdt_pseudo_lock_event
43#include <trace/define_trace.h>
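The three tracepoints above are regular ftrace events in the "resctrl" trace system, so measurement results can be collected through tracefs. A hedged sketch follows, assuming the traditional /sys/kernel/debug/tracing location and that a measurement was already triggered via the pseudo_lock_measure debugfs file:

	#include <stdio.h>

	int main(void)
	{
		char line[256];
		FILE *enable, *pipe;

		/* Enable the resctrl:pseudo_lock_mem_latency event. */
		enable = fopen("/sys/kernel/debug/tracing/events/resctrl/pseudo_lock_mem_latency/enable", "w");
		if (!enable)
			return 1;
		fputs("1\n", enable);
		fclose(enable);

		/* Stream the latency samples as they are emitted. */
		pipe = fopen("/sys/kernel/debug/tracing/trace_pipe", "r");
		if (!pipe)
			return 1;
		while (fgets(line, sizeof(line), pipe))
			fputs(line, stdout);
		fclose(pipe);
		return 0;
	}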
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index 749856a2e736..d6d7ea7349d0 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -20,7 +20,9 @@
20 20
21#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 21#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
22 22
23#include <linux/cacheinfo.h>
23#include <linux/cpu.h> 24#include <linux/cpu.h>
25#include <linux/debugfs.h>
24#include <linux/fs.h> 26#include <linux/fs.h>
25#include <linux/sysfs.h> 27#include <linux/sysfs.h>
26#include <linux/kernfs.h> 28#include <linux/kernfs.h>
@@ -55,6 +57,8 @@ static struct kernfs_node *kn_mondata;
55static struct seq_buf last_cmd_status; 57static struct seq_buf last_cmd_status;
56static char last_cmd_status_buf[512]; 58static char last_cmd_status_buf[512];
57 59
60struct dentry *debugfs_resctrl;
61
58void rdt_last_cmd_clear(void) 62void rdt_last_cmd_clear(void)
59{ 63{
60 lockdep_assert_held(&rdtgroup_mutex); 64 lockdep_assert_held(&rdtgroup_mutex);
@@ -121,11 +125,65 @@ static int closid_alloc(void)
121 return closid; 125 return closid;
122} 126}
123 127
124static void closid_free(int closid) 128void closid_free(int closid)
125{ 129{
126 closid_free_map |= 1 << closid; 130 closid_free_map |= 1 << closid;
127} 131}
128 132
133/**
134 * closid_allocated - test if provided closid is in use
135 * @closid: closid to be tested
136 *
137 * Return: true if @closid is currently associated with a resource group,
138 * false if @closid is free
139 */
140static bool closid_allocated(unsigned int closid)
141{
142 return (closid_free_map & (1 << closid)) == 0;
143}
144
145/**
146 * rdtgroup_mode_by_closid - Return mode of resource group with closid
147 * @closid: closid of the resource group
148 *
149 * Each resource group is associated with a @closid. Here the mode
150 * of a resource group can be queried by searching for it using its closid.
151 *
152 * Return: mode as &enum rdtgrp_mode of resource group with closid @closid
153 */
154enum rdtgrp_mode rdtgroup_mode_by_closid(int closid)
155{
156 struct rdtgroup *rdtgrp;
157
158 list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
159 if (rdtgrp->closid == closid)
160 return rdtgrp->mode;
161 }
162
163 return RDT_NUM_MODES;
164}
165
166static const char * const rdt_mode_str[] = {
167 [RDT_MODE_SHAREABLE] = "shareable",
168 [RDT_MODE_EXCLUSIVE] = "exclusive",
169 [RDT_MODE_PSEUDO_LOCKSETUP] = "pseudo-locksetup",
170 [RDT_MODE_PSEUDO_LOCKED] = "pseudo-locked",
171};
172
173/**
174 * rdtgroup_mode_str - Return the string representation of mode
175 * @mode: the resource group mode as &enum rdtgrp_mode
176 *
177 * Return: string representation of valid mode, "unknown" otherwise
178 */
179static const char *rdtgroup_mode_str(enum rdtgrp_mode mode)
180{
181 if (mode < RDT_MODE_SHAREABLE || mode >= RDT_NUM_MODES)
182 return "unknown";
183
184 return rdt_mode_str[mode];
185}
186
129/* set uid and gid of rdtgroup dirs and files to that of the creator */ 187/* set uid and gid of rdtgroup dirs and files to that of the creator */
130static int rdtgroup_kn_set_ugid(struct kernfs_node *kn) 188static int rdtgroup_kn_set_ugid(struct kernfs_node *kn)
131{ 189{
@@ -207,8 +265,12 @@ static int rdtgroup_cpus_show(struct kernfs_open_file *of,
207 rdtgrp = rdtgroup_kn_lock_live(of->kn); 265 rdtgrp = rdtgroup_kn_lock_live(of->kn);
208 266
209 if (rdtgrp) { 267 if (rdtgrp) {
210 seq_printf(s, is_cpu_list(of) ? "%*pbl\n" : "%*pb\n", 268 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)
211 cpumask_pr_args(&rdtgrp->cpu_mask)); 269 seq_printf(s, is_cpu_list(of) ? "%*pbl\n" : "%*pb\n",
270 cpumask_pr_args(&rdtgrp->plr->d->cpu_mask));
271 else
272 seq_printf(s, is_cpu_list(of) ? "%*pbl\n" : "%*pb\n",
273 cpumask_pr_args(&rdtgrp->cpu_mask));
212 } else { 274 } else {
213 ret = -ENOENT; 275 ret = -ENOENT;
214 } 276 }
@@ -394,6 +456,13 @@ static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of,
394 goto unlock; 456 goto unlock;
395 } 457 }
396 458
459 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED ||
460 rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
461 ret = -EINVAL;
462 rdt_last_cmd_puts("pseudo-locking in progress\n");
463 goto unlock;
464 }
465
397 if (is_cpu_list(of)) 466 if (is_cpu_list(of))
398 ret = cpulist_parse(buf, newmask); 467 ret = cpulist_parse(buf, newmask);
399 else 468 else
@@ -509,6 +578,32 @@ static int __rdtgroup_move_task(struct task_struct *tsk,
509 return ret; 578 return ret;
510} 579}
511 580
581/**
582 * rdtgroup_tasks_assigned - Test if tasks have been assigned to resource group
583 * @r: Resource group
584 *
585 * Return: 1 if tasks have been assigned to @r, 0 otherwise
586 */
587int rdtgroup_tasks_assigned(struct rdtgroup *r)
588{
589 struct task_struct *p, *t;
590 int ret = 0;
591
592 lockdep_assert_held(&rdtgroup_mutex);
593
594 rcu_read_lock();
595 for_each_process_thread(p, t) {
596 if ((r->type == RDTCTRL_GROUP && t->closid == r->closid) ||
597 (r->type == RDTMON_GROUP && t->rmid == r->mon.rmid)) {
598 ret = 1;
599 break;
600 }
601 }
602 rcu_read_unlock();
603
604 return ret;
605}
606
512static int rdtgroup_task_write_permission(struct task_struct *task, 607static int rdtgroup_task_write_permission(struct task_struct *task,
513 struct kernfs_open_file *of) 608 struct kernfs_open_file *of)
514{ 609{
@@ -570,13 +665,22 @@ static ssize_t rdtgroup_tasks_write(struct kernfs_open_file *of,
570 if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0) 665 if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
571 return -EINVAL; 666 return -EINVAL;
572 rdtgrp = rdtgroup_kn_lock_live(of->kn); 667 rdtgrp = rdtgroup_kn_lock_live(of->kn);
668 if (!rdtgrp) {
669 rdtgroup_kn_unlock(of->kn);
670 return -ENOENT;
671 }
573 rdt_last_cmd_clear(); 672 rdt_last_cmd_clear();
574 673
575 if (rdtgrp) 674 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED ||
576 ret = rdtgroup_move_task(pid, rdtgrp, of); 675 rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
577 else 676 ret = -EINVAL;
578 ret = -ENOENT; 677 rdt_last_cmd_puts("pseudo-locking in progress\n");
678 goto unlock;
679 }
579 680
681 ret = rdtgroup_move_task(pid, rdtgrp, of);
682
683unlock:
580 rdtgroup_kn_unlock(of->kn); 684 rdtgroup_kn_unlock(of->kn);
581 685
582 return ret ?: nbytes; 686 return ret ?: nbytes;
@@ -662,6 +766,94 @@ static int rdt_shareable_bits_show(struct kernfs_open_file *of,
662 return 0; 766 return 0;
663} 767}
664 768
769/**
770 * rdt_bit_usage_show - Display current usage of resources
771 *
772 * A domain is a shared resource that can now be allocated differently. Here
773 * we display the current regions of the domain as an annotated bitmask.
774 * For each domain of this resource its allocation bitmask
775 * is annotated as below to indicate the current usage of the corresponding bit:
776 * 0 - currently unused
777 * X - currently available for sharing and used by software and hardware
778 * H - currently used by hardware only but available for software use
779 * S - currently used and shareable by software only
780 * E - currently used exclusively by one resource group
781 * P - currently pseudo-locked by one resource group
782 */
783static int rdt_bit_usage_show(struct kernfs_open_file *of,
784 struct seq_file *seq, void *v)
785{
786 struct rdt_resource *r = of->kn->parent->priv;
787 u32 sw_shareable = 0, hw_shareable = 0;
788 u32 exclusive = 0, pseudo_locked = 0;
789 struct rdt_domain *dom;
790 int i, hwb, swb, excl, psl;
791 enum rdtgrp_mode mode;
792 bool sep = false;
793 u32 *ctrl;
794
795 mutex_lock(&rdtgroup_mutex);
796 hw_shareable = r->cache.shareable_bits;
797 list_for_each_entry(dom, &r->domains, list) {
798 if (sep)
799 seq_putc(seq, ';');
800 ctrl = dom->ctrl_val;
801 sw_shareable = 0;
802 exclusive = 0;
803 seq_printf(seq, "%d=", dom->id);
804 for (i = 0; i < r->num_closid; i++, ctrl++) {
805 if (!closid_allocated(i))
806 continue;
807 mode = rdtgroup_mode_by_closid(i);
808 switch (mode) {
809 case RDT_MODE_SHAREABLE:
810 sw_shareable |= *ctrl;
811 break;
812 case RDT_MODE_EXCLUSIVE:
813 exclusive |= *ctrl;
814 break;
815 case RDT_MODE_PSEUDO_LOCKSETUP:
816 /*
817 * RDT_MODE_PSEUDO_LOCKSETUP is possible
818 * here but not included since the CBM
819 * associated with this CLOSID in this mode
820 * is not initialized and no task or cpu can be
821 * assigned this CLOSID.
822 */
823 break;
824 case RDT_MODE_PSEUDO_LOCKED:
825 case RDT_NUM_MODES:
826 WARN(1,
827 "invalid mode for closid %d\n", i);
828 break;
829 }
830 }
831 for (i = r->cache.cbm_len - 1; i >= 0; i--) {
832 pseudo_locked = dom->plr ? dom->plr->cbm : 0;
833 hwb = test_bit(i, (unsigned long *)&hw_shareable);
834 swb = test_bit(i, (unsigned long *)&sw_shareable);
835 excl = test_bit(i, (unsigned long *)&exclusive);
836 psl = test_bit(i, (unsigned long *)&pseudo_locked);
837 if (hwb && swb)
838 seq_putc(seq, 'X');
839 else if (hwb && !swb)
840 seq_putc(seq, 'H');
841 else if (!hwb && swb)
842 seq_putc(seq, 'S');
843 else if (excl)
844 seq_putc(seq, 'E');
845 else if (psl)
846 seq_putc(seq, 'P');
847 else /* Unused bits remain */
848 seq_putc(seq, '0');
849 }
850 sep = true;
851 }
852 seq_putc(seq, '\n');
853 mutex_unlock(&rdtgroup_mutex);
854 return 0;
855}
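
A standalone sketch of just the per-bit annotation step above, with made-up masks for a 12-bit CBM; the closid iteration, locking and seq_file output are omitted:

        #include <stdint.h>
        #include <stdio.h>

        static void print_bit_usage(uint32_t hw, uint32_t sw, uint32_t excl,
                                    uint32_t psl, unsigned int cbm_len)
        {
                int i;

                for (i = cbm_len - 1; i >= 0; i--) {
                        int hwb = !!(hw & (1U << i));
                        int swb = !!(sw & (1U << i));

                        if (hwb && swb)
                                putchar('X');
                        else if (hwb)
                                putchar('H');
                        else if (swb)
                                putchar('S');
                        else if (excl & (1U << i))
                                putchar('E');
                        else if (psl & (1U << i))
                                putchar('P');
                        else
                                putchar('0');
                }
                putchar('\n');
        }

        int main(void)
        {
                /* hw=0x003, sw=0x0f0, excl=0x700, psl=0: prints "0EEESSSS00HH" */
                print_bit_usage(0x003, 0x0f0, 0x700, 0x000, 12);
                return 0;
        }
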
856
665static int rdt_min_bw_show(struct kernfs_open_file *of, 857static int rdt_min_bw_show(struct kernfs_open_file *of,
666 struct seq_file *seq, void *v) 858 struct seq_file *seq, void *v)
667{ 859{
@@ -740,6 +932,269 @@ static ssize_t max_threshold_occ_write(struct kernfs_open_file *of,
740 return nbytes; 932 return nbytes;
741} 933}
742 934
935/*
936 * rdtgroup_mode_show - Display mode of this resource group
937 */
938static int rdtgroup_mode_show(struct kernfs_open_file *of,
939 struct seq_file *s, void *v)
940{
941 struct rdtgroup *rdtgrp;
942
943 rdtgrp = rdtgroup_kn_lock_live(of->kn);
944 if (!rdtgrp) {
945 rdtgroup_kn_unlock(of->kn);
946 return -ENOENT;
947 }
948
949 seq_printf(s, "%s\n", rdtgroup_mode_str(rdtgrp->mode));
950
951 rdtgroup_kn_unlock(of->kn);
952 return 0;
953}
954
955/**
956 * rdtgroup_cbm_overlaps - Does CBM for intended closid overlap with other
957 * @r: Resource to which domain instance @d belongs.
958 * @d: The domain instance for which @closid is being tested.
959 * @cbm: Capacity bitmask being tested.
960 * @closid: Intended closid for @cbm.
961 * @exclusive: Only check if overlaps with exclusive resource groups
962 *
963 * Checks if provided @cbm intended to be used for @closid on domain
964 * @d overlaps with any other closids or other hardware usage associated
965 * with this domain. If @exclusive is true then only overlaps with
966 * resource groups in exclusive mode will be considered. If @exclusive
967 * is false then overlaps with any resource group or hardware entities
968 * will be considered.
969 *
970 * Return: false if CBM does not overlap, true if it does.
971 */
972bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
973 u32 _cbm, int closid, bool exclusive)
974{
975 unsigned long *cbm = (unsigned long *)&_cbm;
976 unsigned long *ctrl_b;
977 enum rdtgrp_mode mode;
978 u32 *ctrl;
979 int i;
980
981 /* Check for any overlap with regions used by hardware directly */
982 if (!exclusive) {
983 if (bitmap_intersects(cbm,
984 (unsigned long *)&r->cache.shareable_bits,
985 r->cache.cbm_len))
986 return true;
987 }
988
989 /* Check for overlap with other resource groups */
990 ctrl = d->ctrl_val;
991 for (i = 0; i < r->num_closid; i++, ctrl++) {
992 ctrl_b = (unsigned long *)ctrl;
993 mode = rdtgroup_mode_by_closid(i);
994 if (closid_allocated(i) && i != closid &&
995 mode != RDT_MODE_PSEUDO_LOCKSETUP) {
996 if (bitmap_intersects(cbm, ctrl_b, r->cache.cbm_len)) {
997 if (exclusive) {
998 if (mode == RDT_MODE_EXCLUSIVE)
999 return true;
1000 continue;
1001 }
1002 return true;
1003 }
1004 }
1005 }
1006
1007 return false;
1008}
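
The overlap test above boils down to bitmap intersections. A simplified user-space model; it ignores the group-mode and allocated-CLOSID filtering that the real function also applies:

        #include <stdbool.h>
        #include <stdint.h>

        static bool cbm_overlaps(uint32_t cbm, const uint32_t *ctrl_val,
                                 unsigned int num_closid, unsigned int closid,
                                 uint32_t hw_shareable, bool exclusive_only)
        {
                unsigned int i;

                /* Regions used directly by hardware only matter for the non-exclusive check. */
                if (!exclusive_only && (cbm & hw_shareable))
                        return true;

                for (i = 0; i < num_closid; i++) {
                        if (i == closid)
                                continue;
                        if (cbm & ctrl_val[i])
                                return true;
                }
                return false;
        }
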
1009
1010/**
1011 * rdtgroup_mode_test_exclusive - Test if this resource group can be exclusive
1012 *
1013 * An exclusive resource group implies that there should be no sharing of
1014 * its allocated resources. At the time this group is considered to be
1015 * exclusive this test can determine if its current schemata supports this
1016 * setting by testing for overlap with all other resource groups.
1017 *
1018 * Return: true if resource group can be exclusive, false if there is overlap
1019 * with allocations of other resource groups and thus this resource group
1020 * cannot be exclusive.
1021 */
1022static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp)
1023{
1024 int closid = rdtgrp->closid;
1025 struct rdt_resource *r;
1026 struct rdt_domain *d;
1027
1028 for_each_alloc_enabled_rdt_resource(r) {
1029 list_for_each_entry(d, &r->domains, list) {
1030 if (rdtgroup_cbm_overlaps(r, d, d->ctrl_val[closid],
1031 rdtgrp->closid, false))
1032 return false;
1033 }
1034 }
1035
1036 return true;
1037}
1038
1039/**
1040 * rdtgroup_mode_write - Modify the resource group's mode
1041 *
1042 */
1043static ssize_t rdtgroup_mode_write(struct kernfs_open_file *of,
1044 char *buf, size_t nbytes, loff_t off)
1045{
1046 struct rdtgroup *rdtgrp;
1047 enum rdtgrp_mode mode;
1048 int ret = 0;
1049
1050 /* Valid input requires a trailing newline */
1051 if (nbytes == 0 || buf[nbytes - 1] != '\n')
1052 return -EINVAL;
1053 buf[nbytes - 1] = '\0';
1054
1055 rdtgrp = rdtgroup_kn_lock_live(of->kn);
1056 if (!rdtgrp) {
1057 rdtgroup_kn_unlock(of->kn);
1058 return -ENOENT;
1059 }
1060
1061 rdt_last_cmd_clear();
1062
1063 mode = rdtgrp->mode;
1064
1065 if ((!strcmp(buf, "shareable") && mode == RDT_MODE_SHAREABLE) ||
1066 (!strcmp(buf, "exclusive") && mode == RDT_MODE_EXCLUSIVE) ||
1067 (!strcmp(buf, "pseudo-locksetup") &&
1068 mode == RDT_MODE_PSEUDO_LOCKSETUP) ||
1069 (!strcmp(buf, "pseudo-locked") && mode == RDT_MODE_PSEUDO_LOCKED))
1070 goto out;
1071
1072 if (mode == RDT_MODE_PSEUDO_LOCKED) {
1073 rdt_last_cmd_printf("cannot change pseudo-locked group\n");
1074 ret = -EINVAL;
1075 goto out;
1076 }
1077
1078 if (!strcmp(buf, "shareable")) {
1079 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
1080 ret = rdtgroup_locksetup_exit(rdtgrp);
1081 if (ret)
1082 goto out;
1083 }
1084 rdtgrp->mode = RDT_MODE_SHAREABLE;
1085 } else if (!strcmp(buf, "exclusive")) {
1086 if (!rdtgroup_mode_test_exclusive(rdtgrp)) {
1087 rdt_last_cmd_printf("schemata overlaps\n");
1088 ret = -EINVAL;
1089 goto out;
1090 }
1091 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
1092 ret = rdtgroup_locksetup_exit(rdtgrp);
1093 if (ret)
1094 goto out;
1095 }
1096 rdtgrp->mode = RDT_MODE_EXCLUSIVE;
1097 } else if (!strcmp(buf, "pseudo-locksetup")) {
1098 ret = rdtgroup_locksetup_enter(rdtgrp);
1099 if (ret)
1100 goto out;
1101 rdtgrp->mode = RDT_MODE_PSEUDO_LOCKSETUP;
1102 } else {
1103 rdt_last_cmd_printf("unknown/unsupported mode\n");
1104 ret = -EINVAL;
1105 }
1106
1107out:
1108 rdtgroup_kn_unlock(of->kn);
1109 return ret ?: nbytes;
1110}
1111
1112/**
1113 * rdtgroup_cbm_to_size - Translate CBM to size in bytes
1114 * @r: RDT resource to which @d belongs.
1115 * @d: RDT domain instance.
1116 * @cbm: bitmask for which the size should be computed.
1117 *
1118 * The bitmask provided associated with the RDT domain instance @d will be
1119 * translated into how many bytes it represents. The size in bytes is
1120 * computed by first dividing the total cache size by the CBM length to
1121 * determine how many bytes each bit in the bitmask represents. The result
1122 * is multiplied with the number of bits set in the bitmask.
1123 */
1124unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r,
1125 struct rdt_domain *d, u32 cbm)
1126{
1127 struct cpu_cacheinfo *ci;
1128 unsigned int size = 0;
1129 int num_b, i;
1130
1131 num_b = bitmap_weight((unsigned long *)&cbm, r->cache.cbm_len);
1132 ci = get_cpu_cacheinfo(cpumask_any(&d->cpu_mask));
1133 for (i = 0; i < ci->num_leaves; i++) {
1134 if (ci->info_list[i].level == r->cache_level) {
1135 size = ci->info_list[i].size / r->cache.cbm_len * num_b;
1136 break;
1137 }
1138 }
1139
1140 return size;
1141}
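
As a worked example of the computation above: a 16 MiB L3 cache with a 20-bit CBM gives 16777216 / 20 = 838860 bytes per bit, so a 4-bit mask maps to 3355440 bytes (about 3.2 MiB). The same arithmetic as a tiny standalone function:

        #include <stdint.h>
        #include <stdio.h>

        /* size = cache_size / cbm_len * popcount(cbm), as in rdtgroup_cbm_to_size() */
        static unsigned int cbm_to_size(unsigned int cache_size,
                                        unsigned int cbm_len, uint32_t cbm)
        {
                return cache_size / cbm_len * __builtin_popcount(cbm);
        }

        int main(void)
        {
                printf("%u\n", cbm_to_size(16 * 1024 * 1024, 20, 0xf));   /* 3355440 */
                return 0;
        }
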
1142
1143/**
1144 * rdtgroup_size_show - Display size in bytes of allocated regions
1145 *
1146 * The "size" file mirrors the layout of the "schemata" file, printing the
1147 * size in bytes of each region instead of the capacity bitmask.
1148 *
1149 */
1150static int rdtgroup_size_show(struct kernfs_open_file *of,
1151 struct seq_file *s, void *v)
1152{
1153 struct rdtgroup *rdtgrp;
1154 struct rdt_resource *r;
1155 struct rdt_domain *d;
1156 unsigned int size;
1157 bool sep = false;
1158 u32 cbm;
1159
1160 rdtgrp = rdtgroup_kn_lock_live(of->kn);
1161 if (!rdtgrp) {
1162 rdtgroup_kn_unlock(of->kn);
1163 return -ENOENT;
1164 }
1165
1166 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
1167 seq_printf(s, "%*s:", max_name_width, rdtgrp->plr->r->name);
1168 size = rdtgroup_cbm_to_size(rdtgrp->plr->r,
1169 rdtgrp->plr->d,
1170 rdtgrp->plr->cbm);
1171 seq_printf(s, "%d=%u\n", rdtgrp->plr->d->id, size);
1172 goto out;
1173 }
1174
1175 for_each_alloc_enabled_rdt_resource(r) {
1176 seq_printf(s, "%*s:", max_name_width, r->name);
1177 list_for_each_entry(d, &r->domains, list) {
1178 if (sep)
1179 seq_putc(s, ';');
1180 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
1181 size = 0;
1182 } else {
1183 cbm = d->ctrl_val[rdtgrp->closid];
1184 size = rdtgroup_cbm_to_size(r, d, cbm);
1185 }
1186 seq_printf(s, "%d=%u", d->id, size);
1187 sep = true;
1188 }
1189 seq_putc(s, '\n');
1190 }
1191
1192out:
1193 rdtgroup_kn_unlock(of->kn);
1194
1195 return 0;
1196}
1197
743/* rdtgroup information files for one cache resource. */ 1198/* rdtgroup information files for one cache resource. */
744static struct rftype res_common_files[] = { 1199static struct rftype res_common_files[] = {
745 { 1200 {
@@ -792,6 +1247,13 @@ static struct rftype res_common_files[] = {
792 .fflags = RF_CTRL_INFO | RFTYPE_RES_CACHE, 1247 .fflags = RF_CTRL_INFO | RFTYPE_RES_CACHE,
793 }, 1248 },
794 { 1249 {
1250 .name = "bit_usage",
1251 .mode = 0444,
1252 .kf_ops = &rdtgroup_kf_single_ops,
1253 .seq_show = rdt_bit_usage_show,
1254 .fflags = RF_CTRL_INFO | RFTYPE_RES_CACHE,
1255 },
1256 {
795 .name = "min_bandwidth", 1257 .name = "min_bandwidth",
796 .mode = 0444, 1258 .mode = 0444,
797 .kf_ops = &rdtgroup_kf_single_ops, 1259 .kf_ops = &rdtgroup_kf_single_ops,
@@ -853,6 +1315,22 @@ static struct rftype res_common_files[] = {
853 .seq_show = rdtgroup_schemata_show, 1315 .seq_show = rdtgroup_schemata_show,
854 .fflags = RF_CTRL_BASE, 1316 .fflags = RF_CTRL_BASE,
855 }, 1317 },
1318 {
1319 .name = "mode",
1320 .mode = 0644,
1321 .kf_ops = &rdtgroup_kf_single_ops,
1322 .write = rdtgroup_mode_write,
1323 .seq_show = rdtgroup_mode_show,
1324 .fflags = RF_CTRL_BASE,
1325 },
1326 {
1327 .name = "size",
1328 .mode = 0444,
1329 .kf_ops = &rdtgroup_kf_single_ops,
1330 .seq_show = rdtgroup_size_show,
1331 .fflags = RF_CTRL_BASE,
1332 },
1333
856}; 1334};
857 1335
858static int rdtgroup_add_files(struct kernfs_node *kn, unsigned long fflags) 1336static int rdtgroup_add_files(struct kernfs_node *kn, unsigned long fflags)
@@ -883,6 +1361,103 @@ error:
883 return ret; 1361 return ret;
884} 1362}
885 1363
1364/**
1365 * rdtgroup_kn_mode_restrict - Restrict user access to named resctrl file
1366 * @r: The resource group with which the file is associated.
1367 * @name: Name of the file
1368 *
1369 * The permissions of named resctrl file, directory, or link are modified
1370 * to not allow read, write, or execute by any user.
1371 *
1372 * WARNING: This function is intended to communicate to the user that the
1373 * resctrl file has been locked down - that it is not relevant to the
1374 * particular state the system finds itself in. It should not be relied
1375 * on to protect from user access because after the file's permissions
1376 * are restricted the user can still change the permissions using chmod
1377 * from the command line.
1378 *
1379 * Return: 0 on success, <0 on failure.
1380 */
1381int rdtgroup_kn_mode_restrict(struct rdtgroup *r, const char *name)
1382{
1383 struct iattr iattr = {.ia_valid = ATTR_MODE,};
1384 struct kernfs_node *kn;
1385 int ret = 0;
1386
1387 kn = kernfs_find_and_get_ns(r->kn, name, NULL);
1388 if (!kn)
1389 return -ENOENT;
1390
1391 switch (kernfs_type(kn)) {
1392 case KERNFS_DIR:
1393 iattr.ia_mode = S_IFDIR;
1394 break;
1395 case KERNFS_FILE:
1396 iattr.ia_mode = S_IFREG;
1397 break;
1398 case KERNFS_LINK:
1399 iattr.ia_mode = S_IFLNK;
1400 break;
1401 }
1402
1403 ret = kernfs_setattr(kn, &iattr);
1404 kernfs_put(kn);
1405 return ret;
1406}
1407
1408/**
1409 * rdtgroup_kn_mode_restore - Restore user access to named resctrl file
1410 * @r: The resource group with which the file is associated.
1411 * @name: Name of the file
1412 * @mask: Mask of permissions that should be restored
1413 *
1414 * Restore the permissions of the named file. If @name is a directory the
1415 * permissions of its parent will be used.
1416 *
1417 * Return: 0 on success, <0 on failure.
1418 */
1419int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name,
1420 umode_t mask)
1421{
1422 struct iattr iattr = {.ia_valid = ATTR_MODE,};
1423 struct kernfs_node *kn, *parent;
1424 struct rftype *rfts, *rft;
1425 int ret, len;
1426
1427 rfts = res_common_files;
1428 len = ARRAY_SIZE(res_common_files);
1429
1430 for (rft = rfts; rft < rfts + len; rft++) {
1431 if (!strcmp(rft->name, name))
1432 iattr.ia_mode = rft->mode & mask;
1433 }
1434
1435 kn = kernfs_find_and_get_ns(r->kn, name, NULL);
1436 if (!kn)
1437 return -ENOENT;
1438
1439 switch (kernfs_type(kn)) {
1440 case KERNFS_DIR:
1441 parent = kernfs_get_parent(kn);
1442 if (parent) {
1443 iattr.ia_mode |= parent->mode;
1444 kernfs_put(parent);
1445 }
1446 iattr.ia_mode |= S_IFDIR;
1447 break;
1448 case KERNFS_FILE:
1449 iattr.ia_mode |= S_IFREG;
1450 break;
1451 case KERNFS_LINK:
1452 iattr.ia_mode |= S_IFLNK;
1453 break;
1454 }
1455
1456 ret = kernfs_setattr(kn, &iattr);
1457 kernfs_put(kn);
1458 return ret;
1459}
1460
886static int rdtgroup_mkdir_info_resdir(struct rdt_resource *r, char *name, 1461static int rdtgroup_mkdir_info_resdir(struct rdt_resource *r, char *name,
887 unsigned long fflags) 1462 unsigned long fflags)
888{ 1463{
@@ -1224,6 +1799,9 @@ void rdtgroup_kn_unlock(struct kernfs_node *kn)
1224 1799
1225 if (atomic_dec_and_test(&rdtgrp->waitcount) && 1800 if (atomic_dec_and_test(&rdtgrp->waitcount) &&
1226 (rdtgrp->flags & RDT_DELETED)) { 1801 (rdtgrp->flags & RDT_DELETED)) {
1802 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
1803 rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)
1804 rdtgroup_pseudo_lock_remove(rdtgrp);
1227 kernfs_unbreak_active_protection(kn); 1805 kernfs_unbreak_active_protection(kn);
1228 kernfs_put(rdtgrp->kn); 1806 kernfs_put(rdtgrp->kn);
1229 kfree(rdtgrp); 1807 kfree(rdtgrp);
@@ -1289,10 +1867,16 @@ static struct dentry *rdt_mount(struct file_system_type *fs_type,
1289 rdtgroup_default.mon.mon_data_kn = kn_mondata; 1867 rdtgroup_default.mon.mon_data_kn = kn_mondata;
1290 } 1868 }
1291 1869
1870 ret = rdt_pseudo_lock_init();
1871 if (ret) {
1872 dentry = ERR_PTR(ret);
1873 goto out_mondata;
1874 }
1875
1292 dentry = kernfs_mount(fs_type, flags, rdt_root, 1876 dentry = kernfs_mount(fs_type, flags, rdt_root,
1293 RDTGROUP_SUPER_MAGIC, NULL); 1877 RDTGROUP_SUPER_MAGIC, NULL);
1294 if (IS_ERR(dentry)) 1878 if (IS_ERR(dentry))
1295 goto out_mondata; 1879 goto out_psl;
1296 1880
1297 if (rdt_alloc_capable) 1881 if (rdt_alloc_capable)
1298 static_branch_enable_cpuslocked(&rdt_alloc_enable_key); 1882 static_branch_enable_cpuslocked(&rdt_alloc_enable_key);
@@ -1310,6 +1894,8 @@ static struct dentry *rdt_mount(struct file_system_type *fs_type,
1310 1894
1311 goto out; 1895 goto out;
1312 1896
1897out_psl:
1898 rdt_pseudo_lock_release();
1313out_mondata: 1899out_mondata:
1314 if (rdt_mon_capable) 1900 if (rdt_mon_capable)
1315 kernfs_remove(kn_mondata); 1901 kernfs_remove(kn_mondata);
@@ -1447,6 +2033,10 @@ static void rmdir_all_sub(void)
1447 if (rdtgrp == &rdtgroup_default) 2033 if (rdtgrp == &rdtgroup_default)
1448 continue; 2034 continue;
1449 2035
2036 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
2037 rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)
2038 rdtgroup_pseudo_lock_remove(rdtgrp);
2039
1450 /* 2040 /*
1451 * Give any CPUs back to the default group. We cannot copy 2041 * Give any CPUs back to the default group. We cannot copy
1452 * cpu_online_mask because a CPU might have executed the 2042 * cpu_online_mask because a CPU might have executed the
@@ -1483,6 +2073,8 @@ static void rdt_kill_sb(struct super_block *sb)
1483 reset_all_ctrls(r); 2073 reset_all_ctrls(r);
1484 cdp_disable_all(); 2074 cdp_disable_all();
1485 rmdir_all_sub(); 2075 rmdir_all_sub();
2076 rdt_pseudo_lock_release();
2077 rdtgroup_default.mode = RDT_MODE_SHAREABLE;
1486 static_branch_disable_cpuslocked(&rdt_alloc_enable_key); 2078 static_branch_disable_cpuslocked(&rdt_alloc_enable_key);
1487 static_branch_disable_cpuslocked(&rdt_mon_enable_key); 2079 static_branch_disable_cpuslocked(&rdt_mon_enable_key);
1488 static_branch_disable_cpuslocked(&rdt_enable_key); 2080 static_branch_disable_cpuslocked(&rdt_enable_key);
@@ -1682,6 +2274,114 @@ out_destroy:
1682 return ret; 2274 return ret;
1683} 2275}
1684 2276
2277/**
2278 * cbm_ensure_valid - Enforce validity on provided CBM
2279 * @_val: Candidate CBM
2280 * @r: RDT resource to which the CBM belongs
2281 *
2282 * The provided CBM represents all cache portions available for use. This
2283 * may be represented by a bitmap that does not consist of contiguous ones
2284 * and thus be an invalid CBM.
2285 * Here the provided CBM is forced to be a valid CBM by only considering
2286 * the first set of contiguous bits as valid and clearing all other bits.
2287 * The intention here is to provide a valid default CBM with which a new
2288 * resource group is initialized. The user can follow this with a
2289 * modification to the CBM if the default does not satisfy the
2290 * requirements.
2291 */
2292static void cbm_ensure_valid(u32 *_val, struct rdt_resource *r)
2293{
2294 /*
2295 * Convert the u32 _val to an unsigned long required by all the bit
2296 * operations within this function. No more than 32 bits of this
2297 * converted value can be accessed because all bit operations are
2298 * additionally provided with cbm_len that is initialized during
2299 * hardware enumeration using five bits from the EAX register and
2300 * thus never can exceed 32 bits.
2301 */
2302 unsigned long *val = (unsigned long *)_val;
2303 unsigned int cbm_len = r->cache.cbm_len;
2304 unsigned long first_bit, zero_bit;
2305
2306 if (*val == 0)
2307 return;
2308
2309 first_bit = find_first_bit(val, cbm_len);
2310 zero_bit = find_next_zero_bit(val, cbm_len, first_bit);
2311
2312 /* Clear any remaining bits to ensure contiguous region */
2313 bitmap_clear(val, zero_bit, cbm_len - zero_bit);
2314}
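
Concretely, for a 20-bit CBM of 0x76 (0b1110110) the first set bit is bit 1 and the next clear bit is bit 3, so everything from bit 3 upwards is cleared and 0x6 remains. A user-space sketch of the same trimming (it assumes cbm_len < 32, which holds because the length comes from a 5-bit CPUID field):

        #include <stdint.h>
        #include <stdio.h>

        static uint32_t first_contiguous_run(uint32_t cbm, unsigned int cbm_len)
        {
                unsigned int first, zero;

                if (!cbm)
                        return 0;

                first = __builtin_ctz(cbm);                     /* lowest set bit */
                for (zero = first; zero < cbm_len; zero++)      /* next clear bit after it */
                        if (!(cbm & (1U << zero)))
                                break;

                /* keep only bits [first, zero) */
                return cbm & (((1U << zero) - 1U) & ~((1U << first) - 1U));
        }

        int main(void)
        {
                printf("%#x\n", first_contiguous_run(0x76, 20));        /* prints 0x6 */
                return 0;
        }
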
2315
2316/**
2317 * rdtgroup_init_alloc - Initialize the new RDT group's allocations
2318 *
2319 * A new RDT group is being created on an allocation capable (CAT)
2320 * supporting system. Set this group up to start off with all usable
2321 * allocations. That is, all shareable and unused bits.
2322 *
2323 * All-zero CBM is invalid. If there are no more shareable bits available
2324 * on any domain then the entire allocation will fail.
2325 */
2326static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
2327{
2328 u32 used_b = 0, unused_b = 0;
2329 u32 closid = rdtgrp->closid;
2330 struct rdt_resource *r;
2331 enum rdtgrp_mode mode;
2332 struct rdt_domain *d;
2333 int i, ret;
2334 u32 *ctrl;
2335
2336 for_each_alloc_enabled_rdt_resource(r) {
2337 list_for_each_entry(d, &r->domains, list) {
2338 d->have_new_ctrl = false;
2339 d->new_ctrl = r->cache.shareable_bits;
2340 used_b = r->cache.shareable_bits;
2341 ctrl = d->ctrl_val;
2342 for (i = 0; i < r->num_closid; i++, ctrl++) {
2343 if (closid_allocated(i) && i != closid) {
2344 mode = rdtgroup_mode_by_closid(i);
2345 if (mode == RDT_MODE_PSEUDO_LOCKSETUP)
2346 break;
2347 used_b |= *ctrl;
2348 if (mode == RDT_MODE_SHAREABLE)
2349 d->new_ctrl |= *ctrl;
2350 }
2351 }
2352 if (d->plr && d->plr->cbm > 0)
2353 used_b |= d->plr->cbm;
2354 unused_b = used_b ^ (BIT_MASK(r->cache.cbm_len) - 1);
2355 unused_b &= BIT_MASK(r->cache.cbm_len) - 1;
2356 d->new_ctrl |= unused_b;
2357 /*
2358 * Force the initial CBM to be valid, user can
2359 * modify the CBM based on system availability.
2360 */
2361 cbm_ensure_valid(&d->new_ctrl, r);
2362 if (bitmap_weight((unsigned long *) &d->new_ctrl,
2363 r->cache.cbm_len) <
2364 r->cache.min_cbm_bits) {
2365 rdt_last_cmd_printf("no space on %s:%d\n",
2366 r->name, d->id);
2367 return -ENOSPC;
2368 }
2369 d->have_new_ctrl = true;
2370 }
2371 }
2372
2373 for_each_alloc_enabled_rdt_resource(r) {
2374 ret = update_domains(r, rdtgrp->closid);
2375 if (ret < 0) {
2376 rdt_last_cmd_puts("failed to initialize allocations\n");
2377 return ret;
2378 }
2379 rdtgrp->mode = RDT_MODE_SHAREABLE;
2380 }
2381
2382 return 0;
2383}
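
The default-CBM computation above can be summarised as: start from the hardware-shareable bits, add every bit no other group uses, then force contiguity. A condensed sketch that ignores the extra OR-in of other shareable groups' CBMs and the pseudo-locked region handling:

        #include <stdint.h>

        static uint32_t default_new_ctrl(uint32_t shareable_bits, uint32_t used_b,
                                         unsigned int cbm_len)
        {
                uint32_t full = (1U << cbm_len) - 1;    /* assumes cbm_len < 32 */
                uint32_t unused_b = ~used_b & full;

                return (shareable_bits | unused_b) & full;      /* then cbm_ensure_valid() */
        }
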
2384
1685static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, 2385static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
1686 struct kernfs_node *prgrp_kn, 2386 struct kernfs_node *prgrp_kn,
1687 const char *name, umode_t mode, 2387 const char *name, umode_t mode,
@@ -1700,6 +2400,14 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
1700 goto out_unlock; 2400 goto out_unlock;
1701 } 2401 }
1702 2402
2403 if (rtype == RDTMON_GROUP &&
2404 (prdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
2405 prdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)) {
2406 ret = -EINVAL;
2407 rdt_last_cmd_puts("pseudo-locking in progress\n");
2408 goto out_unlock;
2409 }
2410
1703 /* allocate the rdtgroup. */ 2411 /* allocate the rdtgroup. */
1704 rdtgrp = kzalloc(sizeof(*rdtgrp), GFP_KERNEL); 2412 rdtgrp = kzalloc(sizeof(*rdtgrp), GFP_KERNEL);
1705 if (!rdtgrp) { 2413 if (!rdtgrp) {
@@ -1840,6 +2548,10 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
1840 ret = 0; 2548 ret = 0;
1841 2549
1842 rdtgrp->closid = closid; 2550 rdtgrp->closid = closid;
2551 ret = rdtgroup_init_alloc(rdtgrp);
2552 if (ret < 0)
2553 goto out_id_free;
2554
1843 list_add(&rdtgrp->rdtgroup_list, &rdt_all_groups); 2555 list_add(&rdtgrp->rdtgroup_list, &rdt_all_groups);
1844 2556
1845 if (rdt_mon_capable) { 2557 if (rdt_mon_capable) {
@@ -1850,15 +2562,16 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
1850 ret = mongroup_create_dir(kn, NULL, "mon_groups", NULL); 2562 ret = mongroup_create_dir(kn, NULL, "mon_groups", NULL);
1851 if (ret) { 2563 if (ret) {
1852 rdt_last_cmd_puts("kernfs subdir error\n"); 2564 rdt_last_cmd_puts("kernfs subdir error\n");
1853 goto out_id_free; 2565 goto out_del_list;
1854 } 2566 }
1855 } 2567 }
1856 2568
1857 goto out_unlock; 2569 goto out_unlock;
1858 2570
2571out_del_list:
2572 list_del(&rdtgrp->rdtgroup_list);
1859out_id_free: 2573out_id_free:
1860 closid_free(closid); 2574 closid_free(closid);
1861 list_del(&rdtgrp->rdtgroup_list);
1862out_common_fail: 2575out_common_fail:
1863 mkdir_rdt_prepare_clean(rdtgrp); 2576 mkdir_rdt_prepare_clean(rdtgrp);
1864out_unlock: 2577out_unlock:
@@ -1945,6 +2658,21 @@ static int rdtgroup_rmdir_mon(struct kernfs_node *kn, struct rdtgroup *rdtgrp,
1945 return 0; 2658 return 0;
1946} 2659}
1947 2660
2661static int rdtgroup_ctrl_remove(struct kernfs_node *kn,
2662 struct rdtgroup *rdtgrp)
2663{
2664 rdtgrp->flags = RDT_DELETED;
2665 list_del(&rdtgrp->rdtgroup_list);
2666
2667 /*
2668 * one extra hold on this, will drop when we kfree(rdtgrp)
2669 * in rdtgroup_kn_unlock()
2670 */
2671 kernfs_get(kn);
2672 kernfs_remove(rdtgrp->kn);
2673 return 0;
2674}
2675
1948static int rdtgroup_rmdir_ctrl(struct kernfs_node *kn, struct rdtgroup *rdtgrp, 2676static int rdtgroup_rmdir_ctrl(struct kernfs_node *kn, struct rdtgroup *rdtgrp,
1949 cpumask_var_t tmpmask) 2677 cpumask_var_t tmpmask)
1950{ 2678{
@@ -1970,7 +2698,6 @@ static int rdtgroup_rmdir_ctrl(struct kernfs_node *kn, struct rdtgroup *rdtgrp,
1970 cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask); 2698 cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask);
1971 update_closid_rmid(tmpmask, NULL); 2699 update_closid_rmid(tmpmask, NULL);
1972 2700
1973 rdtgrp->flags = RDT_DELETED;
1974 closid_free(rdtgrp->closid); 2701 closid_free(rdtgrp->closid);
1975 free_rmid(rdtgrp->mon.rmid); 2702 free_rmid(rdtgrp->mon.rmid);
1976 2703
@@ -1979,14 +2706,7 @@ static int rdtgroup_rmdir_ctrl(struct kernfs_node *kn, struct rdtgroup *rdtgrp,
1979 */ 2706 */
1980 free_all_child_rdtgrp(rdtgrp); 2707 free_all_child_rdtgrp(rdtgrp);
1981 2708
1982 list_del(&rdtgrp->rdtgroup_list); 2709 rdtgroup_ctrl_remove(kn, rdtgrp);
1983
1984 /*
1985 * one extra hold on this, will drop when we kfree(rdtgrp)
1986 * in rdtgroup_kn_unlock()
1987 */
1988 kernfs_get(kn);
1989 kernfs_remove(rdtgrp->kn);
1990 2710
1991 return 0; 2711 return 0;
1992} 2712}
@@ -2014,13 +2734,19 @@ static int rdtgroup_rmdir(struct kernfs_node *kn)
2014 * If the rdtgroup is a mon group and parent directory 2734 * If the rdtgroup is a mon group and parent directory
2015 * is a valid "mon_groups" directory, remove the mon group. 2735 * is a valid "mon_groups" directory, remove the mon group.
2016 */ 2736 */
2017 if (rdtgrp->type == RDTCTRL_GROUP && parent_kn == rdtgroup_default.kn) 2737 if (rdtgrp->type == RDTCTRL_GROUP && parent_kn == rdtgroup_default.kn) {
2018 ret = rdtgroup_rmdir_ctrl(kn, rdtgrp, tmpmask); 2738 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
2019 else if (rdtgrp->type == RDTMON_GROUP && 2739 rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
2020 is_mon_groups(parent_kn, kn->name)) 2740 ret = rdtgroup_ctrl_remove(kn, rdtgrp);
2741 } else {
2742 ret = rdtgroup_rmdir_ctrl(kn, rdtgrp, tmpmask);
2743 }
2744 } else if (rdtgrp->type == RDTMON_GROUP &&
2745 is_mon_groups(parent_kn, kn->name)) {
2021 ret = rdtgroup_rmdir_mon(kn, rdtgrp, tmpmask); 2746 ret = rdtgroup_rmdir_mon(kn, rdtgrp, tmpmask);
2022 else 2747 } else {
2023 ret = -EPERM; 2748 ret = -EPERM;
2749 }
2024 2750
2025out: 2751out:
2026 rdtgroup_kn_unlock(kn); 2752 rdtgroup_kn_unlock(kn);
@@ -2046,7 +2772,8 @@ static int __init rdtgroup_setup_root(void)
2046 int ret; 2772 int ret;
2047 2773
2048 rdt_root = kernfs_create_root(&rdtgroup_kf_syscall_ops, 2774 rdt_root = kernfs_create_root(&rdtgroup_kf_syscall_ops,
2049 KERNFS_ROOT_CREATE_DEACTIVATED, 2775 KERNFS_ROOT_CREATE_DEACTIVATED |
2776 KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK,
2050 &rdtgroup_default); 2777 &rdtgroup_default);
2051 if (IS_ERR(rdt_root)) 2778 if (IS_ERR(rdt_root))
2052 return PTR_ERR(rdt_root); 2779 return PTR_ERR(rdt_root);
@@ -2102,6 +2829,29 @@ int __init rdtgroup_init(void)
2102 if (ret) 2829 if (ret)
2103 goto cleanup_mountpoint; 2830 goto cleanup_mountpoint;
2104 2831
2832 /*
2833 * Adding the resctrl debugfs directory here may not be ideal since
2834 * it would let the resctrl debugfs directory appear on the debugfs
2835 * filesystem before the resctrl filesystem is mounted.
2836 * It may also be ok since that would enable debugging of RDT before
2837 * resctrl is mounted.
2838 * The reason why the debugfs directory is created here and not in
2839 * rdt_mount() is because rdt_mount() takes rdtgroup_mutex and
2840 * during the debugfs directory creation also &sb->s_type->i_mutex_key
2841 * (the lockdep class of inode->i_rwsem). Other filesystem
2842 * interactions (eg. SyS_getdents) have the lock ordering:
2843 * &sb->s_type->i_mutex_key --> &mm->mmap_sem
2844 * During mmap(), called with &mm->mmap_sem, the rdtgroup_mutex
2845 * is taken, thus creating dependency:
2846 * &mm->mmap_sem --> rdtgroup_mutex for the latter that can cause
2847 * issues considering the other two lock dependencies.
2848 * By creating the debugfs directory here we avoid a dependency
2849 * that may cause deadlock (even though file operations cannot
2850 * occur until the filesystem is mounted, but I do not know how to
2851 * tell lockdep that).
2852 */
2853 debugfs_resctrl = debugfs_create_dir("resctrl", NULL);
2854
2105 return 0; 2855 return 0;
2106 2856
2107cleanup_mountpoint: 2857cleanup_mountpoint:
@@ -2111,3 +2861,11 @@ cleanup_root:
2111 2861
2112 return ret; 2862 return ret;
2113} 2863}
2864
2865void __exit rdtgroup_exit(void)
2866{
2867 debugfs_remove_recursive(debugfs_resctrl);
2868 unregister_filesystem(&rdt_fs_type);
2869 sysfs_remove_mount_point(fs_kobj, "resctrl");
2870 kernfs_destroy_root(rdt_root);
2871}
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 8c50754c09c1..4b767284b7f5 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -123,8 +123,8 @@ void mce_setup(struct mce *m)
123{ 123{
124 memset(m, 0, sizeof(struct mce)); 124 memset(m, 0, sizeof(struct mce));
125 m->cpu = m->extcpu = smp_processor_id(); 125 m->cpu = m->extcpu = smp_processor_id();
126 /* We hope get_seconds stays lockless */ 126 /* need the internal __ version to avoid deadlocks */
127 m->time = get_seconds(); 127 m->time = __ktime_get_real_seconds();
128 m->cpuvendor = boot_cpu_data.x86_vendor; 128 m->cpuvendor = boot_cpu_data.x86_vendor;
129 m->cpuid = cpuid_eax(1); 129 m->cpuid = cpuid_eax(1);
130 m->socketid = cpu_data(m->extcpu).phys_proc_id; 130 m->socketid = cpu_data(m->extcpu).phys_proc_id;
@@ -1104,6 +1104,101 @@ static void mce_unmap_kpfn(unsigned long pfn)
1104} 1104}
1105#endif 1105#endif
1106 1106
1107
1108/*
1109 * Cases where we avoid rendezvous handler timeout:
1110 * 1) If this CPU is offline.
1111 *
1112 * 2) If crashing_cpu was set, e.g. we're entering kdump and we need to
1113 * skip those CPUs which remain looping in the 1st kernel - see
1114 * crash_nmi_callback().
1115 *
1116 * Note: there still is a small window between kexec-ing and the new,
1117 * kdump kernel establishing a new #MC handler where a broadcasted MCE
1118 * might not get handled properly.
1119 */
1120static bool __mc_check_crashing_cpu(int cpu)
1121{
1122 if (cpu_is_offline(cpu) ||
1123 (crashing_cpu != -1 && crashing_cpu != cpu)) {
1124 u64 mcgstatus;
1125
1126 mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
1127 if (mcgstatus & MCG_STATUS_RIPV) {
1128 mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
1129 return true;
1130 }
1131 }
1132 return false;
1133}
1134
1135static void __mc_scan_banks(struct mce *m, struct mce *final,
1136 unsigned long *toclear, unsigned long *valid_banks,
1137 int no_way_out, int *worst)
1138{
1139 struct mca_config *cfg = &mca_cfg;
1140 int severity, i;
1141
1142 for (i = 0; i < cfg->banks; i++) {
1143 __clear_bit(i, toclear);
1144 if (!test_bit(i, valid_banks))
1145 continue;
1146
1147 if (!mce_banks[i].ctl)
1148 continue;
1149
1150 m->misc = 0;
1151 m->addr = 0;
1152 m->bank = i;
1153
1154 m->status = mce_rdmsrl(msr_ops.status(i));
1155 if (!(m->status & MCI_STATUS_VAL))
1156 continue;
1157
1158 /*
1159 * Corrected or non-signaled errors are handled by
1160 * machine_check_poll(). Leave them alone, unless this panics.
1161 */
1162 if (!(m->status & (cfg->ser ? MCI_STATUS_S : MCI_STATUS_UC)) &&
1163 !no_way_out)
1164 continue;
1165
1166 /* Set taint even when machine check was not enabled. */
1167 add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
1168
1169 severity = mce_severity(m, cfg->tolerant, NULL, true);
1170
1171 /*
1172 * When machine check was for corrected/deferred handler don't
1173 * touch, unless we're panicking.
1174 */
1175 if ((severity == MCE_KEEP_SEVERITY ||
1176 severity == MCE_UCNA_SEVERITY) && !no_way_out)
1177 continue;
1178
1179 __set_bit(i, toclear);
1180
1181 /* Machine check event was not enabled. Clear, but ignore. */
1182 if (severity == MCE_NO_SEVERITY)
1183 continue;
1184
1185 mce_read_aux(m, i);
1186
1187 /* assuming valid severity level != 0 */
1188 m->severity = severity;
1189
1190 mce_log(m);
1191
1192 if (severity > *worst) {
1193 *final = *m;
1194 *worst = severity;
1195 }
1196 }
1197
1198 /* mce_clear_state will clear *final, save locally for use later */
1199 *m = *final;
1200}
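
The refactored bank scan keeps two pieces of state across iterations: the highest severity seen so far and a copy of the record that produced it. Stripped of the MSR accesses and filtering, the pattern is:

        struct record { int severity; /* per-bank data ... */ };

        static int scan_worst(const struct record *banks, int nbanks,
                              struct record *final)
        {
                int i, worst = 0;

                for (i = 0; i < nbanks; i++) {
                        if (banks[i].severity > worst) {
                                worst = banks[i].severity;
                                *final = banks[i];      /* remember the worst bank's data */
                        }
                }
                return worst;
        }
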
1201
1107/* 1202/*
1108 * The actual machine check handler. This only handles real 1203 * The actual machine check handler. This only handles real
1109 * exceptions when something got corrupted coming in through int 18. 1204 * exceptions when something got corrupted coming in through int 18.
@@ -1118,68 +1213,45 @@ static void mce_unmap_kpfn(unsigned long pfn)
1118 */ 1213 */
1119void do_machine_check(struct pt_regs *regs, long error_code) 1214void do_machine_check(struct pt_regs *regs, long error_code)
1120{ 1215{
1216 DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
1217 DECLARE_BITMAP(toclear, MAX_NR_BANKS);
1121 struct mca_config *cfg = &mca_cfg; 1218 struct mca_config *cfg = &mca_cfg;
1219 int cpu = smp_processor_id();
1220 char *msg = "Unknown";
1122 struct mce m, *final; 1221 struct mce m, *final;
1123 int i;
1124 int worst = 0; 1222 int worst = 0;
1125 int severity;
1126 1223
1127 /* 1224 /*
1128 * Establish sequential order between the CPUs entering the machine 1225 * Establish sequential order between the CPUs entering the machine
1129 * check handler. 1226 * check handler.
1130 */ 1227 */
1131 int order = -1; 1228 int order = -1;
1229
1132 /* 1230 /*
1133 * If no_way_out gets set, there is no safe way to recover from this 1231 * If no_way_out gets set, there is no safe way to recover from this
1134 * MCE. If mca_cfg.tolerant is cranked up, we'll try anyway. 1232 * MCE. If mca_cfg.tolerant is cranked up, we'll try anyway.
1135 */ 1233 */
1136 int no_way_out = 0; 1234 int no_way_out = 0;
1235
1137 /* 1236 /*
1138 * If kill_it gets set, there might be a way to recover from this 1237 * If kill_it gets set, there might be a way to recover from this
1139 * error. 1238 * error.
1140 */ 1239 */
1141 int kill_it = 0; 1240 int kill_it = 0;
1142 DECLARE_BITMAP(toclear, MAX_NR_BANKS);
1143 DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
1144 char *msg = "Unknown";
1145 1241
1146 /* 1242 /*
1147 * MCEs are always local on AMD. Same is determined by MCG_STATUS_LMCES 1243 * MCEs are always local on AMD. Same is determined by MCG_STATUS_LMCES
1148 * on Intel. 1244 * on Intel.
1149 */ 1245 */
1150 int lmce = 1; 1246 int lmce = 1;
1151 int cpu = smp_processor_id();
1152 1247
1153 /* 1248 if (__mc_check_crashing_cpu(cpu))
1154 * Cases where we avoid rendezvous handler timeout: 1249 return;
1155 * 1) If this CPU is offline.
1156 *
1157 * 2) If crashing_cpu was set, e.g. we're entering kdump and we need to
1158 * skip those CPUs which remain looping in the 1st kernel - see
1159 * crash_nmi_callback().
1160 *
1161 * Note: there still is a small window between kexec-ing and the new,
1162 * kdump kernel establishing a new #MC handler where a broadcasted MCE
1163 * might not get handled properly.
1164 */
1165 if (cpu_is_offline(cpu) ||
1166 (crashing_cpu != -1 && crashing_cpu != cpu)) {
1167 u64 mcgstatus;
1168
1169 mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
1170 if (mcgstatus & MCG_STATUS_RIPV) {
1171 mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
1172 return;
1173 }
1174 }
1175 1250
1176 ist_enter(regs); 1251 ist_enter(regs);
1177 1252
1178 this_cpu_inc(mce_exception_count); 1253 this_cpu_inc(mce_exception_count);
1179 1254
1180 if (!cfg->banks)
1181 goto out;
1182
1183 mce_gather_info(&m, regs); 1255 mce_gather_info(&m, regs);
1184 m.tsc = rdtsc(); 1256 m.tsc = rdtsc();
1185 1257
@@ -1220,67 +1292,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
1220 order = mce_start(&no_way_out); 1292 order = mce_start(&no_way_out);
1221 } 1293 }
1222 1294
1223 for (i = 0; i < cfg->banks; i++) { 1295 __mc_scan_banks(&m, final, toclear, valid_banks, no_way_out, &worst);
1224 __clear_bit(i, toclear);
1225 if (!test_bit(i, valid_banks))
1226 continue;
1227 if (!mce_banks[i].ctl)
1228 continue;
1229
1230 m.misc = 0;
1231 m.addr = 0;
1232 m.bank = i;
1233
1234 m.status = mce_rdmsrl(msr_ops.status(i));
1235 if ((m.status & MCI_STATUS_VAL) == 0)
1236 continue;
1237
1238 /*
1239 * Non uncorrected or non signaled errors are handled by
1240 * machine_check_poll. Leave them alone, unless this panics.
1241 */
1242 if (!(m.status & (cfg->ser ? MCI_STATUS_S : MCI_STATUS_UC)) &&
1243 !no_way_out)
1244 continue;
1245
1246 /*
1247 * Set taint even when machine check was not enabled.
1248 */
1249 add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
1250
1251 severity = mce_severity(&m, cfg->tolerant, NULL, true);
1252
1253 /*
1254 * When machine check was for corrected/deferred handler don't
1255 * touch, unless we're panicing.
1256 */
1257 if ((severity == MCE_KEEP_SEVERITY ||
1258 severity == MCE_UCNA_SEVERITY) && !no_way_out)
1259 continue;
1260 __set_bit(i, toclear);
1261 if (severity == MCE_NO_SEVERITY) {
1262 /*
1263 * Machine check event was not enabled. Clear, but
1264 * ignore.
1265 */
1266 continue;
1267 }
1268
1269 mce_read_aux(&m, i);
1270
1271 /* assuming valid severity level != 0 */
1272 m.severity = severity;
1273
1274 mce_log(&m);
1275
1276 if (severity > worst) {
1277 *final = m;
1278 worst = severity;
1279 }
1280 }
1281
1282 /* mce_clear_state will clear *final, save locally for use later */
1283 m = *final;
1284 1296
1285 if (!no_way_out) 1297 if (!no_way_out)
1286 mce_clear_state(toclear); 1298 mce_clear_state(toclear);
@@ -1319,7 +1331,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
1319 if (worst > 0) 1331 if (worst > 0)
1320 mce_report_event(regs); 1332 mce_report_event(regs);
1321 mce_wrmsrl(MSR_IA32_MCG_STATUS, 0); 1333 mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
1322out: 1334
1323 sync_core(); 1335 sync_core();
1324 1336
1325 if (worst != MCE_AR_SEVERITY && !kill_it) 1337 if (worst != MCE_AR_SEVERITY && !kill_it)
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 666a284116ac..9c8652974f8e 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -22,8 +22,6 @@
22#include <asm/stacktrace.h> 22#include <asm/stacktrace.h>
23#include <asm/unwind.h> 23#include <asm/unwind.h>
24 24
25#define OPCODE_BUFSIZE 64
26
27int panic_on_unrecovered_nmi; 25int panic_on_unrecovered_nmi;
28int panic_on_io_nmi; 26int panic_on_io_nmi;
29static int die_counter; 27static int die_counter;
@@ -93,26 +91,18 @@ static void printk_stack_address(unsigned long address, int reliable,
93 */ 91 */
94void show_opcodes(u8 *rip, const char *loglvl) 92void show_opcodes(u8 *rip, const char *loglvl)
95{ 93{
96 unsigned int code_prologue = OPCODE_BUFSIZE * 2 / 3; 94#define PROLOGUE_SIZE 42
95#define EPILOGUE_SIZE 21
96#define OPCODE_BUFSIZE (PROLOGUE_SIZE + 1 + EPILOGUE_SIZE)
97 u8 opcodes[OPCODE_BUFSIZE]; 97 u8 opcodes[OPCODE_BUFSIZE];
98 u8 *ip;
99 int i;
100
101 printk("%sCode: ", loglvl);
102
103 ip = (u8 *)rip - code_prologue;
104 if (probe_kernel_read(opcodes, ip, OPCODE_BUFSIZE)) {
105 pr_cont("Bad RIP value.\n");
106 return;
107 }
108 98
109 for (i = 0; i < OPCODE_BUFSIZE; i++, ip++) { 99 if (probe_kernel_read(opcodes, rip - PROLOGUE_SIZE, OPCODE_BUFSIZE)) {
110 if (ip == rip) 100 printk("%sCode: Bad RIP value.\n", loglvl);
111 pr_cont("<%02x> ", opcodes[i]); 101 } else {
112 else 102 printk("%sCode: %" __stringify(PROLOGUE_SIZE) "ph <%02x> %"
113 pr_cont("%02x ", opcodes[i]); 103 __stringify(EPILOGUE_SIZE) "ph\n", loglvl, opcodes,
104 opcodes[PROLOGUE_SIZE], opcodes + PROLOGUE_SIZE + 1);
114 } 105 }
115 pr_cont("\n");
116} 106}
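
The new show_opcodes() builds its format string at compile time by stringifying the byte counts and pasting them in front of the kernel's %ph hex-dump specifier (valid for buffers of up to 64 bytes, which is exactly PROLOGUE_SIZE + 1 + EPILOGUE_SIZE). A user-space sketch of just the string pasting, with __stringify() redefined locally for the example:

        #include <stdio.h>

        #define __stringify_1(x)        #x
        #define __stringify(x)          __stringify_1(x)
        #define PROLOGUE_SIZE           42
        #define EPILOGUE_SIZE           21

        int main(void)
        {
                /* Adjacent string literals concatenate into the final format. */
                const char *fmt = "%" __stringify(PROLOGUE_SIZE) "ph <%02x> %"
                                  __stringify(EPILOGUE_SIZE) "ph";

                puts(fmt);      /* prints: %42ph <%02x> %21ph */
                return 0;
        }
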
117 107
118void show_ip(struct pt_regs *regs, const char *loglvl) 108void show_ip(struct pt_regs *regs, const char *loglvl)
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index abe6df15a8fb..30f9cb2c0b55 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -512,11 +512,18 @@ ENTRY(initial_code)
512ENTRY(setup_once_ref) 512ENTRY(setup_once_ref)
513 .long setup_once 513 .long setup_once
514 514
515#ifdef CONFIG_PAGE_TABLE_ISOLATION
516#define PGD_ALIGN (2 * PAGE_SIZE)
517#define PTI_USER_PGD_FILL 1024
518#else
519#define PGD_ALIGN (PAGE_SIZE)
520#define PTI_USER_PGD_FILL 0
521#endif
515/* 522/*
516 * BSS section 523 * BSS section
517 */ 524 */
518__PAGE_ALIGNED_BSS 525__PAGE_ALIGNED_BSS
519 .align PAGE_SIZE 526 .align PGD_ALIGN
520#ifdef CONFIG_X86_PAE 527#ifdef CONFIG_X86_PAE
521.globl initial_pg_pmd 528.globl initial_pg_pmd
522initial_pg_pmd: 529initial_pg_pmd:
@@ -526,14 +533,17 @@ initial_pg_pmd:
526initial_page_table: 533initial_page_table:
527 .fill 1024,4,0 534 .fill 1024,4,0
528#endif 535#endif
536 .align PGD_ALIGN
529initial_pg_fixmap: 537initial_pg_fixmap:
530 .fill 1024,4,0 538 .fill 1024,4,0
531.globl empty_zero_page
532empty_zero_page:
533 .fill 4096,1,0
534.globl swapper_pg_dir 539.globl swapper_pg_dir
540 .align PGD_ALIGN
535swapper_pg_dir: 541swapper_pg_dir:
536 .fill 1024,4,0 542 .fill 1024,4,0
543 .fill PTI_USER_PGD_FILL,4,0
544.globl empty_zero_page
545empty_zero_page:
546 .fill 4096,1,0
537EXPORT_SYMBOL(empty_zero_page) 547EXPORT_SYMBOL(empty_zero_page)
538 548
539/* 549/*
@@ -542,7 +552,7 @@ EXPORT_SYMBOL(empty_zero_page)
542#ifdef CONFIG_X86_PAE 552#ifdef CONFIG_X86_PAE
543__PAGE_ALIGNED_DATA 553__PAGE_ALIGNED_DATA
544 /* Page-aligned for the benefit of paravirt? */ 554 /* Page-aligned for the benefit of paravirt? */
545 .align PAGE_SIZE 555 .align PGD_ALIGN
546ENTRY(initial_page_table) 556ENTRY(initial_page_table)
547 .long pa(initial_pg_pmd+PGD_IDENT_ATTR),0 /* low identity map */ 557 .long pa(initial_pg_pmd+PGD_IDENT_ATTR),0 /* low identity map */
548# if KPMDS == 3 558# if KPMDS == 3
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 8344dd2f310a..15ebc2fc166e 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -235,7 +235,7 @@ ENTRY(secondary_startup_64)
235 * address given in m16:64. 235 * address given in m16:64.
236 */ 236 */
237 pushq $.Lafter_lret # put return address on stack for unwinder 237 pushq $.Lafter_lret # put return address on stack for unwinder
238 xorq %rbp, %rbp # clear frame pointer 238 xorl %ebp, %ebp # clear frame pointer
239 movq initial_code(%rip), %rax 239 movq initial_code(%rip), %rax
240 pushq $__KERNEL_CS # set correct cs 240 pushq $__KERNEL_CS # set correct cs
241 pushq %rax # target address in negative space 241 pushq %rax # target address in negative space
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index e56c95be2808..eeea935e9bb5 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -37,15 +37,18 @@ static void bug_at(unsigned char *ip, int line)
37 BUG(); 37 BUG();
38} 38}
39 39
40static void __jump_label_transform(struct jump_entry *entry, 40static void __ref __jump_label_transform(struct jump_entry *entry,
41 enum jump_label_type type, 41 enum jump_label_type type,
42 void *(*poker)(void *, const void *, size_t), 42 void *(*poker)(void *, const void *, size_t),
43 int init) 43 int init)
44{ 44{
45 union jump_code_union code; 45 union jump_code_union code;
46 const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP }; 46 const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP };
47 const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5]; 47 const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5];
48 48
49 if (early_boot_irqs_disabled)
50 poker = text_poke_early;
51
49 if (type == JUMP_LABEL_JMP) { 52 if (type == JUMP_LABEL_JMP) {
50 if (init) { 53 if (init) {
51 /* 54 /*
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 5b2300b818af..09aaabb2bbf1 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -45,7 +45,6 @@
45#include <asm/apic.h> 45#include <asm/apic.h>
46#include <asm/apicdef.h> 46#include <asm/apicdef.h>
47#include <asm/hypervisor.h> 47#include <asm/hypervisor.h>
48#include <asm/kvm_guest.h>
49 48
50static int kvmapf = 1; 49static int kvmapf = 1;
51 50
@@ -66,15 +65,6 @@ static int __init parse_no_stealacc(char *arg)
66 65
67early_param("no-steal-acc", parse_no_stealacc); 66early_param("no-steal-acc", parse_no_stealacc);
68 67
69static int kvmclock_vsyscall = 1;
70static int __init parse_no_kvmclock_vsyscall(char *arg)
71{
72 kvmclock_vsyscall = 0;
73 return 0;
74}
75
76early_param("no-kvmclock-vsyscall", parse_no_kvmclock_vsyscall);
77
78static DEFINE_PER_CPU_DECRYPTED(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64); 68static DEFINE_PER_CPU_DECRYPTED(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64);
79static DEFINE_PER_CPU_DECRYPTED(struct kvm_steal_time, steal_time) __aligned(64); 69static DEFINE_PER_CPU_DECRYPTED(struct kvm_steal_time, steal_time) __aligned(64);
80static int has_steal_clock = 0; 70static int has_steal_clock = 0;
@@ -154,7 +144,7 @@ void kvm_async_pf_task_wait(u32 token, int interrupt_kernel)
154 144
155 for (;;) { 145 for (;;) {
156 if (!n.halted) 146 if (!n.halted)
157 prepare_to_swait(&n.wq, &wait, TASK_UNINTERRUPTIBLE); 147 prepare_to_swait_exclusive(&n.wq, &wait, TASK_UNINTERRUPTIBLE);
158 if (hlist_unhashed(&n.link)) 148 if (hlist_unhashed(&n.link))
159 break; 149 break;
160 150
@@ -188,7 +178,7 @@ static void apf_task_wake_one(struct kvm_task_sleep_node *n)
188 if (n->halted) 178 if (n->halted)
189 smp_send_reschedule(n->cpu); 179 smp_send_reschedule(n->cpu);
190 else if (swq_has_sleeper(&n->wq)) 180 else if (swq_has_sleeper(&n->wq))
191 swake_up(&n->wq); 181 swake_up_one(&n->wq);
192} 182}
193 183
194static void apf_task_wake_all(void) 184static void apf_task_wake_all(void)
@@ -560,9 +550,6 @@ static void __init kvm_guest_init(void)
560 if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) 550 if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
561 apic_set_eoi_write(kvm_guest_apic_eoi_write); 551 apic_set_eoi_write(kvm_guest_apic_eoi_write);
562 552
563 if (kvmclock_vsyscall)
564 kvm_setup_vsyscall_timeinfo();
565
566#ifdef CONFIG_SMP 553#ifdef CONFIG_SMP
567 smp_ops.smp_prepare_cpus = kvm_smp_prepare_cpus; 554 smp_ops.smp_prepare_cpus = kvm_smp_prepare_cpus;
568 smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; 555 smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
@@ -628,6 +615,7 @@ const __initconst struct hypervisor_x86 x86_hyper_kvm = {
628 .name = "KVM", 615 .name = "KVM",
629 .detect = kvm_detect, 616 .detect = kvm_detect,
630 .type = X86_HYPER_KVM, 617 .type = X86_HYPER_KVM,
618 .init.init_platform = kvmclock_init,
631 .init.guest_late_init = kvm_guest_init, 619 .init.guest_late_init = kvm_guest_init,
632 .init.x2apic_available = kvm_para_available, 620 .init.x2apic_available = kvm_para_available,
633}; 621};
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 3b8e7c13c614..d2edd7e6c294 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -23,30 +23,56 @@
23#include <asm/apic.h> 23#include <asm/apic.h>
24#include <linux/percpu.h> 24#include <linux/percpu.h>
25#include <linux/hardirq.h> 25#include <linux/hardirq.h>
26#include <linux/memblock.h> 26#include <linux/cpuhotplug.h>
27#include <linux/sched.h> 27#include <linux/sched.h>
28#include <linux/sched/clock.h> 28#include <linux/sched/clock.h>
29#include <linux/mm.h>
29 30
31#include <asm/hypervisor.h>
30#include <asm/mem_encrypt.h> 32#include <asm/mem_encrypt.h>
31#include <asm/x86_init.h> 33#include <asm/x86_init.h>
32#include <asm/reboot.h> 34#include <asm/reboot.h>
33#include <asm/kvmclock.h> 35#include <asm/kvmclock.h>
34 36
35static int kvmclock __ro_after_init = 1; 37static int kvmclock __initdata = 1;
36static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME; 38static int kvmclock_vsyscall __initdata = 1;
37static int msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK; 39static int msr_kvm_system_time __ro_after_init = MSR_KVM_SYSTEM_TIME;
38static u64 kvm_sched_clock_offset; 40static int msr_kvm_wall_clock __ro_after_init = MSR_KVM_WALL_CLOCK;
41static u64 kvm_sched_clock_offset __ro_after_init;
39 42
40static int parse_no_kvmclock(char *arg) 43static int __init parse_no_kvmclock(char *arg)
41{ 44{
42 kvmclock = 0; 45 kvmclock = 0;
43 return 0; 46 return 0;
44} 47}
45early_param("no-kvmclock", parse_no_kvmclock); 48early_param("no-kvmclock", parse_no_kvmclock);
46 49
47/* The hypervisor will put information about time periodically here */ 50static int __init parse_no_kvmclock_vsyscall(char *arg)
48static struct pvclock_vsyscall_time_info *hv_clock; 51{
49static struct pvclock_wall_clock *wall_clock; 52 kvmclock_vsyscall = 0;
53 return 0;
54}
55early_param("no-kvmclock-vsyscall", parse_no_kvmclock_vsyscall);
56
57/* Aligned to page sizes to match what's mapped via vsyscalls to userspace */
58#define HV_CLOCK_SIZE (sizeof(struct pvclock_vsyscall_time_info) * NR_CPUS)
59#define HVC_BOOT_ARRAY_SIZE \
60 (PAGE_SIZE / sizeof(struct pvclock_vsyscall_time_info))
61
62static struct pvclock_vsyscall_time_info
63 hv_clock_boot[HVC_BOOT_ARRAY_SIZE] __aligned(PAGE_SIZE);
64static struct pvclock_wall_clock wall_clock;
65static DEFINE_PER_CPU(struct pvclock_vsyscall_time_info *, hv_clock_per_cpu);
66
67static inline struct pvclock_vcpu_time_info *this_cpu_pvti(void)
68{
69 return &this_cpu_read(hv_clock_per_cpu)->pvti;
70}
71
72static inline struct pvclock_vsyscall_time_info *this_cpu_hvclock(void)
73{
74 return this_cpu_read(hv_clock_per_cpu);
75}
50 76
51/* 77/*
52 * The wallclock is the time of day when we booted. Since then, some time may 78 * The wallclock is the time of day when we booted. Since then, some time may
@@ -55,21 +81,10 @@ static struct pvclock_wall_clock *wall_clock;
55 */ 81 */
56static void kvm_get_wallclock(struct timespec64 *now) 82static void kvm_get_wallclock(struct timespec64 *now)
57{ 83{
58 struct pvclock_vcpu_time_info *vcpu_time; 84 wrmsrl(msr_kvm_wall_clock, slow_virt_to_phys(&wall_clock));
59 int low, high; 85 preempt_disable();
60 int cpu; 86 pvclock_read_wallclock(&wall_clock, this_cpu_pvti(), now);
61 87 preempt_enable();
62 low = (int)slow_virt_to_phys(wall_clock);
63 high = ((u64)slow_virt_to_phys(wall_clock) >> 32);
64
65 native_write_msr(msr_kvm_wall_clock, low, high);
66
67 cpu = get_cpu();
68
69 vcpu_time = &hv_clock[cpu].pvti;
70 pvclock_read_wallclock(wall_clock, vcpu_time, now);
71
72 put_cpu();
73} 88}
74 89
75static int kvm_set_wallclock(const struct timespec64 *now) 90static int kvm_set_wallclock(const struct timespec64 *now)
@@ -79,14 +94,10 @@ static int kvm_set_wallclock(const struct timespec64 *now)
79 94
80static u64 kvm_clock_read(void) 95static u64 kvm_clock_read(void)
81{ 96{
82 struct pvclock_vcpu_time_info *src;
83 u64 ret; 97 u64 ret;
84 int cpu;
85 98
86 preempt_disable_notrace(); 99 preempt_disable_notrace();
87 cpu = smp_processor_id(); 100 ret = pvclock_clocksource_read(this_cpu_pvti());
88 src = &hv_clock[cpu].pvti;
89 ret = pvclock_clocksource_read(src);
90 preempt_enable_notrace(); 101 preempt_enable_notrace();
91 return ret; 102 return ret;
92} 103}
@@ -112,11 +123,11 @@ static inline void kvm_sched_clock_init(bool stable)
112 kvm_sched_clock_offset = kvm_clock_read(); 123 kvm_sched_clock_offset = kvm_clock_read();
113 pv_time_ops.sched_clock = kvm_sched_clock_read; 124 pv_time_ops.sched_clock = kvm_sched_clock_read;
114 125
115 printk(KERN_INFO "kvm-clock: using sched offset of %llu cycles\n", 126 pr_info("kvm-clock: using sched offset of %llu cycles",
116 kvm_sched_clock_offset); 127 kvm_sched_clock_offset);
117 128
118 BUILD_BUG_ON(sizeof(kvm_sched_clock_offset) > 129 BUILD_BUG_ON(sizeof(kvm_sched_clock_offset) >
119 sizeof(((struct pvclock_vcpu_time_info *)NULL)->system_time)); 130 sizeof(((struct pvclock_vcpu_time_info *)NULL)->system_time));
120} 131}
121 132
122/* 133/*
@@ -130,19 +141,11 @@ static inline void kvm_sched_clock_init(bool stable)
130 */ 141 */
131static unsigned long kvm_get_tsc_khz(void) 142static unsigned long kvm_get_tsc_khz(void)
132{ 143{
133 struct pvclock_vcpu_time_info *src;
134 int cpu;
135 unsigned long tsc_khz;
136
137 cpu = get_cpu();
138 src = &hv_clock[cpu].pvti;
139 tsc_khz = pvclock_tsc_khz(src);
140 put_cpu();
141 setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ); 144 setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
142 return tsc_khz; 145 return pvclock_tsc_khz(this_cpu_pvti());
143} 146}
144 147
145static void kvm_get_preset_lpj(void) 148static void __init kvm_get_preset_lpj(void)
146{ 149{
147 unsigned long khz; 150 unsigned long khz;
148 u64 lpj; 151 u64 lpj;
@@ -156,49 +159,40 @@ static void kvm_get_preset_lpj(void)
156 159
157bool kvm_check_and_clear_guest_paused(void) 160bool kvm_check_and_clear_guest_paused(void)
158{ 161{
162 struct pvclock_vsyscall_time_info *src = this_cpu_hvclock();
159 bool ret = false; 163 bool ret = false;
160 struct pvclock_vcpu_time_info *src;
161 int cpu = smp_processor_id();
162 164
163 if (!hv_clock) 165 if (!src)
164 return ret; 166 return ret;
165 167
166 src = &hv_clock[cpu].pvti; 168 if ((src->pvti.flags & PVCLOCK_GUEST_STOPPED) != 0) {
167 if ((src->flags & PVCLOCK_GUEST_STOPPED) != 0) { 169 src->pvti.flags &= ~PVCLOCK_GUEST_STOPPED;
168 src->flags &= ~PVCLOCK_GUEST_STOPPED;
169 pvclock_touch_watchdogs(); 170 pvclock_touch_watchdogs();
170 ret = true; 171 ret = true;
171 } 172 }
172
173 return ret; 173 return ret;
174} 174}
175 175
176struct clocksource kvm_clock = { 176struct clocksource kvm_clock = {
177 .name = "kvm-clock", 177 .name = "kvm-clock",
178 .read = kvm_clock_get_cycles, 178 .read = kvm_clock_get_cycles,
179 .rating = 400, 179 .rating = 400,
180 .mask = CLOCKSOURCE_MASK(64), 180 .mask = CLOCKSOURCE_MASK(64),
181 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 181 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
182}; 182};
183EXPORT_SYMBOL_GPL(kvm_clock); 183EXPORT_SYMBOL_GPL(kvm_clock);
184 184
185int kvm_register_clock(char *txt) 185static void kvm_register_clock(char *txt)
186{ 186{
187 int cpu = smp_processor_id(); 187 struct pvclock_vsyscall_time_info *src = this_cpu_hvclock();
188 int low, high, ret; 188 u64 pa;
189 struct pvclock_vcpu_time_info *src;
190
191 if (!hv_clock)
192 return 0;
193 189
194 src = &hv_clock[cpu].pvti; 190 if (!src)
195 low = (int)slow_virt_to_phys(src) | 1; 191 return;
196 high = ((u64)slow_virt_to_phys(src) >> 32);
197 ret = native_write_msr_safe(msr_kvm_system_time, low, high);
198 printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n",
199 cpu, high, low, txt);
200 192
201 return ret; 193 pa = slow_virt_to_phys(&src->pvti) | 0x01ULL;
194 wrmsrl(msr_kvm_system_time, pa);
195 pr_info("kvm-clock: cpu %d, msr %llx, %s", smp_processor_id(), pa, txt);
202} 196}
203 197
204static void kvm_save_sched_clock_state(void) 198static void kvm_save_sched_clock_state(void)
@@ -213,11 +207,7 @@ static void kvm_restore_sched_clock_state(void)
213#ifdef CONFIG_X86_LOCAL_APIC 207#ifdef CONFIG_X86_LOCAL_APIC
214static void kvm_setup_secondary_clock(void) 208static void kvm_setup_secondary_clock(void)
215{ 209{
216 /* 210 kvm_register_clock("secondary cpu clock");
217 * Now that the first cpu already had this clocksource initialized,
218 * we shouldn't fail.
219 */
220 WARN_ON(kvm_register_clock("secondary cpu clock"));
221} 211}
222#endif 212#endif
223 213
@@ -245,100 +235,84 @@ static void kvm_shutdown(void)
245 native_machine_shutdown(); 235 native_machine_shutdown();
246} 236}
247 237
248static phys_addr_t __init kvm_memblock_alloc(phys_addr_t size, 238static int __init kvm_setup_vsyscall_timeinfo(void)
249 phys_addr_t align)
250{ 239{
251 phys_addr_t mem; 240#ifdef CONFIG_X86_64
241 u8 flags;
252 242
253 mem = memblock_alloc(size, align); 243 if (!per_cpu(hv_clock_per_cpu, 0) || !kvmclock_vsyscall)
254 if (!mem)
255 return 0; 244 return 0;
256 245
257 if (sev_active()) { 246 flags = pvclock_read_flags(&hv_clock_boot[0].pvti);
258 if (early_set_memory_decrypted((unsigned long)__va(mem), size)) 247 if (!(flags & PVCLOCK_TSC_STABLE_BIT))
259 goto e_free; 248 return 0;
260 }
261 249
262 return mem; 250 kvm_clock.archdata.vclock_mode = VCLOCK_PVCLOCK;
263e_free: 251#endif
264 memblock_free(mem, size);
265 return 0; 252 return 0;
266} 253}
254early_initcall(kvm_setup_vsyscall_timeinfo);
267 255
268static void __init kvm_memblock_free(phys_addr_t addr, phys_addr_t size) 256static int kvmclock_setup_percpu(unsigned int cpu)
269{ 257{
270 if (sev_active()) 258 struct pvclock_vsyscall_time_info *p = per_cpu(hv_clock_per_cpu, cpu);
271 early_set_memory_encrypted((unsigned long)__va(addr), size);
272 259
273 memblock_free(addr, size); 260 /*
261 * The per cpu area setup replicates CPU0 data to all cpu
262 * pointers. So carefully check. CPU0 has been set up in init
263 * already.
264 */
265 if (!cpu || (p && p != per_cpu(hv_clock_per_cpu, 0)))
266 return 0;
267
268 /* Use the static page for the first CPUs, allocate otherwise */
269 if (cpu < HVC_BOOT_ARRAY_SIZE)
270 p = &hv_clock_boot[cpu];
271 else
272 p = kzalloc(sizeof(*p), GFP_KERNEL);
273
274 per_cpu(hv_clock_per_cpu, cpu) = p;
275 return p ? 0 : -ENOMEM;
274} 276}
275 277
276void __init kvmclock_init(void) 278void __init kvmclock_init(void)
277{ 279{
278 struct pvclock_vcpu_time_info *vcpu_time;
279 unsigned long mem, mem_wall_clock;
280 int size, cpu, wall_clock_size;
281 u8 flags; 280 u8 flags;
282 281
283 size = PAGE_ALIGN(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS); 282 if (!kvm_para_available() || !kvmclock)
284
285 if (!kvm_para_available())
286 return; 283 return;
287 284
288 if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE2)) { 285 if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE2)) {
289 msr_kvm_system_time = MSR_KVM_SYSTEM_TIME_NEW; 286 msr_kvm_system_time = MSR_KVM_SYSTEM_TIME_NEW;
290 msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK_NEW; 287 msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK_NEW;
291 } else if (!(kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE))) 288 } else if (!kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) {
292 return;
293
294 wall_clock_size = PAGE_ALIGN(sizeof(struct pvclock_wall_clock));
295 mem_wall_clock = kvm_memblock_alloc(wall_clock_size, PAGE_SIZE);
296 if (!mem_wall_clock)
297 return;
298
299 wall_clock = __va(mem_wall_clock);
300 memset(wall_clock, 0, wall_clock_size);
301
302 mem = kvm_memblock_alloc(size, PAGE_SIZE);
303 if (!mem) {
304 kvm_memblock_free(mem_wall_clock, wall_clock_size);
305 wall_clock = NULL;
306 return; 289 return;
307 } 290 }
308 291
309 hv_clock = __va(mem); 292 if (cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "kvmclock:setup_percpu",
310 memset(hv_clock, 0, size); 293 kvmclock_setup_percpu, NULL) < 0) {
311
312 if (kvm_register_clock("primary cpu clock")) {
313 hv_clock = NULL;
314 kvm_memblock_free(mem, size);
315 kvm_memblock_free(mem_wall_clock, wall_clock_size);
316 wall_clock = NULL;
317 return; 294 return;
318 } 295 }
319 296
320 printk(KERN_INFO "kvm-clock: Using msrs %x and %x", 297 pr_info("kvm-clock: Using msrs %x and %x",
321 msr_kvm_system_time, msr_kvm_wall_clock); 298 msr_kvm_system_time, msr_kvm_wall_clock);
322 299
323 pvclock_set_pvti_cpu0_va(hv_clock); 300 this_cpu_write(hv_clock_per_cpu, &hv_clock_boot[0]);
301 kvm_register_clock("primary cpu clock");
302 pvclock_set_pvti_cpu0_va(hv_clock_boot);
324 303
325 if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT)) 304 if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT))
326 pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT); 305 pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT);
327 306
328 cpu = get_cpu(); 307 flags = pvclock_read_flags(&hv_clock_boot[0].pvti);
329 vcpu_time = &hv_clock[cpu].pvti;
330 flags = pvclock_read_flags(vcpu_time);
331
332 kvm_sched_clock_init(flags & PVCLOCK_TSC_STABLE_BIT); 308 kvm_sched_clock_init(flags & PVCLOCK_TSC_STABLE_BIT);
333 put_cpu();
334 309
335 x86_platform.calibrate_tsc = kvm_get_tsc_khz; 310 x86_platform.calibrate_tsc = kvm_get_tsc_khz;
336 x86_platform.calibrate_cpu = kvm_get_tsc_khz; 311 x86_platform.calibrate_cpu = kvm_get_tsc_khz;
337 x86_platform.get_wallclock = kvm_get_wallclock; 312 x86_platform.get_wallclock = kvm_get_wallclock;
338 x86_platform.set_wallclock = kvm_set_wallclock; 313 x86_platform.set_wallclock = kvm_set_wallclock;
339#ifdef CONFIG_X86_LOCAL_APIC 314#ifdef CONFIG_X86_LOCAL_APIC
340 x86_cpuinit.early_percpu_clock_init = 315 x86_cpuinit.early_percpu_clock_init = kvm_setup_secondary_clock;
341 kvm_setup_secondary_clock;
342#endif 316#endif
343 x86_platform.save_sched_clock_state = kvm_save_sched_clock_state; 317 x86_platform.save_sched_clock_state = kvm_save_sched_clock_state;
344 x86_platform.restore_sched_clock_state = kvm_restore_sched_clock_state; 318 x86_platform.restore_sched_clock_state = kvm_restore_sched_clock_state;
@@ -350,31 +324,3 @@ void __init kvmclock_init(void)
350 clocksource_register_hz(&kvm_clock, NSEC_PER_SEC); 324 clocksource_register_hz(&kvm_clock, NSEC_PER_SEC);
351 pv_info.name = "KVM"; 325 pv_info.name = "KVM";
352} 326}
353
354int __init kvm_setup_vsyscall_timeinfo(void)
355{
356#ifdef CONFIG_X86_64
357 int cpu;
358 u8 flags;
359 struct pvclock_vcpu_time_info *vcpu_time;
360 unsigned int size;
361
362 if (!hv_clock)
363 return 0;
364
365 size = PAGE_ALIGN(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS);
366
367 cpu = get_cpu();
368
369 vcpu_time = &hv_clock[cpu].pvti;
370 flags = pvclock_read_flags(vcpu_time);
371
372 put_cpu();
373
374 if (!(flags & PVCLOCK_TSC_STABLE_BIT))
375 return 1;
376
377 kvm_clock.archdata.vclock_mode = VCLOCK_PVCLOCK;
378#endif
379 return 0;
380}
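The kvmclock rework above drops the memblock-sized hv_clock array in favour of a static boot array plus per-CPU pointers, so every reader goes through this_cpu_pvti(). For context, here is a minimal userspace sketch of the pvclock read protocol those readers rely on; the struct is a simplified stand-in rather than the real ABI layout, and the kernel uses a wider-intermediate multiply helper where this sketch just keeps the numbers small.

#include <stdint.h>
#include <stdio.h>

/*
 * Simplified model of a pvclock read: the hypervisor bumps "version" (odd
 * while it rewrites the record), and the guest computes
 *   ns = system_time + ((tsc - tsc_timestamp) scaled by mul/shift).
 */
struct pvti_model {
        volatile uint32_t version;
        uint64_t tsc_timestamp;
        uint64_t system_time;
        uint32_t tsc_to_system_mul;     /* fixed-point multiplier, >> 32 */
        int8_t   tsc_shift;
};

static uint64_t pvclock_read_model(const struct pvti_model *p, uint64_t tsc)
{
        uint32_t version;
        uint64_t delta, ns;

        do {
                version = p->version;          /* retry if odd or changed */
                delta = tsc - p->tsc_timestamp;
                if (p->tsc_shift >= 0)
                        delta <<= p->tsc_shift;
                else
                        delta >>= -p->tsc_shift;
                ns = p->system_time +
                     ((delta * (uint64_t)p->tsc_to_system_mul) >> 32);
        } while ((version & 1) || version != p->version);

        return ns;
}

int main(void)
{
        /* pretend 1 GHz TSC: mul ~ 2^32 per cycle-per-ns, shift 0 */
        struct pvti_model p = { 2, 1000, 5000, 0xffffffffu, 0 };

        printf("%llu ns\n", (unsigned long long)pvclock_read_model(&p, 3000));
        return 0;
}
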
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index c9b14020f4dd..733e6ace0fa4 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -100,6 +100,102 @@ static struct ldt_struct *alloc_ldt_struct(unsigned int num_entries)
100 return new_ldt; 100 return new_ldt;
101} 101}
102 102
103#ifdef CONFIG_PAGE_TABLE_ISOLATION
104
105static void do_sanity_check(struct mm_struct *mm,
106 bool had_kernel_mapping,
107 bool had_user_mapping)
108{
109 if (mm->context.ldt) {
110 /*
111 * We already had an LDT. The top-level entry should already
112 * have been allocated and synchronized with the usermode
113 * tables.
114 */
115 WARN_ON(!had_kernel_mapping);
116 if (static_cpu_has(X86_FEATURE_PTI))
117 WARN_ON(!had_user_mapping);
118 } else {
119 /*
120 * This is the first time we're mapping an LDT for this process.
121 * Sync the pgd to the usermode tables.
122 */
123 WARN_ON(had_kernel_mapping);
124 if (static_cpu_has(X86_FEATURE_PTI))
125 WARN_ON(had_user_mapping);
126 }
127}
128
129#ifdef CONFIG_X86_PAE
130
131static pmd_t *pgd_to_pmd_walk(pgd_t *pgd, unsigned long va)
132{
133 p4d_t *p4d;
134 pud_t *pud;
135
136 if (pgd->pgd == 0)
137 return NULL;
138
139 p4d = p4d_offset(pgd, va);
140 if (p4d_none(*p4d))
141 return NULL;
142
143 pud = pud_offset(p4d, va);
144 if (pud_none(*pud))
145 return NULL;
146
147 return pmd_offset(pud, va);
148}
149
150static void map_ldt_struct_to_user(struct mm_struct *mm)
151{
152 pgd_t *k_pgd = pgd_offset(mm, LDT_BASE_ADDR);
153 pgd_t *u_pgd = kernel_to_user_pgdp(k_pgd);
154 pmd_t *k_pmd, *u_pmd;
155
156 k_pmd = pgd_to_pmd_walk(k_pgd, LDT_BASE_ADDR);
157 u_pmd = pgd_to_pmd_walk(u_pgd, LDT_BASE_ADDR);
158
159 if (static_cpu_has(X86_FEATURE_PTI) && !mm->context.ldt)
160 set_pmd(u_pmd, *k_pmd);
161}
162
163static void sanity_check_ldt_mapping(struct mm_struct *mm)
164{
165 pgd_t *k_pgd = pgd_offset(mm, LDT_BASE_ADDR);
166 pgd_t *u_pgd = kernel_to_user_pgdp(k_pgd);
167 bool had_kernel, had_user;
168 pmd_t *k_pmd, *u_pmd;
169
170 k_pmd = pgd_to_pmd_walk(k_pgd, LDT_BASE_ADDR);
171 u_pmd = pgd_to_pmd_walk(u_pgd, LDT_BASE_ADDR);
172 had_kernel = (k_pmd->pmd != 0);
173 had_user = (u_pmd->pmd != 0);
174
175 do_sanity_check(mm, had_kernel, had_user);
176}
177
178#else /* !CONFIG_X86_PAE */
179
180static void map_ldt_struct_to_user(struct mm_struct *mm)
181{
182 pgd_t *pgd = pgd_offset(mm, LDT_BASE_ADDR);
183
184 if (static_cpu_has(X86_FEATURE_PTI) && !mm->context.ldt)
185 set_pgd(kernel_to_user_pgdp(pgd), *pgd);
186}
187
188static void sanity_check_ldt_mapping(struct mm_struct *mm)
189{
190 pgd_t *pgd = pgd_offset(mm, LDT_BASE_ADDR);
191 bool had_kernel = (pgd->pgd != 0);
192 bool had_user = (kernel_to_user_pgdp(pgd)->pgd != 0);
193
194 do_sanity_check(mm, had_kernel, had_user);
195}
196
197#endif /* CONFIG_X86_PAE */
198
103/* 199/*
104 * If PTI is enabled, this maps the LDT into the kernelmode and 200 * If PTI is enabled, this maps the LDT into the kernelmode and
105 * usermode tables for the given mm. 201 * usermode tables for the given mm.
@@ -115,9 +211,8 @@ static struct ldt_struct *alloc_ldt_struct(unsigned int num_entries)
115static int 211static int
116map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot) 212map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
117{ 213{
118#ifdef CONFIG_PAGE_TABLE_ISOLATION
119 bool is_vmalloc, had_top_level_entry;
120 unsigned long va; 214 unsigned long va;
215 bool is_vmalloc;
121 spinlock_t *ptl; 216 spinlock_t *ptl;
122 pgd_t *pgd; 217 pgd_t *pgd;
123 int i; 218 int i;
@@ -131,13 +226,15 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
131 */ 226 */
132 WARN_ON(ldt->slot != -1); 227 WARN_ON(ldt->slot != -1);
133 228
229 /* Check if the current mappings are sane */
230 sanity_check_ldt_mapping(mm);
231
134 /* 232 /*
135 * Did we already have the top level entry allocated? We can't 233 * Did we already have the top level entry allocated? We can't
136 * use pgd_none() for this because it doens't do anything on 234 * use pgd_none() for this because it doens't do anything on
137 * 4-level page table kernels. 235 * 4-level page table kernels.
138 */ 236 */
139 pgd = pgd_offset(mm, LDT_BASE_ADDR); 237 pgd = pgd_offset(mm, LDT_BASE_ADDR);
140 had_top_level_entry = (pgd->pgd != 0);
141 238
142 is_vmalloc = is_vmalloc_addr(ldt->entries); 239 is_vmalloc = is_vmalloc_addr(ldt->entries);
143 240
@@ -172,41 +269,31 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
172 pte_unmap_unlock(ptep, ptl); 269 pte_unmap_unlock(ptep, ptl);
173 } 270 }
174 271
175 if (mm->context.ldt) { 272 /* Propagate LDT mapping to the user page-table */
176 /* 273 map_ldt_struct_to_user(mm);
177 * We already had an LDT. The top-level entry should already
178 * have been allocated and synchronized with the usermode
179 * tables.
180 */
181 WARN_ON(!had_top_level_entry);
182 if (static_cpu_has(X86_FEATURE_PTI))
183 WARN_ON(!kernel_to_user_pgdp(pgd)->pgd);
184 } else {
185 /*
186 * This is the first time we're mapping an LDT for this process.
187 * Sync the pgd to the usermode tables.
188 */
189 WARN_ON(had_top_level_entry);
190 if (static_cpu_has(X86_FEATURE_PTI)) {
191 WARN_ON(kernel_to_user_pgdp(pgd)->pgd);
192 set_pgd(kernel_to_user_pgdp(pgd), *pgd);
193 }
194 }
195 274
196 va = (unsigned long)ldt_slot_va(slot); 275 va = (unsigned long)ldt_slot_va(slot);
197 flush_tlb_mm_range(mm, va, va + LDT_SLOT_STRIDE, 0); 276 flush_tlb_mm_range(mm, va, va + LDT_SLOT_STRIDE, 0);
198 277
199 ldt->slot = slot; 278 ldt->slot = slot;
200#endif
201 return 0; 279 return 0;
202} 280}
203 281
282#else /* !CONFIG_PAGE_TABLE_ISOLATION */
283
284static int
285map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
286{
287 return 0;
288}
289#endif /* CONFIG_PAGE_TABLE_ISOLATION */
290
204static void free_ldt_pgtables(struct mm_struct *mm) 291static void free_ldt_pgtables(struct mm_struct *mm)
205{ 292{
206#ifdef CONFIG_PAGE_TABLE_ISOLATION 293#ifdef CONFIG_PAGE_TABLE_ISOLATION
207 struct mmu_gather tlb; 294 struct mmu_gather tlb;
208 unsigned long start = LDT_BASE_ADDR; 295 unsigned long start = LDT_BASE_ADDR;
209 unsigned long end = start + (1UL << PGDIR_SHIFT); 296 unsigned long end = LDT_END_ADDR;
210 297
211 if (!static_cpu_has(X86_FEATURE_PTI)) 298 if (!static_cpu_has(X86_FEATURE_PTI))
212 return; 299 return;
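The LDT changes split the PTI bookkeeping into two helpers: sanity_check_ldt_mapping() verifies that the kernel-side and user-side top-level entries are consistent with whether the mm already has an LDT, and map_ldt_struct_to_user() propagates the kernel entry into the PTI user table on first use (at pmd level under PAE, pgd level otherwise). A toy model of that invariant, with deliberately simplified stand-in types:

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

struct mm_model {
        bool has_ldt;           /* mm->context.ldt already set? */
        bool kernel_entry;      /* kernel-side top-level entry present */
        bool user_entry;        /* PTI user-side top-level entry present */
};

static void sanity_check(const struct mm_model *mm)
{
        if (mm->has_ldt)
                assert(mm->kernel_entry && mm->user_entry);
        else
                assert(!mm->kernel_entry && !mm->user_entry);
}

static void map_to_user(struct mm_model *mm)
{
        if (!mm->has_ldt)               /* first LDT: propagate to user table */
                mm->user_entry = mm->kernel_entry;
}

int main(void)
{
        struct mm_model mm = { false, false, false };

        sanity_check(&mm);
        mm.kernel_entry = true;         /* kernel side populated while mapping */
        map_to_user(&mm);
        mm.has_ldt = true;
        sanity_check(&mm);              /* both sides now consistent */
        printf("user entry propagated: %d\n", mm.user_entry);
        return 0;
}
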
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index d1ab07ec8c9a..5409c2800ab5 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -56,7 +56,7 @@ static void load_segments(void)
56 56
57static void machine_kexec_free_page_tables(struct kimage *image) 57static void machine_kexec_free_page_tables(struct kimage *image)
58{ 58{
59 free_page((unsigned long)image->arch.pgd); 59 free_pages((unsigned long)image->arch.pgd, PGD_ALLOCATION_ORDER);
60 image->arch.pgd = NULL; 60 image->arch.pgd = NULL;
61#ifdef CONFIG_X86_PAE 61#ifdef CONFIG_X86_PAE
62 free_page((unsigned long)image->arch.pmd0); 62 free_page((unsigned long)image->arch.pmd0);
@@ -72,7 +72,8 @@ static void machine_kexec_free_page_tables(struct kimage *image)
72 72
73static int machine_kexec_alloc_page_tables(struct kimage *image) 73static int machine_kexec_alloc_page_tables(struct kimage *image)
74{ 74{
75 image->arch.pgd = (pgd_t *)get_zeroed_page(GFP_KERNEL); 75 image->arch.pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
76 PGD_ALLOCATION_ORDER);
76#ifdef CONFIG_X86_PAE 77#ifdef CONFIG_X86_PAE
77 image->arch.pmd0 = (pmd_t *)get_zeroed_page(GFP_KERNEL); 78 image->arch.pmd0 = (pmd_t *)get_zeroed_page(GFP_KERNEL);
78 image->arch.pmd1 = (pmd_t *)get_zeroed_page(GFP_KERNEL); 79 image->arch.pmd1 = (pmd_t *)get_zeroed_page(GFP_KERNEL);
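The kexec hunk keeps the 32-bit page-table allocation in step with the PTI-sized PGD: whatever order is used to allocate must also be used to free. A small sketch of the order arithmetic; the order value below is an assumption for illustration (with PTI the PGD spans two pages), and aligned_alloc() merely stands in for __get_free_pages().

#include <stdio.h>
#include <stdlib.h>

#define PAGE_SIZE 4096UL
#define PGD_ALLOCATION_ORDER 1          /* assumed for illustration */

int main(void)
{
        size_t size = PAGE_SIZE << PGD_ALLOCATION_ORDER;  /* 2^order pages */
        void *pgd = aligned_alloc(PAGE_SIZE, size);

        if (!pgd)
                return 1;
        printf("order %d -> %zu bytes\n", PGD_ALLOCATION_ORDER, size);
        /* the free must use the same order, like free_pages(addr, order) */
        free(pgd);
        return 0;
}
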
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 99dc79e76bdc..930c88341e4e 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -88,10 +88,12 @@ unsigned paravirt_patch_call(void *insnbuf,
88 struct branch *b = insnbuf; 88 struct branch *b = insnbuf;
89 unsigned long delta = (unsigned long)target - (addr+5); 89 unsigned long delta = (unsigned long)target - (addr+5);
90 90
91 if (tgt_clobbers & ~site_clobbers) 91 if (len < 5) {
92 return len; /* target would clobber too much for this site */ 92#ifdef CONFIG_RETPOLINE
93 if (len < 5) 93 WARN_ONCE("Failing to patch indirect CALL in %ps\n", (void *)addr);
94#endif
94 return len; /* call too long for patch site */ 95 return len; /* call too long for patch site */
96 }
95 97
96 b->opcode = 0xe8; /* call */ 98 b->opcode = 0xe8; /* call */
97 b->delta = delta; 99 b->delta = delta;
@@ -106,8 +108,12 @@ unsigned paravirt_patch_jmp(void *insnbuf, const void *target,
106 struct branch *b = insnbuf; 108 struct branch *b = insnbuf;
107 unsigned long delta = (unsigned long)target - (addr+5); 109 unsigned long delta = (unsigned long)target - (addr+5);
108 110
109 if (len < 5) 111 if (len < 5) {
112#ifdef CONFIG_RETPOLINE
113 WARN_ONCE("Failing to patch indirect JMP in %ps\n", (void *)addr);
114#endif
110 return len; /* call too long for patch site */ 115 return len; /* call too long for patch site */
116 }
111 117
112 b->opcode = 0xe9; /* jmp */ 118 b->opcode = 0xe9; /* jmp */
113 b->delta = delta; 119 b->delta = delta;
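The paravirt patching hunks add a retpoline-only warning when a call site is too short to hold the 5-byte direct call that replaces the indirect paravirt call. A toy encoder for that replacement, mirroring the struct branch layout (one opcode byte plus a rel32 displacement); the addresses are made up and the byte dump assumes a little-endian host:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static int patch_call(uint8_t *site, unsigned int len,
                      uint64_t addr, uint64_t target)
{
        uint32_t delta;

        if (len < 5)
                return -1;              /* site too short: leave it alone */

        delta = (uint32_t)(target - (addr + 5));
        site[0] = 0xe8;                 /* direct near call */
        memcpy(site + 1, &delta, 4);    /* rel32 displacement */
        return 5;
}

int main(void)
{
        uint8_t buf[5];

        if (patch_call(buf, sizeof(buf), 0x1000, 0x2000) == 5)
                printf("%02x %02x %02x %02x %02x\n",
                       buf[0], buf[1], buf[2], buf[3], buf[4]);
        return 0;
}
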
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index 9edadabf04f6..9cb98f7b07c9 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -20,7 +20,7 @@ DEF_NATIVE(, mov64, "mov %rdi, %rax");
20 20
21#if defined(CONFIG_PARAVIRT_SPINLOCKS) 21#if defined(CONFIG_PARAVIRT_SPINLOCKS)
22DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%rdi)"); 22DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%rdi)");
23DEF_NATIVE(pv_lock_ops, vcpu_is_preempted, "xor %rax, %rax"); 23DEF_NATIVE(pv_lock_ops, vcpu_is_preempted, "xor %eax, %eax");
24#endif 24#endif
25 25
26unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len) 26unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len)
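The one-byte fix in paravirt_patch_64.c swaps `xor %rax, %rax` for `xor %eax, %eax`: writing the 32-bit register already zero-extends into the full 64-bit register, and the shorter encoding is what must fit the fixed-size patch site. A quick illustration of the two encodings:

#include <stdio.h>

int main(void)
{
        unsigned char xor_eax[] = { 0x31, 0xc0 };       /* xor %eax,%eax */
        unsigned char xor_rax[] = { 0x48, 0x31, 0xc0 }; /* xor %rax,%rax (REX.W) */

        printf("xor %%eax,%%eax: %zu bytes, xor %%rax,%%rax: %zu bytes\n",
               sizeof(xor_eax), sizeof(xor_rax));
        return 0;
}
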
diff --git a/arch/x86/kernel/pci-iommu_table.c b/arch/x86/kernel/pci-iommu_table.c
index 4dfd90a75e63..2e9006c1e240 100644
--- a/arch/x86/kernel/pci-iommu_table.c
+++ b/arch/x86/kernel/pci-iommu_table.c
@@ -60,7 +60,7 @@ void __init check_iommu_entries(struct iommu_table_entry *start,
60 printk(KERN_ERR "CYCLIC DEPENDENCY FOUND! %pS depends on %pS and vice-versa. BREAKING IT.\n", 60 printk(KERN_ERR "CYCLIC DEPENDENCY FOUND! %pS depends on %pS and vice-versa. BREAKING IT.\n",
61 p->detect, q->detect); 61 p->detect, q->detect);
62 /* Heavy handed way..*/ 62 /* Heavy handed way..*/
63 x->depend = 0; 63 x->depend = NULL;
64 } 64 }
65 } 65 }
66 66
diff --git a/arch/x86/kernel/pcspeaker.c b/arch/x86/kernel/pcspeaker.c
index da5190a1ea16..4a710ffffd9a 100644
--- a/arch/x86/kernel/pcspeaker.c
+++ b/arch/x86/kernel/pcspeaker.c
@@ -9,6 +9,6 @@ static __init int add_pcspkr(void)
9 9
10 pd = platform_device_register_simple("pcspkr", -1, NULL, 0); 10 pd = platform_device_register_simple("pcspkr", -1, NULL, 0);
11 11
12 return IS_ERR(pd) ? PTR_ERR(pd) : 0; 12 return PTR_ERR_OR_ZERO(pd);
13} 13}
14device_initcall(add_pcspkr); 14device_initcall(add_pcspkr);
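PTR_ERR_OR_ZERO() folds the usual IS_ERR()/PTR_ERR() dance into a single expression. A simplified userspace model of the error-pointer convention it relies on (the constants and helpers here are stand-ins, not the kernel headers):

#include <stdio.h>

#define MAX_ERRNO       4095
#define IS_ERR_VALUE(x) ((unsigned long)(x) >= (unsigned long)-MAX_ERRNO)

static long PTR_ERR_OR_ZERO(const void *ptr)
{
        if (IS_ERR_VALUE(ptr))
                return (long)ptr;       /* the encoded negative errno */
        return 0;
}

int main(void)
{
        int real_object;
        const void *ok = &real_object;
        const void *bad = (const void *)-22L;   /* e.g. ERR_PTR(-EINVAL) */

        printf("%ld %ld\n", PTR_ERR_OR_ZERO(ok), PTR_ERR_OR_ZERO(bad));
        return 0;
}
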
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 30ca2d1a9231..c93fcfdf1673 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -57,14 +57,12 @@ __visible DEFINE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw) = {
57 */ 57 */
58 .sp0 = (1UL << (BITS_PER_LONG-1)) + 1, 58 .sp0 = (1UL << (BITS_PER_LONG-1)) + 1,
59 59
60#ifdef CONFIG_X86_64
61 /* 60 /*
62 * .sp1 is cpu_current_top_of_stack. The init task never 61 * .sp1 is cpu_current_top_of_stack. The init task never
63 * runs user code, but cpu_current_top_of_stack should still 62 * runs user code, but cpu_current_top_of_stack should still
64 * be well defined before the first context switch. 63 * be well defined before the first context switch.
65 */ 64 */
66 .sp1 = TOP_OF_INIT_STACK, 65 .sp1 = TOP_OF_INIT_STACK,
67#endif
68 66
69#ifdef CONFIG_X86_32 67#ifdef CONFIG_X86_32
70 .ss0 = __KERNEL_DS, 68 .ss0 = __KERNEL_DS,
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 0ae659de21eb..2924fd447e61 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -285,7 +285,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
285 * current_thread_info(). Refresh the SYSENTER configuration in 285 * current_thread_info(). Refresh the SYSENTER configuration in
286 * case prev or next is vm86. 286 * case prev or next is vm86.
287 */ 287 */
288 update_sp0(next_p); 288 update_task_stack(next_p);
289 refresh_sysenter_cs(next); 289 refresh_sysenter_cs(next);
290 this_cpu_write(cpu_current_top_of_stack, 290 this_cpu_write(cpu_current_top_of_stack,
291 (unsigned long)task_stack_page(next_p) + 291 (unsigned long)task_stack_page(next_p) +
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 12bb445fb98d..476e3ddf8890 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -478,7 +478,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
478 this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p)); 478 this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p));
479 479
480 /* Reload sp0. */ 480 /* Reload sp0. */
481 update_sp0(next_p); 481 update_task_stack(next_p);
482 482
483 /* 483 /*
484 * Now maybe reload the debug registers and handle I/O bitmaps 484 * Now maybe reload the debug registers and handle I/O bitmaps
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 2f86d883dd95..5d32c55aeb8b 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -866,6 +866,8 @@ void __init setup_arch(char **cmdline_p)
866 866
867 idt_setup_early_traps(); 867 idt_setup_early_traps();
868 early_cpu_init(); 868 early_cpu_init();
869 arch_init_ideal_nops();
870 jump_label_init();
869 early_ioremap_init(); 871 early_ioremap_init();
870 872
871 setup_olpc_ofw_pgd(); 873 setup_olpc_ofw_pgd();
@@ -1012,6 +1014,7 @@ void __init setup_arch(char **cmdline_p)
1012 */ 1014 */
1013 init_hypervisor_platform(); 1015 init_hypervisor_platform();
1014 1016
1017 tsc_early_init();
1015 x86_init.resources.probe_roms(); 1018 x86_init.resources.probe_roms();
1016 1019
1017 /* after parse_early_param, so could debug it */ 1020 /* after parse_early_param, so could debug it */
@@ -1197,11 +1200,6 @@ void __init setup_arch(char **cmdline_p)
1197 1200
1198 memblock_find_dma_reserve(); 1201 memblock_find_dma_reserve();
1199 1202
1200#ifdef CONFIG_KVM_GUEST
1201 kvmclock_init();
1202#endif
1203
1204 tsc_early_delay_calibrate();
1205 if (!early_xdbc_setup_hardware()) 1203 if (!early_xdbc_setup_hardware())
1206 early_xdbc_register_console(); 1204 early_xdbc_register_console();
1207 1205
@@ -1272,8 +1270,6 @@ void __init setup_arch(char **cmdline_p)
1272 1270
1273 mcheck_init(); 1271 mcheck_init();
1274 1272
1275 arch_init_ideal_nops();
1276
1277 register_refined_jiffies(CLOCK_TICK_RATE); 1273 register_refined_jiffies(CLOCK_TICK_RATE);
1278 1274
1279#ifdef CONFIG_EFI 1275#ifdef CONFIG_EFI
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 093f2ea5dd56..7627455047c2 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -81,16 +81,6 @@ EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
81 81
82#ifdef CONFIG_HAVE_RELIABLE_STACKTRACE 82#ifdef CONFIG_HAVE_RELIABLE_STACKTRACE
83 83
84#define STACKTRACE_DUMP_ONCE(task) ({ \
85 static bool __section(.data.unlikely) __dumped; \
86 \
87 if (!__dumped) { \
88 __dumped = true; \
89 WARN_ON(1); \
90 show_stack(task, NULL); \
91 } \
92})
93
94static int __always_inline 84static int __always_inline
95__save_stack_trace_reliable(struct stack_trace *trace, 85__save_stack_trace_reliable(struct stack_trace *trace,
96 struct task_struct *task) 86 struct task_struct *task)
@@ -99,30 +89,25 @@ __save_stack_trace_reliable(struct stack_trace *trace,
99 struct pt_regs *regs; 89 struct pt_regs *regs;
100 unsigned long addr; 90 unsigned long addr;
101 91
102 for (unwind_start(&state, task, NULL, NULL); !unwind_done(&state); 92 for (unwind_start(&state, task, NULL, NULL);
93 !unwind_done(&state) && !unwind_error(&state);
103 unwind_next_frame(&state)) { 94 unwind_next_frame(&state)) {
104 95
105 regs = unwind_get_entry_regs(&state, NULL); 96 regs = unwind_get_entry_regs(&state, NULL);
106 if (regs) { 97 if (regs) {
98 /* Success path for user tasks */
99 if (user_mode(regs))
100 goto success;
101
107 /* 102 /*
108 * Kernel mode registers on the stack indicate an 103 * Kernel mode registers on the stack indicate an
109 * in-kernel interrupt or exception (e.g., preemption 104 * in-kernel interrupt or exception (e.g., preemption
110 * or a page fault), which can make frame pointers 105 * or a page fault), which can make frame pointers
111 * unreliable. 106 * unreliable.
112 */ 107 */
113 if (!user_mode(regs))
114 return -EINVAL;
115 108
116 /* 109 if (IS_ENABLED(CONFIG_FRAME_POINTER))
117 * The last frame contains the user mode syscall
118 * pt_regs. Skip it and finish the unwind.
119 */
120 unwind_next_frame(&state);
121 if (!unwind_done(&state)) {
122 STACKTRACE_DUMP_ONCE(task);
123 return -EINVAL; 110 return -EINVAL;
124 }
125 break;
126 } 111 }
127 112
128 addr = unwind_get_return_address(&state); 113 addr = unwind_get_return_address(&state);
@@ -132,21 +117,22 @@ __save_stack_trace_reliable(struct stack_trace *trace,
132 * generated code which __kernel_text_address() doesn't know 117 * generated code which __kernel_text_address() doesn't know
133 * about. 118 * about.
134 */ 119 */
135 if (!addr) { 120 if (!addr)
136 STACKTRACE_DUMP_ONCE(task);
137 return -EINVAL; 121 return -EINVAL;
138 }
139 122
140 if (save_stack_address(trace, addr, false)) 123 if (save_stack_address(trace, addr, false))
141 return -EINVAL; 124 return -EINVAL;
142 } 125 }
143 126
144 /* Check for stack corruption */ 127 /* Check for stack corruption */
145 if (unwind_error(&state)) { 128 if (unwind_error(&state))
146 STACKTRACE_DUMP_ONCE(task); 129 return -EINVAL;
130
131 /* Success path for non-user tasks, i.e. kthreads and idle tasks */
132 if (!(task->flags & (PF_KTHREAD | PF_IDLE)))
147 return -EINVAL; 133 return -EINVAL;
148 }
149 134
135success:
150 if (trace->nr_entries < trace->max_entries) 136 if (trace->nr_entries < trace->max_entries)
151 trace->entries[trace->nr_entries++] = ULONG_MAX; 137 trace->entries[trace->nr_entries++] = ULONG_MAX;
152 138
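The reworked reliable-unwind loop now has two explicit success paths: user-mode pt_regs terminate a user task's stack, and kernel threads or the idle task may simply run out of frames. Everything else (unwind errors, unknown return addresses, kernel-mode regs on a frame-pointer kernel) makes the trace unreliable. A toy model of that decision logic, with hypothetical simplified frame records:

#include <stdbool.h>
#include <stdio.h>

struct frame { bool has_regs, user_mode, valid_addr; };

static int trace_reliable(const struct frame *f, int n, bool is_kthread)
{
        for (int i = 0; i < n; i++) {
                if (f[i].has_regs)
                        return f[i].user_mode ? 0 : -1; /* user regs end a user task's stack */
                if (!f[i].valid_addr)
                        return -1;                      /* generated code: unreliable */
        }
        /* only kthreads/idle tasks may legitimately run out of frames */
        return is_kthread ? 0 : -1;
}

int main(void)
{
        struct frame user_task[] = {
                { false, false, true },         /* ordinary kernel frame */
                { true,  true,  false },        /* user-mode pt_regs: done */
        };

        printf("%d\n", trace_reliable(user_task, 2, false));   /* 0: reliable */
        return 0;
}
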
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 74392d9d51e0..1463468ba9a0 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -33,16 +33,13 @@ EXPORT_SYMBOL(cpu_khz);
33unsigned int __read_mostly tsc_khz; 33unsigned int __read_mostly tsc_khz;
34EXPORT_SYMBOL(tsc_khz); 34EXPORT_SYMBOL(tsc_khz);
35 35
36#define KHZ 1000
37
36/* 38/*
37 * TSC can be unstable due to cpufreq or due to unsynced TSCs 39 * TSC can be unstable due to cpufreq or due to unsynced TSCs
38 */ 40 */
39static int __read_mostly tsc_unstable; 41static int __read_mostly tsc_unstable;
40 42
41/* native_sched_clock() is called before tsc_init(), so
42 we must start with the TSC soft disabled to prevent
43 erroneous rdtsc usage on !boot_cpu_has(X86_FEATURE_TSC) processors */
44static int __read_mostly tsc_disabled = -1;
45
46static DEFINE_STATIC_KEY_FALSE(__use_tsc); 43static DEFINE_STATIC_KEY_FALSE(__use_tsc);
47 44
48int tsc_clocksource_reliable; 45int tsc_clocksource_reliable;
@@ -106,23 +103,6 @@ void cyc2ns_read_end(void)
106 * -johnstul@us.ibm.com "math is hard, lets go shopping!" 103 * -johnstul@us.ibm.com "math is hard, lets go shopping!"
107 */ 104 */
108 105
109static void cyc2ns_data_init(struct cyc2ns_data *data)
110{
111 data->cyc2ns_mul = 0;
112 data->cyc2ns_shift = 0;
113 data->cyc2ns_offset = 0;
114}
115
116static void __init cyc2ns_init(int cpu)
117{
118 struct cyc2ns *c2n = &per_cpu(cyc2ns, cpu);
119
120 cyc2ns_data_init(&c2n->data[0]);
121 cyc2ns_data_init(&c2n->data[1]);
122
123 seqcount_init(&c2n->seq);
124}
125
126static inline unsigned long long cycles_2_ns(unsigned long long cyc) 106static inline unsigned long long cycles_2_ns(unsigned long long cyc)
127{ 107{
128 struct cyc2ns_data data; 108 struct cyc2ns_data data;
@@ -138,18 +118,11 @@ static inline unsigned long long cycles_2_ns(unsigned long long cyc)
138 return ns; 118 return ns;
139} 119}
140 120
141static void set_cyc2ns_scale(unsigned long khz, int cpu, unsigned long long tsc_now) 121static void __set_cyc2ns_scale(unsigned long khz, int cpu, unsigned long long tsc_now)
142{ 122{
143 unsigned long long ns_now; 123 unsigned long long ns_now;
144 struct cyc2ns_data data; 124 struct cyc2ns_data data;
145 struct cyc2ns *c2n; 125 struct cyc2ns *c2n;
146 unsigned long flags;
147
148 local_irq_save(flags);
149 sched_clock_idle_sleep_event();
150
151 if (!khz)
152 goto done;
153 126
154 ns_now = cycles_2_ns(tsc_now); 127 ns_now = cycles_2_ns(tsc_now);
155 128
@@ -181,13 +154,56 @@ static void set_cyc2ns_scale(unsigned long khz, int cpu, unsigned long long tsc_
181 c2n->data[0] = data; 154 c2n->data[0] = data;
182 raw_write_seqcount_latch(&c2n->seq); 155 raw_write_seqcount_latch(&c2n->seq);
183 c2n->data[1] = data; 156 c2n->data[1] = data;
157}
158
159static void set_cyc2ns_scale(unsigned long khz, int cpu, unsigned long long tsc_now)
160{
161 unsigned long flags;
162
163 local_irq_save(flags);
164 sched_clock_idle_sleep_event();
165
166 if (khz)
167 __set_cyc2ns_scale(khz, cpu, tsc_now);
184 168
185done:
186 sched_clock_idle_wakeup_event(); 169 sched_clock_idle_wakeup_event();
187 local_irq_restore(flags); 170 local_irq_restore(flags);
188} 171}
189 172
190/* 173/*
174 * Initialize cyc2ns for boot cpu
175 */
176static void __init cyc2ns_init_boot_cpu(void)
177{
178 struct cyc2ns *c2n = this_cpu_ptr(&cyc2ns);
179
180 seqcount_init(&c2n->seq);
181 __set_cyc2ns_scale(tsc_khz, smp_processor_id(), rdtsc());
182}
183
184/*
185 * Secondary CPUs do not run through tsc_init(), so set up
186 * all the scale factors for all CPUs, assuming the same
187 * speed as the bootup CPU. (cpufreq notifiers will fix this
188 * up if their speed diverges)
189 */
190static void __init cyc2ns_init_secondary_cpus(void)
191{
192 unsigned int cpu, this_cpu = smp_processor_id();
193 struct cyc2ns *c2n = this_cpu_ptr(&cyc2ns);
194 struct cyc2ns_data *data = c2n->data;
195
196 for_each_possible_cpu(cpu) {
197 if (cpu != this_cpu) {
198 seqcount_init(&c2n->seq);
199 c2n = per_cpu_ptr(&cyc2ns, cpu);
200 c2n->data[0] = data[0];
201 c2n->data[1] = data[1];
202 }
203 }
204}
205
206/*
191 * Scheduler clock - returns current time in nanosec units. 207 * Scheduler clock - returns current time in nanosec units.
192 */ 208 */
193u64 native_sched_clock(void) 209u64 native_sched_clock(void)
@@ -248,8 +264,7 @@ EXPORT_SYMBOL_GPL(check_tsc_unstable);
248#ifdef CONFIG_X86_TSC 264#ifdef CONFIG_X86_TSC
249int __init notsc_setup(char *str) 265int __init notsc_setup(char *str)
250{ 266{
251 pr_warn("Kernel compiled with CONFIG_X86_TSC, cannot disable TSC completely\n"); 267 mark_tsc_unstable("boot parameter notsc");
252 tsc_disabled = 1;
253 return 1; 268 return 1;
254} 269}
255#else 270#else
@@ -665,30 +680,17 @@ static unsigned long cpu_khz_from_cpuid(void)
665 return eax_base_mhz * 1000; 680 return eax_base_mhz * 1000;
666} 681}
667 682
668/** 683/*
669 * native_calibrate_cpu - calibrate the cpu on boot 684 * calibrate cpu using pit, hpet, and ptimer methods. They are available
685 * later in boot after acpi is initialized.
670 */ 686 */
671unsigned long native_calibrate_cpu(void) 687static unsigned long pit_hpet_ptimer_calibrate_cpu(void)
672{ 688{
673 u64 tsc1, tsc2, delta, ref1, ref2; 689 u64 tsc1, tsc2, delta, ref1, ref2;
674 unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; 690 unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX;
675 unsigned long flags, latch, ms, fast_calibrate; 691 unsigned long flags, latch, ms;
676 int hpet = is_hpet_enabled(), i, loopmin; 692 int hpet = is_hpet_enabled(), i, loopmin;
677 693
678 fast_calibrate = cpu_khz_from_cpuid();
679 if (fast_calibrate)
680 return fast_calibrate;
681
682 fast_calibrate = cpu_khz_from_msr();
683 if (fast_calibrate)
684 return fast_calibrate;
685
686 local_irq_save(flags);
687 fast_calibrate = quick_pit_calibrate();
688 local_irq_restore(flags);
689 if (fast_calibrate)
690 return fast_calibrate;
691
692 /* 694 /*
693 * Run 5 calibration loops to get the lowest frequency value 695 * Run 5 calibration loops to get the lowest frequency value
694 * (the best estimate). We use two different calibration modes 696 * (the best estimate). We use two different calibration modes
@@ -831,6 +833,37 @@ unsigned long native_calibrate_cpu(void)
831 return tsc_pit_min; 833 return tsc_pit_min;
832} 834}
833 835
836/**
837 * native_calibrate_cpu_early - can calibrate the cpu early in boot
838 */
839unsigned long native_calibrate_cpu_early(void)
840{
841 unsigned long flags, fast_calibrate = cpu_khz_from_cpuid();
842
843 if (!fast_calibrate)
844 fast_calibrate = cpu_khz_from_msr();
845 if (!fast_calibrate) {
846 local_irq_save(flags);
847 fast_calibrate = quick_pit_calibrate();
848 local_irq_restore(flags);
849 }
850 return fast_calibrate;
851}
852
853
854/**
855 * native_calibrate_cpu - calibrate the cpu
856 */
857static unsigned long native_calibrate_cpu(void)
858{
859 unsigned long tsc_freq = native_calibrate_cpu_early();
860
861 if (!tsc_freq)
862 tsc_freq = pit_hpet_ptimer_calibrate_cpu();
863
864 return tsc_freq;
865}
866
834void recalibrate_cpu_khz(void) 867void recalibrate_cpu_khz(void)
835{ 868{
836#ifndef CONFIG_SMP 869#ifndef CONFIG_SMP
@@ -1307,7 +1340,7 @@ unreg:
1307 1340
1308static int __init init_tsc_clocksource(void) 1341static int __init init_tsc_clocksource(void)
1309{ 1342{
1310 if (!boot_cpu_has(X86_FEATURE_TSC) || tsc_disabled > 0 || !tsc_khz) 1343 if (!boot_cpu_has(X86_FEATURE_TSC) || !tsc_khz)
1311 return 0; 1344 return 0;
1312 1345
1313 if (tsc_unstable) 1346 if (tsc_unstable)
@@ -1341,40 +1374,22 @@ unreg:
1341 */ 1374 */
1342device_initcall(init_tsc_clocksource); 1375device_initcall(init_tsc_clocksource);
1343 1376
1344void __init tsc_early_delay_calibrate(void) 1377static bool __init determine_cpu_tsc_frequencies(bool early)
1345{ 1378{
1346 unsigned long lpj; 1379 /* Make sure that cpu and tsc are not already calibrated */
1347 1380 WARN_ON(cpu_khz || tsc_khz);
1348 if (!boot_cpu_has(X86_FEATURE_TSC)) 1381
1349 return; 1382 if (early) {
1350 1383 cpu_khz = x86_platform.calibrate_cpu();
1351 cpu_khz = x86_platform.calibrate_cpu(); 1384 tsc_khz = x86_platform.calibrate_tsc();
1352 tsc_khz = x86_platform.calibrate_tsc(); 1385 } else {
1353 1386 /* We should not be here with non-native cpu calibration */
1354 tsc_khz = tsc_khz ? : cpu_khz; 1387 WARN_ON(x86_platform.calibrate_cpu != native_calibrate_cpu);
1355 if (!tsc_khz) 1388 cpu_khz = pit_hpet_ptimer_calibrate_cpu();
1356 return;
1357
1358 lpj = tsc_khz * 1000;
1359 do_div(lpj, HZ);
1360 loops_per_jiffy = lpj;
1361}
1362
1363void __init tsc_init(void)
1364{
1365 u64 lpj, cyc;
1366 int cpu;
1367
1368 if (!boot_cpu_has(X86_FEATURE_TSC)) {
1369 setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
1370 return;
1371 } 1389 }
1372 1390
1373 cpu_khz = x86_platform.calibrate_cpu();
1374 tsc_khz = x86_platform.calibrate_tsc();
1375
1376 /* 1391 /*
1377 * Trust non-zero tsc_khz as authorative, 1392 * Trust non-zero tsc_khz as authoritative,
1378 * and use it to sanity check cpu_khz, 1393 * and use it to sanity check cpu_khz,
1379 * which will be off if system timer is off. 1394 * which will be off if system timer is off.
1380 */ 1395 */
@@ -1383,52 +1398,78 @@ void __init tsc_init(void)
1383 else if (abs(cpu_khz - tsc_khz) * 10 > tsc_khz) 1398 else if (abs(cpu_khz - tsc_khz) * 10 > tsc_khz)
1384 cpu_khz = tsc_khz; 1399 cpu_khz = tsc_khz;
1385 1400
1386 if (!tsc_khz) { 1401 if (tsc_khz == 0)
1387 mark_tsc_unstable("could not calculate TSC khz"); 1402 return false;
1388 setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
1389 return;
1390 }
1391 1403
1392 pr_info("Detected %lu.%03lu MHz processor\n", 1404 pr_info("Detected %lu.%03lu MHz processor\n",
1393 (unsigned long)cpu_khz / 1000, 1405 (unsigned long)cpu_khz / KHZ,
1394 (unsigned long)cpu_khz % 1000); 1406 (unsigned long)cpu_khz % KHZ);
1395 1407
1396 if (cpu_khz != tsc_khz) { 1408 if (cpu_khz != tsc_khz) {
1397 pr_info("Detected %lu.%03lu MHz TSC", 1409 pr_info("Detected %lu.%03lu MHz TSC",
1398 (unsigned long)tsc_khz / 1000, 1410 (unsigned long)tsc_khz / KHZ,
1399 (unsigned long)tsc_khz % 1000); 1411 (unsigned long)tsc_khz % KHZ);
1400 } 1412 }
1413 return true;
1414}
1415
1416static unsigned long __init get_loops_per_jiffy(void)
1417{
1418 unsigned long lpj = tsc_khz * KHZ;
1401 1419
1420 do_div(lpj, HZ);
1421 return lpj;
1422}
1423
1424static void __init tsc_enable_sched_clock(void)
1425{
1402 /* Sanitize TSC ADJUST before cyc2ns gets initialized */ 1426 /* Sanitize TSC ADJUST before cyc2ns gets initialized */
1403 tsc_store_and_check_tsc_adjust(true); 1427 tsc_store_and_check_tsc_adjust(true);
1428 cyc2ns_init_boot_cpu();
1429 static_branch_enable(&__use_tsc);
1430}
1431
1432void __init tsc_early_init(void)
1433{
1434 if (!boot_cpu_has(X86_FEATURE_TSC))
1435 return;
1436 if (!determine_cpu_tsc_frequencies(true))
1437 return;
1438 loops_per_jiffy = get_loops_per_jiffy();
1404 1439
1440 tsc_enable_sched_clock();
1441}
1442
1443void __init tsc_init(void)
1444{
1405 /* 1445 /*
1406 * Secondary CPUs do not run through tsc_init(), so set up 1446 * native_calibrate_cpu_early can only calibrate using methods that are
1407 * all the scale factors for all CPUs, assuming the same 1447 * available early in boot.
1408 * speed as the bootup CPU. (cpufreq notifiers will fix this
1409 * up if their speed diverges)
1410 */ 1448 */
1411 cyc = rdtsc(); 1449 if (x86_platform.calibrate_cpu == native_calibrate_cpu_early)
1412 for_each_possible_cpu(cpu) { 1450 x86_platform.calibrate_cpu = native_calibrate_cpu;
1413 cyc2ns_init(cpu);
1414 set_cyc2ns_scale(tsc_khz, cpu, cyc);
1415 }
1416 1451
1417 if (tsc_disabled > 0) 1452 if (!boot_cpu_has(X86_FEATURE_TSC)) {
1453 setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
1418 return; 1454 return;
1455 }
1419 1456
1420 /* now allow native_sched_clock() to use rdtsc */ 1457 if (!tsc_khz) {
1458 /* We failed to determine frequencies earlier, try again */
1459 if (!determine_cpu_tsc_frequencies(false)) {
1460 mark_tsc_unstable("could not calculate TSC khz");
1461 setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
1462 return;
1463 }
1464 tsc_enable_sched_clock();
1465 }
1421 1466
1422 tsc_disabled = 0; 1467 cyc2ns_init_secondary_cpus();
1423 static_branch_enable(&__use_tsc);
1424 1468
1425 if (!no_sched_irq_time) 1469 if (!no_sched_irq_time)
1426 enable_sched_clock_irqtime(); 1470 enable_sched_clock_irqtime();
1427 1471
1428 lpj = ((u64)tsc_khz * 1000); 1472 lpj_fine = get_loops_per_jiffy();
1429 do_div(lpj, HZ);
1430 lpj_fine = lpj;
1431
1432 use_tsc_delay(); 1473 use_tsc_delay();
1433 1474
1434 check_system_tsc_reliable(); 1475 check_system_tsc_reliable();
@@ -1455,7 +1496,7 @@ unsigned long calibrate_delay_is_known(void)
1455 int constant_tsc = cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC); 1496 int constant_tsc = cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC);
1456 const struct cpumask *mask = topology_core_cpumask(cpu); 1497 const struct cpumask *mask = topology_core_cpumask(cpu);
1457 1498
1458 if (tsc_disabled || !constant_tsc || !mask) 1499 if (!constant_tsc || !mask)
1459 return 0; 1500 return 0;
1460 1501
1461 sibling = cpumask_any_but(mask, cpu); 1502 sibling = cpumask_any_but(mask, cpu);
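The TSC rework splits calibration into an early path (CPUID, MSR, quick PIT) and a later PIT/HPET/pmtimer path, and initializes the boot CPU's cyc2ns data as soon as a frequency is known so sched_clock() can use the TSC early in boot. The conversion itself is fixed point: ns = offset + ((cycles * mul) >> shift). A minimal sketch of that arithmetic; the shift and frequency below are assumptions for illustration, and the kernel uses a 128-bit-intermediate multiply where this sketch simply keeps the numbers small.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        unsigned long tsc_khz = 2400000;                /* assume a 2.4 GHz TSC */
        int shift = 22;                                 /* illustrative shift */
        uint64_t mul = (1000000ULL << shift) / tsc_khz; /* kHz cycles -> ns */

        uint64_t cycles = 4800000;                      /* ~2 ms worth of cycles */
        uint64_t ns = (cycles * mul) >> shift;

        printf("mul=%llu shift=%d -> %llu ns\n",
               (unsigned long long)mul, shift, (unsigned long long)ns);
        return 0;
}
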
diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c
index 19afdbd7d0a7..27ef714d886c 100644
--- a/arch/x86/kernel/tsc_msr.c
+++ b/arch/x86/kernel/tsc_msr.c
@@ -1,17 +1,19 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * tsc_msr.c - TSC frequency enumeration via MSR 3 * TSC frequency enumeration via MSR
3 * 4 *
4 * Copyright (C) 2013 Intel Corporation 5 * Copyright (C) 2013, 2018 Intel Corporation
5 * Author: Bin Gao <bin.gao@intel.com> 6 * Author: Bin Gao <bin.gao@intel.com>
6 *
7 * This file is released under the GPLv2.
8 */ 7 */
9 8
10#include <linux/kernel.h> 9#include <linux/kernel.h>
11#include <asm/processor.h> 10
12#include <asm/setup.h>
13#include <asm/apic.h> 11#include <asm/apic.h>
12#include <asm/cpu_device_id.h>
13#include <asm/intel-family.h>
14#include <asm/msr.h>
14#include <asm/param.h> 15#include <asm/param.h>
16#include <asm/tsc.h>
15 17
16#define MAX_NUM_FREQS 9 18#define MAX_NUM_FREQS 9
17 19
@@ -23,44 +25,48 @@
23 * field msr_plat does. 25 * field msr_plat does.
24 */ 26 */
25struct freq_desc { 27struct freq_desc {
26 u8 x86_family; /* CPU family */
27 u8 x86_model; /* model */
28 u8 msr_plat; /* 1: use MSR_PLATFORM_INFO, 0: MSR_IA32_PERF_STATUS */ 28 u8 msr_plat; /* 1: use MSR_PLATFORM_INFO, 0: MSR_IA32_PERF_STATUS */
29 u32 freqs[MAX_NUM_FREQS]; 29 u32 freqs[MAX_NUM_FREQS];
30}; 30};
31 31
32static struct freq_desc freq_desc_tables[] = { 32/*
33 /* PNW */ 33 * Penwell and Clovertrail use spread spectrum clock,
34 { 6, 0x27, 0, { 0, 0, 0, 0, 0, 99840, 0, 83200 } }, 34 * so the freq number is not exactly the same as reported
35 /* CLV+ */ 35 * by MSR based on SDM.
36 { 6, 0x35, 0, { 0, 133200, 0, 0, 0, 99840, 0, 83200 } }, 36 */
37 /* TNG - Intel Atom processor Z3400 series */ 37static const struct freq_desc freq_desc_pnw = {
38 { 6, 0x4a, 1, { 0, 100000, 133300, 0, 0, 0, 0, 0 } }, 38 0, { 0, 0, 0, 0, 0, 99840, 0, 83200 }
39 /* VLV2 - Intel Atom processor E3000, Z3600, Z3700 series */
40 { 6, 0x37, 1, { 83300, 100000, 133300, 116700, 80000, 0, 0, 0 } },
41 /* ANN - Intel Atom processor Z3500 series */
42 { 6, 0x5a, 1, { 83300, 100000, 133300, 100000, 0, 0, 0, 0 } },
43 /* AMT - Intel Atom processor X7-Z8000 and X5-Z8000 series */
44 { 6, 0x4c, 1, { 83300, 100000, 133300, 116700,
45 80000, 93300, 90000, 88900, 87500 } },
46}; 39};
47 40
48static int match_cpu(u8 family, u8 model) 41static const struct freq_desc freq_desc_clv = {
49{ 42 0, { 0, 133200, 0, 0, 0, 99840, 0, 83200 }
50 int i; 43};
51 44
52 for (i = 0; i < ARRAY_SIZE(freq_desc_tables); i++) { 45static const struct freq_desc freq_desc_byt = {
53 if ((family == freq_desc_tables[i].x86_family) && 46 1, { 83300, 100000, 133300, 116700, 80000, 0, 0, 0 }
54 (model == freq_desc_tables[i].x86_model)) 47};
55 return i;
56 }
57 48
58 return -1; 49static const struct freq_desc freq_desc_cht = {
59} 50 1, { 83300, 100000, 133300, 116700, 80000, 93300, 90000, 88900, 87500 }
51};
60 52
61/* Map CPU reference clock freq ID(0-7) to CPU reference clock freq(KHz) */ 53static const struct freq_desc freq_desc_tng = {
62#define id_to_freq(cpu_index, freq_id) \ 54 1, { 0, 100000, 133300, 0, 0, 0, 0, 0 }
63 (freq_desc_tables[cpu_index].freqs[freq_id]) 55};
56
57static const struct freq_desc freq_desc_ann = {
58 1, { 83300, 100000, 133300, 100000, 0, 0, 0, 0 }
59};
60
61static const struct x86_cpu_id tsc_msr_cpu_ids[] = {
62 INTEL_CPU_FAM6(ATOM_PENWELL, freq_desc_pnw),
63 INTEL_CPU_FAM6(ATOM_CLOVERVIEW, freq_desc_clv),
64 INTEL_CPU_FAM6(ATOM_SILVERMONT1, freq_desc_byt),
65 INTEL_CPU_FAM6(ATOM_AIRMONT, freq_desc_cht),
66 INTEL_CPU_FAM6(ATOM_MERRIFIELD, freq_desc_tng),
67 INTEL_CPU_FAM6(ATOM_MOOREFIELD, freq_desc_ann),
68 {}
69};
64 70
65/* 71/*
66 * MSR-based CPU/TSC frequency discovery for certain CPUs. 72 * MSR-based CPU/TSC frequency discovery for certain CPUs.
@@ -70,18 +76,17 @@ static int match_cpu(u8 family, u8 model)
70 */ 76 */
71unsigned long cpu_khz_from_msr(void) 77unsigned long cpu_khz_from_msr(void)
72{ 78{
73 u32 lo, hi, ratio, freq_id, freq; 79 u32 lo, hi, ratio, freq;
80 const struct freq_desc *freq_desc;
81 const struct x86_cpu_id *id;
74 unsigned long res; 82 unsigned long res;
75 int cpu_index;
76
77 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
78 return 0;
79 83
80 cpu_index = match_cpu(boot_cpu_data.x86, boot_cpu_data.x86_model); 84 id = x86_match_cpu(tsc_msr_cpu_ids);
81 if (cpu_index < 0) 85 if (!id)
82 return 0; 86 return 0;
83 87
84 if (freq_desc_tables[cpu_index].msr_plat) { 88 freq_desc = (struct freq_desc *)id->driver_data;
89 if (freq_desc->msr_plat) {
85 rdmsr(MSR_PLATFORM_INFO, lo, hi); 90 rdmsr(MSR_PLATFORM_INFO, lo, hi);
86 ratio = (lo >> 8) & 0xff; 91 ratio = (lo >> 8) & 0xff;
87 } else { 92 } else {
@@ -91,8 +96,9 @@ unsigned long cpu_khz_from_msr(void)
91 96
92 /* Get FSB FREQ ID */ 97 /* Get FSB FREQ ID */
93 rdmsr(MSR_FSB_FREQ, lo, hi); 98 rdmsr(MSR_FSB_FREQ, lo, hi);
94 freq_id = lo & 0x7; 99
95 freq = id_to_freq(cpu_index, freq_id); 100 /* Map CPU reference clock freq ID(0-7) to CPU reference clock freq(KHz) */
101 freq = freq_desc->freqs[lo & 0x7];
96 102
97 /* TSC frequency = maximum resolved freq * maximum resolved bus ratio */ 103 /* TSC frequency = maximum resolved freq * maximum resolved bus ratio */
98 res = freq * ratio; 104 res = freq * ratio;
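tsc_msr.c now matches the CPU with x86_match_cpu() and hangs a per-model freq_desc off driver_data; the TSC frequency is then freqs[MSR_FSB_FREQ & 7] times the bus ratio. A toy model of that lookup (the model number and MSR contents below are made up; the frequency row is copied from the Valleyview table above):

#include <stdint.h>
#include <stdio.h>

struct freq_desc { const uint32_t *freqs; };

static const uint32_t freqs_vlv[8] = {
        83300, 100000, 133300, 116700, 80000, 0, 0, 0
};

static const struct { uint8_t model; struct freq_desc desc; } table[] = {
        { 0x37, { freqs_vlv } },        /* stand-in for the x86_cpu_id table */
};

int main(void)
{
        uint8_t model = 0x37, freq_id = 2, ratio = 16;  /* pretend MSR contents */

        for (unsigned int i = 0; i < sizeof(table) / sizeof(table[0]); i++) {
                if (table[i].model == model) {
                        unsigned long khz =
                                (unsigned long)table[i].desc.freqs[freq_id & 7] * ratio;
                        printf("tsc = %lu kHz\n", khz);
                        return 0;
                }
        }
        return 1;       /* no match: fall back to other calibration methods */
}
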
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index feb28fee6cea..26038eacf74a 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -198,7 +198,7 @@ static int orc_sort_cmp(const void *_a, const void *_b)
198 * whitelisted .o files which didn't get objtool generation. 198 * whitelisted .o files which didn't get objtool generation.
199 */ 199 */
200 orc_a = cur_orc_table + (a - cur_orc_ip_table); 200 orc_a = cur_orc_table + (a - cur_orc_ip_table);
201 return orc_a->sp_reg == ORC_REG_UNDEFINED ? -1 : 1; 201 return orc_a->sp_reg == ORC_REG_UNDEFINED && !orc_a->end ? -1 : 1;
202} 202}
203 203
204#ifdef CONFIG_MODULES 204#ifdef CONFIG_MODULES
@@ -352,7 +352,7 @@ static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr
352 352
353bool unwind_next_frame(struct unwind_state *state) 353bool unwind_next_frame(struct unwind_state *state)
354{ 354{
355 unsigned long ip_p, sp, orig_ip, prev_sp = state->sp; 355 unsigned long ip_p, sp, orig_ip = state->ip, prev_sp = state->sp;
356 enum stack_type prev_type = state->stack_info.type; 356 enum stack_type prev_type = state->stack_info.type;
357 struct orc_entry *orc; 357 struct orc_entry *orc;
358 bool indirect = false; 358 bool indirect = false;
@@ -363,9 +363,9 @@ bool unwind_next_frame(struct unwind_state *state)
363 /* Don't let modules unload while we're reading their ORC data. */ 363 /* Don't let modules unload while we're reading their ORC data. */
364 preempt_disable(); 364 preempt_disable();
365 365
366 /* Have we reached the end? */ 366 /* End-of-stack check for user tasks: */
367 if (state->regs && user_mode(state->regs)) 367 if (state->regs && user_mode(state->regs))
368 goto done; 368 goto the_end;
369 369
370 /* 370 /*
371 * Find the orc_entry associated with the text address. 371 * Find the orc_entry associated with the text address.
@@ -374,9 +374,16 @@ bool unwind_next_frame(struct unwind_state *state)
374 * calls and calls to noreturn functions. 374 * calls and calls to noreturn functions.
375 */ 375 */
376 orc = orc_find(state->signal ? state->ip : state->ip - 1); 376 orc = orc_find(state->signal ? state->ip : state->ip - 1);
377 if (!orc || orc->sp_reg == ORC_REG_UNDEFINED) 377 if (!orc)
378 goto done; 378 goto err;
379 orig_ip = state->ip; 379
380 /* End-of-stack check for kernel threads: */
381 if (orc->sp_reg == ORC_REG_UNDEFINED) {
382 if (!orc->end)
383 goto err;
384
385 goto the_end;
386 }
380 387
381 /* Find the previous frame's stack: */ 388 /* Find the previous frame's stack: */
382 switch (orc->sp_reg) { 389 switch (orc->sp_reg) {
@@ -402,7 +409,7 @@ bool unwind_next_frame(struct unwind_state *state)
402 if (!state->regs || !state->full_regs) { 409 if (!state->regs || !state->full_regs) {
403 orc_warn("missing regs for base reg R10 at ip %pB\n", 410 orc_warn("missing regs for base reg R10 at ip %pB\n",
404 (void *)state->ip); 411 (void *)state->ip);
405 goto done; 412 goto err;
406 } 413 }
407 sp = state->regs->r10; 414 sp = state->regs->r10;
408 break; 415 break;
@@ -411,7 +418,7 @@ bool unwind_next_frame(struct unwind_state *state)
411 if (!state->regs || !state->full_regs) { 418 if (!state->regs || !state->full_regs) {
412 orc_warn("missing regs for base reg R13 at ip %pB\n", 419 orc_warn("missing regs for base reg R13 at ip %pB\n",
413 (void *)state->ip); 420 (void *)state->ip);
414 goto done; 421 goto err;
415 } 422 }
416 sp = state->regs->r13; 423 sp = state->regs->r13;
417 break; 424 break;
@@ -420,7 +427,7 @@ bool unwind_next_frame(struct unwind_state *state)
420 if (!state->regs || !state->full_regs) { 427 if (!state->regs || !state->full_regs) {
421 orc_warn("missing regs for base reg DI at ip %pB\n", 428 orc_warn("missing regs for base reg DI at ip %pB\n",
422 (void *)state->ip); 429 (void *)state->ip);
423 goto done; 430 goto err;
424 } 431 }
425 sp = state->regs->di; 432 sp = state->regs->di;
426 break; 433 break;
@@ -429,7 +436,7 @@ bool unwind_next_frame(struct unwind_state *state)
429 if (!state->regs || !state->full_regs) { 436 if (!state->regs || !state->full_regs) {
430 orc_warn("missing regs for base reg DX at ip %pB\n", 437 orc_warn("missing regs for base reg DX at ip %pB\n",
431 (void *)state->ip); 438 (void *)state->ip);
432 goto done; 439 goto err;
433 } 440 }
434 sp = state->regs->dx; 441 sp = state->regs->dx;
435 break; 442 break;
@@ -437,12 +444,12 @@ bool unwind_next_frame(struct unwind_state *state)
437 default: 444 default:
438 orc_warn("unknown SP base reg %d for ip %pB\n", 445 orc_warn("unknown SP base reg %d for ip %pB\n",
439 orc->sp_reg, (void *)state->ip); 446 orc->sp_reg, (void *)state->ip);
440 goto done; 447 goto err;
441 } 448 }
442 449
443 if (indirect) { 450 if (indirect) {
444 if (!deref_stack_reg(state, sp, &sp)) 451 if (!deref_stack_reg(state, sp, &sp))
445 goto done; 452 goto err;
446 } 453 }
447 454
448 /* Find IP, SP and possibly regs: */ 455 /* Find IP, SP and possibly regs: */
@@ -451,7 +458,7 @@ bool unwind_next_frame(struct unwind_state *state)
451 ip_p = sp - sizeof(long); 458 ip_p = sp - sizeof(long);
452 459
453 if (!deref_stack_reg(state, ip_p, &state->ip)) 460 if (!deref_stack_reg(state, ip_p, &state->ip))
454 goto done; 461 goto err;
455 462
456 state->ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, 463 state->ip = ftrace_graph_ret_addr(state->task, &state->graph_idx,
457 state->ip, (void *)ip_p); 464 state->ip, (void *)ip_p);
@@ -465,7 +472,7 @@ bool unwind_next_frame(struct unwind_state *state)
465 if (!deref_stack_regs(state, sp, &state->ip, &state->sp)) { 472 if (!deref_stack_regs(state, sp, &state->ip, &state->sp)) {
466 orc_warn("can't dereference registers at %p for ip %pB\n", 473 orc_warn("can't dereference registers at %p for ip %pB\n",
467 (void *)sp, (void *)orig_ip); 474 (void *)sp, (void *)orig_ip);
468 goto done; 475 goto err;
469 } 476 }
470 477
471 state->regs = (struct pt_regs *)sp; 478 state->regs = (struct pt_regs *)sp;
@@ -477,7 +484,7 @@ bool unwind_next_frame(struct unwind_state *state)
477 if (!deref_stack_iret_regs(state, sp, &state->ip, &state->sp)) { 484 if (!deref_stack_iret_regs(state, sp, &state->ip, &state->sp)) {
478 orc_warn("can't dereference iret registers at %p for ip %pB\n", 485 orc_warn("can't dereference iret registers at %p for ip %pB\n",
479 (void *)sp, (void *)orig_ip); 486 (void *)sp, (void *)orig_ip);
480 goto done; 487 goto err;
481 } 488 }
482 489
483 state->regs = (void *)sp - IRET_FRAME_OFFSET; 490 state->regs = (void *)sp - IRET_FRAME_OFFSET;
@@ -500,18 +507,18 @@ bool unwind_next_frame(struct unwind_state *state)
500 507
501 case ORC_REG_PREV_SP: 508 case ORC_REG_PREV_SP:
502 if (!deref_stack_reg(state, sp + orc->bp_offset, &state->bp)) 509 if (!deref_stack_reg(state, sp + orc->bp_offset, &state->bp))
503 goto done; 510 goto err;
504 break; 511 break;
505 512
506 case ORC_REG_BP: 513 case ORC_REG_BP:
507 if (!deref_stack_reg(state, state->bp + orc->bp_offset, &state->bp)) 514 if (!deref_stack_reg(state, state->bp + orc->bp_offset, &state->bp))
508 goto done; 515 goto err;
509 break; 516 break;
510 517
511 default: 518 default:
512 orc_warn("unknown BP base reg %d for ip %pB\n", 519 orc_warn("unknown BP base reg %d for ip %pB\n",
513 orc->bp_reg, (void *)orig_ip); 520 orc->bp_reg, (void *)orig_ip);
514 goto done; 521 goto err;
515 } 522 }
516 523
517 /* Prevent a recursive loop due to bad ORC data: */ 524 /* Prevent a recursive loop due to bad ORC data: */
@@ -520,13 +527,16 @@ bool unwind_next_frame(struct unwind_state *state)
520 state->sp <= prev_sp) { 527 state->sp <= prev_sp) {
521 orc_warn("stack going in the wrong direction? ip=%pB\n", 528 orc_warn("stack going in the wrong direction? ip=%pB\n",
522 (void *)orig_ip); 529 (void *)orig_ip);
523 goto done; 530 goto err;
524 } 531 }
525 532
526 preempt_enable(); 533 preempt_enable();
527 return true; 534 return true;
528 535
529done: 536err:
537 state->error = true;
538
539the_end:
530 preempt_enable(); 540 preempt_enable();
531 state->stack_info.type = STACK_TYPE_UNKNOWN; 541 state->stack_info.type = STACK_TYPE_UNKNOWN;
532 return false; 542 return false;
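The ORC unwinder changes separate a clean end of stack (an orc->end terminator or user-mode registers) from a genuine error; only the latter sets state->error, which the reliable-stacktrace code above then treats as disqualifying. A toy model of that distinction, with simplified stand-in state:

#include <stdbool.h>
#include <stdio.h>

struct unwind_model { bool done, error; };

static bool next_frame(struct unwind_model *s, bool hit_terminator, bool bad_data)
{
        if (bad_data) {         /* e.g. missing ORC entry or corrupt stack */
                s->error = true;
                s->done = true;
                return false;
        }
        if (hit_terminator) {   /* e.g. orc->end or user-mode pt_regs */
                s->done = true;
                return false;
        }
        return true;            /* keep walking */
}

int main(void)
{
        struct unwind_model s = { false, false };

        next_frame(&s, true, false);
        printf("done=%d error=%d\n", s.done, s.error);  /* clean end, trace usable */
        return 0;
}
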
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 9d0b5af7db91..1c03e4aa6474 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -149,7 +149,7 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval)
149 preempt_disable(); 149 preempt_disable();
150 tsk->thread.sp0 = vm86->saved_sp0; 150 tsk->thread.sp0 = vm86->saved_sp0;
151 tsk->thread.sysenter_cs = __KERNEL_CS; 151 tsk->thread.sysenter_cs = __KERNEL_CS;
152 update_sp0(tsk); 152 update_task_stack(tsk);
153 refresh_sysenter_cs(&tsk->thread); 153 refresh_sysenter_cs(&tsk->thread);
154 vm86->saved_sp0 = 0; 154 vm86->saved_sp0 = 0;
155 preempt_enable(); 155 preempt_enable();
@@ -374,7 +374,7 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
374 refresh_sysenter_cs(&tsk->thread); 374 refresh_sysenter_cs(&tsk->thread);
375 } 375 }
376 376
377 update_sp0(tsk); 377 update_task_stack(tsk);
378 preempt_enable(); 378 preempt_enable();
379 379
380 if (vm86->flags & VM86_SCREEN_BITMAP) 380 if (vm86->flags & VM86_SCREEN_BITMAP)
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 5e1458f609a1..8bde0a419f86 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -55,19 +55,22 @@ jiffies_64 = jiffies;
55 * so we can enable protection checks as well as retain 2MB large page 55 * so we can enable protection checks as well as retain 2MB large page
56 * mappings for kernel text. 56 * mappings for kernel text.
57 */ 57 */
58#define X64_ALIGN_RODATA_BEGIN . = ALIGN(HPAGE_SIZE); 58#define X86_ALIGN_RODATA_BEGIN . = ALIGN(HPAGE_SIZE);
59 59
60#define X64_ALIGN_RODATA_END \ 60#define X86_ALIGN_RODATA_END \
61 . = ALIGN(HPAGE_SIZE); \ 61 . = ALIGN(HPAGE_SIZE); \
62 __end_rodata_hpage_align = .; 62 __end_rodata_hpage_align = .; \
63 __end_rodata_aligned = .;
63 64
64#define ALIGN_ENTRY_TEXT_BEGIN . = ALIGN(PMD_SIZE); 65#define ALIGN_ENTRY_TEXT_BEGIN . = ALIGN(PMD_SIZE);
65#define ALIGN_ENTRY_TEXT_END . = ALIGN(PMD_SIZE); 66#define ALIGN_ENTRY_TEXT_END . = ALIGN(PMD_SIZE);
66 67
67#else 68#else
68 69
69#define X64_ALIGN_RODATA_BEGIN 70#define X86_ALIGN_RODATA_BEGIN
70#define X64_ALIGN_RODATA_END 71#define X86_ALIGN_RODATA_END \
72 . = ALIGN(PAGE_SIZE); \
73 __end_rodata_aligned = .;
71 74
72#define ALIGN_ENTRY_TEXT_BEGIN 75#define ALIGN_ENTRY_TEXT_BEGIN
73#define ALIGN_ENTRY_TEXT_END 76#define ALIGN_ENTRY_TEXT_END
@@ -141,9 +144,9 @@ SECTIONS
141 144
142 /* .text should occupy whole number of pages */ 145 /* .text should occupy whole number of pages */
143 . = ALIGN(PAGE_SIZE); 146 . = ALIGN(PAGE_SIZE);
144 X64_ALIGN_RODATA_BEGIN 147 X86_ALIGN_RODATA_BEGIN
145 RO_DATA(PAGE_SIZE) 148 RO_DATA(PAGE_SIZE)
146 X64_ALIGN_RODATA_END 149 X86_ALIGN_RODATA_END
147 150
148 /* Data */ 151 /* Data */
149 .data : AT(ADDR(.data) - LOAD_OFFSET) { 152 .data : AT(ADDR(.data) - LOAD_OFFSET) {
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 3ab867603e81..2792b5573818 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -109,7 +109,7 @@ struct x86_cpuinit_ops x86_cpuinit = {
109static void default_nmi_init(void) { }; 109static void default_nmi_init(void) { };
110 110
111struct x86_platform_ops x86_platform __ro_after_init = { 111struct x86_platform_ops x86_platform __ro_after_init = {
112 .calibrate_cpu = native_calibrate_cpu, 112 .calibrate_cpu = native_calibrate_cpu_early,
113 .calibrate_tsc = native_calibrate_tsc, 113 .calibrate_tsc = native_calibrate_tsc,
114 .get_wallclock = mach_get_cmos_time, 114 .get_wallclock = mach_get_cmos_time,
115 .set_wallclock = mach_set_rtc_mmss, 115 .set_wallclock = mach_set_rtc_mmss,
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index b5cd8465d44f..d536d457517b 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1379,7 +1379,7 @@ static void apic_timer_expired(struct kvm_lapic *apic)
1379 * using swait_active() is safe. 1379 * using swait_active() is safe.
1380 */ 1380 */
1381 if (swait_active(q)) 1381 if (swait_active(q))
1382 swake_up(q); 1382 swake_up_one(q);
1383 1383
1384 if (apic_lvtt_tscdeadline(apic)) 1384 if (apic_lvtt_tscdeadline(apic))
1385 ktimer->expired_tscdeadline = ktimer->tscdeadline; 1385 ktimer->expired_tscdeadline = ktimer->tscdeadline;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index e30da9a2430c..5d8e317c2b04 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -7893,6 +7893,8 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
7893 HRTIMER_MODE_REL_PINNED); 7893 HRTIMER_MODE_REL_PINNED);
7894 vmx->nested.preemption_timer.function = vmx_preemption_timer_fn; 7894 vmx->nested.preemption_timer.function = vmx_preemption_timer_fn;
7895 7895
7896 vmx->nested.vpid02 = allocate_vpid();
7897
7896 vmx->nested.vmxon = true; 7898 vmx->nested.vmxon = true;
7897 return 0; 7899 return 0;
7898 7900
@@ -8480,21 +8482,20 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
8480/* Emulate the VMPTRST instruction */ 8482/* Emulate the VMPTRST instruction */
8481static int handle_vmptrst(struct kvm_vcpu *vcpu) 8483static int handle_vmptrst(struct kvm_vcpu *vcpu)
8482{ 8484{
8483 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); 8485 unsigned long exit_qual = vmcs_readl(EXIT_QUALIFICATION);
8484 u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); 8486 u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO);
8485 gva_t vmcs_gva; 8487 gpa_t current_vmptr = to_vmx(vcpu)->nested.current_vmptr;
8486 struct x86_exception e; 8488 struct x86_exception e;
8489 gva_t gva;
8487 8490
8488 if (!nested_vmx_check_permission(vcpu)) 8491 if (!nested_vmx_check_permission(vcpu))
8489 return 1; 8492 return 1;
8490 8493
8491 if (get_vmx_mem_address(vcpu, exit_qualification, 8494 if (get_vmx_mem_address(vcpu, exit_qual, instr_info, true, &gva))
8492 vmx_instruction_info, true, &vmcs_gva))
8493 return 1; 8495 return 1;
8494 /* *_system ok, nested_vmx_check_permission has verified cpl=0 */ 8496 /* *_system ok, nested_vmx_check_permission has verified cpl=0 */
8495 if (kvm_write_guest_virt_system(vcpu, vmcs_gva, 8497 if (kvm_write_guest_virt_system(vcpu, gva, (void *)&current_vmptr,
8496 (void *)&to_vmx(vcpu)->nested.current_vmptr, 8498 sizeof(gpa_t), &e)) {
8497 sizeof(u64), &e)) {
8498 kvm_inject_page_fault(vcpu, &e); 8499 kvm_inject_page_fault(vcpu, &e);
8499 return 1; 8500 return 1;
8500 } 8501 }
@@ -10370,11 +10371,9 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
10370 goto free_vmcs; 10371 goto free_vmcs;
10371 } 10372 }
10372 10373
10373 if (nested) { 10374 if (nested)
10374 nested_vmx_setup_ctls_msrs(&vmx->nested.msrs, 10375 nested_vmx_setup_ctls_msrs(&vmx->nested.msrs,
10375 kvm_vcpu_apicv_active(&vmx->vcpu)); 10376 kvm_vcpu_apicv_active(&vmx->vcpu));
10376 vmx->nested.vpid02 = allocate_vpid();
10377 }
10378 10377
10379 vmx->nested.posted_intr_nv = -1; 10378 vmx->nested.posted_intr_nv = -1;
10380 vmx->nested.current_vmptr = -1ull; 10379 vmx->nested.current_vmptr = -1ull;
@@ -10391,7 +10390,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
10391 return &vmx->vcpu; 10390 return &vmx->vcpu;
10392 10391
10393free_vmcs: 10392free_vmcs:
10394 free_vpid(vmx->nested.vpid02);
10395 free_loaded_vmcs(vmx->loaded_vmcs); 10393 free_loaded_vmcs(vmx->loaded_vmcs);
10396free_msrs: 10394free_msrs:
10397 kfree(vmx->guest_msrs); 10395 kfree(vmx->guest_msrs);
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index 298ef1479240..3b24dc05251c 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -256,7 +256,7 @@ ENTRY(__memcpy_mcsafe)
256 256
257 /* Copy successful. Return zero */ 257 /* Copy successful. Return zero */
258.L_done_memcpy_trap: 258.L_done_memcpy_trap:
259 xorq %rax, %rax 259 xorl %eax, %eax
260 ret 260 ret
261ENDPROC(__memcpy_mcsafe) 261ENDPROC(__memcpy_mcsafe)
262EXPORT_SYMBOL_GPL(__memcpy_mcsafe) 262EXPORT_SYMBOL_GPL(__memcpy_mcsafe)
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 2f3c9196b834..a12afff146d1 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -111,6 +111,8 @@ static struct addr_marker address_markers[] = {
111 [END_OF_SPACE_NR] = { -1, NULL } 111 [END_OF_SPACE_NR] = { -1, NULL }
112}; 112};
113 113
114#define INIT_PGD ((pgd_t *) &init_top_pgt)
115
114#else /* CONFIG_X86_64 */ 116#else /* CONFIG_X86_64 */
115 117
116enum address_markers_idx { 118enum address_markers_idx {
@@ -121,6 +123,9 @@ enum address_markers_idx {
121#ifdef CONFIG_HIGHMEM 123#ifdef CONFIG_HIGHMEM
122 PKMAP_BASE_NR, 124 PKMAP_BASE_NR,
123#endif 125#endif
126#ifdef CONFIG_MODIFY_LDT_SYSCALL
127 LDT_NR,
128#endif
124 CPU_ENTRY_AREA_NR, 129 CPU_ENTRY_AREA_NR,
125 FIXADDR_START_NR, 130 FIXADDR_START_NR,
126 END_OF_SPACE_NR, 131 END_OF_SPACE_NR,
@@ -134,11 +139,16 @@ static struct addr_marker address_markers[] = {
134#ifdef CONFIG_HIGHMEM 139#ifdef CONFIG_HIGHMEM
135 [PKMAP_BASE_NR] = { 0UL, "Persistent kmap() Area" }, 140 [PKMAP_BASE_NR] = { 0UL, "Persistent kmap() Area" },
136#endif 141#endif
142#ifdef CONFIG_MODIFY_LDT_SYSCALL
143 [LDT_NR] = { 0UL, "LDT remap" },
144#endif
137 [CPU_ENTRY_AREA_NR] = { 0UL, "CPU entry area" }, 145 [CPU_ENTRY_AREA_NR] = { 0UL, "CPU entry area" },
138 [FIXADDR_START_NR] = { 0UL, "Fixmap area" }, 146 [FIXADDR_START_NR] = { 0UL, "Fixmap area" },
139 [END_OF_SPACE_NR] = { -1, NULL } 147 [END_OF_SPACE_NR] = { -1, NULL }
140}; 148};
141 149
150#define INIT_PGD (swapper_pg_dir)
151
142#endif /* !CONFIG_X86_64 */ 152#endif /* !CONFIG_X86_64 */
143 153
144/* Multipliers for offsets within the PTEs */ 154/* Multipliers for offsets within the PTEs */
@@ -496,11 +506,7 @@ static inline bool is_hypervisor_range(int idx)
496static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd, 506static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
497 bool checkwx, bool dmesg) 507 bool checkwx, bool dmesg)
498{ 508{
499#ifdef CONFIG_X86_64 509 pgd_t *start = INIT_PGD;
500 pgd_t *start = (pgd_t *) &init_top_pgt;
501#else
502 pgd_t *start = swapper_pg_dir;
503#endif
504 pgprotval_t prot, eff; 510 pgprotval_t prot, eff;
505 int i; 511 int i;
506 struct pg_state st = {}; 512 struct pg_state st = {};
@@ -563,12 +569,13 @@ void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user)
563} 569}
564EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level_debugfs); 570EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level_debugfs);
565 571
566static void ptdump_walk_user_pgd_level_checkwx(void) 572void ptdump_walk_user_pgd_level_checkwx(void)
567{ 573{
568#ifdef CONFIG_PAGE_TABLE_ISOLATION 574#ifdef CONFIG_PAGE_TABLE_ISOLATION
569 pgd_t *pgd = (pgd_t *) &init_top_pgt; 575 pgd_t *pgd = INIT_PGD;
570 576
571 if (!static_cpu_has(X86_FEATURE_PTI)) 577 if (!(__supported_pte_mask & _PAGE_NX) ||
578 !static_cpu_has(X86_FEATURE_PTI))
572 return; 579 return;
573 580
574 pr_info("x86/mm: Checking user space page tables\n"); 581 pr_info("x86/mm: Checking user space page tables\n");
@@ -580,7 +587,6 @@ static void ptdump_walk_user_pgd_level_checkwx(void)
580void ptdump_walk_pgd_level_checkwx(void) 587void ptdump_walk_pgd_level_checkwx(void)
581{ 588{
582 ptdump_walk_pgd_level_core(NULL, NULL, true, false); 589 ptdump_walk_pgd_level_core(NULL, NULL, true, false);
583 ptdump_walk_user_pgd_level_checkwx();
584} 590}
585 591
586static int __init pt_dump_init(void) 592static int __init pt_dump_init(void)
@@ -609,6 +615,9 @@ static int __init pt_dump_init(void)
609# endif 615# endif
610 address_markers[FIXADDR_START_NR].start_address = FIXADDR_START; 616 address_markers[FIXADDR_START_NR].start_address = FIXADDR_START;
611 address_markers[CPU_ENTRY_AREA_NR].start_address = CPU_ENTRY_AREA_BASE; 617 address_markers[CPU_ENTRY_AREA_NR].start_address = CPU_ENTRY_AREA_BASE;
618# ifdef CONFIG_MODIFY_LDT_SYSCALL
619 address_markers[LDT_NR].start_address = LDT_BASE_ADDR;
620# endif
612#endif 621#endif
613 return 0; 622 return 0;
614} 623}
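
The INIT_PGD macro added in this hunk lets ptdump_walk_pgd_level_core() pick the right top-level table without an #ifdef at the call site. A minimal, standalone model of that pattern, with stub types standing in for the real pgd_t and page-table roots; all names here are illustrative, not kernel API:

	#include <stdio.h>

	typedef struct { unsigned long val; } pgd_t;

	static pgd_t model_init_top_pgt[4];	/* stands in for init_top_pgt (64-bit) */
	static pgd_t model_swapper_pg_dir[4];	/* stands in for swapper_pg_dir (32-bit) */

	#ifdef MODEL_X86_64
	#define INIT_PGD (model_init_top_pgt)
	#else
	#define INIT_PGD (model_swapper_pg_dir)
	#endif

	int main(void)
	{
		pgd_t *start = INIT_PGD;	/* one expression, no #ifdef in the walker */

		printf("64-bit root %p, 32-bit root %p, walking from %p\n",
		       (void *)model_init_top_pgt, (void *)model_swapper_pg_dir,
		       (void *)start);
		return 0;
	}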
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 2aafa6ab6103..db1c042e9853 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -317,8 +317,6 @@ static noinline int vmalloc_fault(unsigned long address)
317 if (!(address >= VMALLOC_START && address < VMALLOC_END)) 317 if (!(address >= VMALLOC_START && address < VMALLOC_END))
318 return -1; 318 return -1;
319 319
320 WARN_ON_ONCE(in_nmi());
321
322 /* 320 /*
323 * Synchronize this task's top level page-table 321 * Synchronize this task's top level page-table
324 * with the 'reference' page table. 322 * with the 'reference' page table.
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index cee58a972cb2..74b157ac078d 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -773,13 +773,44 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
773 } 773 }
774} 774}
775 775
776/*
777 * begin/end can be in the direct map or the "high kernel mapping"
778 * used for the kernel image only. free_init_pages() will do the
779 * right thing for either kind of address.
780 */
781void free_kernel_image_pages(void *begin, void *end)
782{
783 unsigned long begin_ul = (unsigned long)begin;
784 unsigned long end_ul = (unsigned long)end;
785 unsigned long len_pages = (end_ul - begin_ul) >> PAGE_SHIFT;
786
787
788 free_init_pages("unused kernel image", begin_ul, end_ul);
789
790 /*
791 * PTI maps some of the kernel into userspace. For performance,
792 * this includes some kernel areas that do not contain secrets.
793 * Those areas might be adjacent to the parts of the kernel image
794 * being freed, which may contain secrets. Remove the "high kernel
795 * image mapping" for these freed areas, ensuring they are not even
796 * potentially vulnerable to Meltdown regardless of the specific
797 * optimizations PTI is currently using.
798 *
799 * The "noalias" prevents unmapping the direct map alias which is
800 * needed to access the freed pages.
801 *
802 * This is only valid for 64bit kernels. 32bit has only one mapping
803 * which can't be treated in this way for obvious reasons.
804 */
805 if (IS_ENABLED(CONFIG_X86_64) && cpu_feature_enabled(X86_FEATURE_PTI))
806 set_memory_np_noalias(begin_ul, len_pages);
807}
808
776void __ref free_initmem(void) 809void __ref free_initmem(void)
777{ 810{
778 e820__reallocate_tables(); 811 e820__reallocate_tables();
779 812
780 free_init_pages("unused kernel", 813 free_kernel_image_pages(&__init_begin, &__init_end);
781 (unsigned long)(&__init_begin),
782 (unsigned long)(&__init_end));
783} 814}
784 815
785#ifdef CONFIG_BLK_DEV_INITRD 816#ifdef CONFIG_BLK_DEV_INITRD
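
The comment block above spells out the two steps free_kernel_image_pages() performs under PTI. A minimal usage sketch, assuming the same section symbols the kernel already exposes; this mirrors the free_initmem() caller in the hunk and is illustrative, not an additional kernel API:

	extern char __init_begin[], __init_end[];

	static void example_free_initmem(void)
	{
		/*
		 * One call frees the [begin, end) range via free_init_pages()
		 * and, on 64-bit with PTI enabled, also applies
		 * set_memory_np_noalias() so the high kernel-image mapping of
		 * the freed pages disappears while the direct-map alias stays
		 * usable for reusing the memory.
		 */
		free_kernel_image_pages(&__init_begin, &__init_end);
	}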
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index a688617c727e..dd519f372169 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1283,20 +1283,10 @@ void mark_rodata_ro(void)
1283 set_memory_ro(start, (end-start) >> PAGE_SHIFT); 1283 set_memory_ro(start, (end-start) >> PAGE_SHIFT);
1284#endif 1284#endif
1285 1285
1286 free_init_pages("unused kernel", 1286 free_kernel_image_pages((void *)text_end, (void *)rodata_start);
1287 (unsigned long) __va(__pa_symbol(text_end)), 1287 free_kernel_image_pages((void *)rodata_end, (void *)_sdata);
1288 (unsigned long) __va(__pa_symbol(rodata_start)));
1289 free_init_pages("unused kernel",
1290 (unsigned long) __va(__pa_symbol(rodata_end)),
1291 (unsigned long) __va(__pa_symbol(_sdata)));
1292 1288
1293 debug_checkwx(); 1289 debug_checkwx();
1294
1295 /*
1296 * Do this after all of the manipulation of the
1297 * kernel text page tables are complete.
1298 */
1299 pti_clone_kernel_text();
1300} 1290}
1301 1291
1302int kern_addr_valid(unsigned long addr) 1292int kern_addr_valid(unsigned long addr)
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c
index 34a2a3bfde9c..b54d52a2d00a 100644
--- a/arch/x86/mm/numa_emulation.c
+++ b/arch/x86/mm/numa_emulation.c
@@ -61,7 +61,7 @@ static int __init emu_setup_memblk(struct numa_meminfo *ei,
61 eb->nid = nid; 61 eb->nid = nid;
62 62
63 if (emu_nid_to_phys[nid] == NUMA_NO_NODE) 63 if (emu_nid_to_phys[nid] == NUMA_NO_NODE)
64 emu_nid_to_phys[nid] = nid; 64 emu_nid_to_phys[nid] = pb->nid;
65 65
66 pb->start += size; 66 pb->start += size;
67 if (pb->start >= pb->end) { 67 if (pb->start >= pb->end) {
@@ -198,40 +198,73 @@ static u64 __init find_end_of_node(u64 start, u64 max_addr, u64 size)
198 return end; 198 return end;
199} 199}
200 200
201static u64 uniform_size(u64 max_addr, u64 base, u64 hole, int nr_nodes)
202{
203 unsigned long max_pfn = PHYS_PFN(max_addr);
204 unsigned long base_pfn = PHYS_PFN(base);
205 unsigned long hole_pfns = PHYS_PFN(hole);
206
207 return PFN_PHYS((max_pfn - base_pfn - hole_pfns) / nr_nodes);
208}
209
201/* 210/*
202 * Sets up fake nodes of `size' interleaved over physical nodes ranging from 211 * Sets up fake nodes of `size' interleaved over physical nodes ranging from
203 * `addr' to `max_addr'. 212 * `addr' to `max_addr'.
204 * 213 *
205 * Returns zero on success or negative on error. 214 * Returns zero on success or negative on error.
206 */ 215 */
207static int __init split_nodes_size_interleave(struct numa_meminfo *ei, 216static int __init split_nodes_size_interleave_uniform(struct numa_meminfo *ei,
208 struct numa_meminfo *pi, 217 struct numa_meminfo *pi,
209 u64 addr, u64 max_addr, u64 size) 218 u64 addr, u64 max_addr, u64 size,
219 int nr_nodes, struct numa_memblk *pblk,
220 int nid)
210{ 221{
211 nodemask_t physnode_mask = numa_nodes_parsed; 222 nodemask_t physnode_mask = numa_nodes_parsed;
223 int i, ret, uniform = 0;
212 u64 min_size; 224 u64 min_size;
213 int nid = 0;
214 int i, ret;
215 225
216 if (!size) 226 if ((!size && !nr_nodes) || (nr_nodes && !pblk))
217 return -1; 227 return -1;
228
218 /* 229 /*
219 * The limit on emulated nodes is MAX_NUMNODES, so the size per node is 230 * In the 'uniform' case split the passed in physical node by
220 * increased accordingly if the requested size is too small. This 231 * nr_nodes, in the non-uniform case, ignore the passed in
221 * creates a uniform distribution of node sizes across the entire 232 * physical block and try to create nodes of at least size
222 * machine (but not necessarily over physical nodes). 233 * @size.
234 *
235 * In the uniform case, split the nodes strictly by physical
236 * capacity, i.e. ignore holes. In the non-uniform case account
237 * for holes and treat @size as a minimum floor.
223 */ 238 */
224 min_size = (max_addr - addr - mem_hole_size(addr, max_addr)) / MAX_NUMNODES; 239 if (!nr_nodes)
225 min_size = max(min_size, FAKE_NODE_MIN_SIZE); 240 nr_nodes = MAX_NUMNODES;
226 if ((min_size & FAKE_NODE_MIN_HASH_MASK) < min_size) 241 else {
227 min_size = (min_size + FAKE_NODE_MIN_SIZE) & 242 nodes_clear(physnode_mask);
228 FAKE_NODE_MIN_HASH_MASK; 243 node_set(pblk->nid, physnode_mask);
244 uniform = 1;
245 }
246
247 if (uniform) {
248 min_size = uniform_size(max_addr, addr, 0, nr_nodes);
249 size = min_size;
250 } else {
251 /*
252 * The limit on emulated nodes is MAX_NUMNODES, so the
253 * size per node is increased accordingly if the
254 * requested size is too small. This creates a uniform
255 * distribution of node sizes across the entire machine
256 * (but not necessarily over physical nodes).
257 */
258 min_size = uniform_size(max_addr, addr,
259 mem_hole_size(addr, max_addr), nr_nodes);
260 }
261 min_size = ALIGN(max(min_size, FAKE_NODE_MIN_SIZE), FAKE_NODE_MIN_SIZE);
229 if (size < min_size) { 262 if (size < min_size) {
230 pr_err("Fake node size %LuMB too small, increasing to %LuMB\n", 263 pr_err("Fake node size %LuMB too small, increasing to %LuMB\n",
231 size >> 20, min_size >> 20); 264 size >> 20, min_size >> 20);
232 size = min_size; 265 size = min_size;
233 } 266 }
234 size &= FAKE_NODE_MIN_HASH_MASK; 267 size = ALIGN_DOWN(size, FAKE_NODE_MIN_SIZE);
235 268
236 /* 269 /*
237 * Fill physical nodes with fake nodes of size until there is no memory 270 * Fill physical nodes with fake nodes of size until there is no memory
@@ -248,10 +281,14 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
248 node_clear(i, physnode_mask); 281 node_clear(i, physnode_mask);
249 continue; 282 continue;
250 } 283 }
284
251 start = pi->blk[phys_blk].start; 285 start = pi->blk[phys_blk].start;
252 limit = pi->blk[phys_blk].end; 286 limit = pi->blk[phys_blk].end;
253 287
254 end = find_end_of_node(start, limit, size); 288 if (uniform)
289 end = start + size;
290 else
291 end = find_end_of_node(start, limit, size);
255 /* 292 /*
256 * If there won't be at least FAKE_NODE_MIN_SIZE of 293 * If there won't be at least FAKE_NODE_MIN_SIZE of
257 * non-reserved memory in ZONE_DMA32 for the next node, 294 * non-reserved memory in ZONE_DMA32 for the next node,
@@ -266,7 +303,8 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
266 * next node, this one must extend to the end of the 303 * next node, this one must extend to the end of the
267 * physical node. 304 * physical node.
268 */ 305 */
269 if (limit - end - mem_hole_size(end, limit) < size) 306 if ((limit - end - mem_hole_size(end, limit) < size)
307 && !uniform)
270 end = limit; 308 end = limit;
271 309
272 ret = emu_setup_memblk(ei, pi, nid++ % MAX_NUMNODES, 310 ret = emu_setup_memblk(ei, pi, nid++ % MAX_NUMNODES,
@@ -276,7 +314,15 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
276 return ret; 314 return ret;
277 } 315 }
278 } 316 }
279 return 0; 317 return nid;
318}
319
320static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
321 struct numa_meminfo *pi,
322 u64 addr, u64 max_addr, u64 size)
323{
324 return split_nodes_size_interleave_uniform(ei, pi, addr, max_addr, size,
325 0, NULL, NUMA_NO_NODE);
280} 326}
281 327
282int __init setup_emu2phys_nid(int *dfl_phys_nid) 328int __init setup_emu2phys_nid(int *dfl_phys_nid)
@@ -346,7 +392,28 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
346 * the fixed node size. Otherwise, if it is just a single number N, 392 * the fixed node size. Otherwise, if it is just a single number N,
347 * split the system RAM into N fake nodes. 393 * split the system RAM into N fake nodes.
348 */ 394 */
349 if (strchr(emu_cmdline, 'M') || strchr(emu_cmdline, 'G')) { 395 if (strchr(emu_cmdline, 'U')) {
396 nodemask_t physnode_mask = numa_nodes_parsed;
397 unsigned long n;
398 int nid = 0;
399
400 n = simple_strtoul(emu_cmdline, &emu_cmdline, 0);
401 ret = -1;
402 for_each_node_mask(i, physnode_mask) {
403 ret = split_nodes_size_interleave_uniform(&ei, &pi,
404 pi.blk[i].start, pi.blk[i].end, 0,
405 n, &pi.blk[i], nid);
406 if (ret < 0)
407 break;
408 if (ret < n) {
409 pr_info("%s: phys: %d only got %d of %ld nodes, failing\n",
410 __func__, i, ret, n);
411 ret = -1;
412 break;
413 }
414 nid = ret;
415 }
416 } else if (strchr(emu_cmdline, 'M') || strchr(emu_cmdline, 'G')) {
350 u64 size; 417 u64 size;
351 418
352 size = memparse(emu_cmdline, &emu_cmdline); 419 size = memparse(emu_cmdline, &emu_cmdline);
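
uniform_size() above divides a node's page span evenly and ignores holes, which is what the new numa=fake=<N>U command-line form asks for. A standalone model of that arithmetic; PAGE_SHIFT and the example span are assumptions for the demo, not taken from the patch:

	#include <stdio.h>

	#define PAGE_SHIFT	12UL
	#define PHYS_PFN(x)	((x) >> PAGE_SHIFT)
	#define PFN_PHYS(x)	((x) << PAGE_SHIFT)

	static unsigned long long uniform_size(unsigned long long max_addr,
					       unsigned long long base,
					       unsigned long long hole,
					       int nr_nodes)
	{
		unsigned long long max_pfn   = PHYS_PFN(max_addr);
		unsigned long long base_pfn  = PHYS_PFN(base);
		unsigned long long hole_pfns = PHYS_PFN(hole);

		/* Split strictly by page count; holes are ignored in 'U' mode. */
		return PFN_PHYS((max_pfn - base_pfn - hole_pfns) / nr_nodes);
	}

	int main(void)
	{
		/* A 16 GiB physical node split into 4 uniform fake nodes. */
		unsigned long long base = 0, max = 16ULL << 30;

		printf("fake node size: %llu MiB\n",
		       uniform_size(max, base, 0, 4) >> 20);
		return 0;
	}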
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 3bded76e8d5c..0a74996a1149 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -53,6 +53,7 @@ static DEFINE_SPINLOCK(cpa_lock);
53#define CPA_FLUSHTLB 1 53#define CPA_FLUSHTLB 1
54#define CPA_ARRAY 2 54#define CPA_ARRAY 2
55#define CPA_PAGES_ARRAY 4 55#define CPA_PAGES_ARRAY 4
56#define CPA_NO_CHECK_ALIAS 8 /* Do not search for aliases */
56 57
57#ifdef CONFIG_PROC_FS 58#ifdef CONFIG_PROC_FS
58static unsigned long direct_pages_count[PG_LEVEL_NUM]; 59static unsigned long direct_pages_count[PG_LEVEL_NUM];
@@ -1486,6 +1487,9 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
1486 1487
1487 /* No alias checking for _NX bit modifications */ 1488 /* No alias checking for _NX bit modifications */
1488 checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX; 1489 checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX;
1490 /* Has caller explicitly disabled alias checking? */
1491 if (in_flag & CPA_NO_CHECK_ALIAS)
1492 checkalias = 0;
1489 1493
1490 ret = __change_page_attr_set_clr(&cpa, checkalias); 1494 ret = __change_page_attr_set_clr(&cpa, checkalias);
1491 1495
@@ -1772,6 +1776,15 @@ int set_memory_np(unsigned long addr, int numpages)
1772 return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_PRESENT), 0); 1776 return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_PRESENT), 0);
1773} 1777}
1774 1778
1779int set_memory_np_noalias(unsigned long addr, int numpages)
1780{
1781 int cpa_flags = CPA_NO_CHECK_ALIAS;
1782
1783 return change_page_attr_set_clr(&addr, numpages, __pgprot(0),
1784 __pgprot(_PAGE_PRESENT), 0,
1785 cpa_flags, NULL);
1786}
1787
1775int set_memory_4k(unsigned long addr, int numpages) 1788int set_memory_4k(unsigned long addr, int numpages)
1776{ 1789{
1777 return change_page_attr_set_clr(&addr, numpages, __pgprot(0), 1790 return change_page_attr_set_clr(&addr, numpages, __pgprot(0),
@@ -1784,6 +1797,12 @@ int set_memory_nonglobal(unsigned long addr, int numpages)
1784 __pgprot(_PAGE_GLOBAL), 0); 1797 __pgprot(_PAGE_GLOBAL), 0);
1785} 1798}
1786 1799
1800int set_memory_global(unsigned long addr, int numpages)
1801{
1802 return change_page_attr_set(&addr, numpages,
1803 __pgprot(_PAGE_GLOBAL), 0);
1804}
1805
1787static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc) 1806static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc)
1788{ 1807{
1789 struct cpa_data cpa; 1808 struct cpa_data cpa;
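
set_memory_np_noalias() differs from set_memory_np() only in that CPA_NO_CHECK_ALIAS suppresses the alias walk, so only the passed-in mapping is unmapped. A minimal sketch of the intended distinction; the caller and range below are illustrative assumptions:

	static void example_unmap_image_alias(unsigned long highmap_start, int numpages)
	{
		/*
		 * Unmap the high kernel-image mapping of freed pages, but leave
		 * the direct-map alias alone: the direct map is still needed to
		 * access and later reuse the freed memory. Plain set_memory_np()
		 * would have searched out and unmapped that alias as well.
		 */
		set_memory_np_noalias(highmap_start, numpages);
	}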
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 47b5951e592b..3ef095c70ae3 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -182,6 +182,14 @@ static void pgd_dtor(pgd_t *pgd)
182 */ 182 */
183#define PREALLOCATED_PMDS UNSHARED_PTRS_PER_PGD 183#define PREALLOCATED_PMDS UNSHARED_PTRS_PER_PGD
184 184
185/*
186 * We allocate separate PMDs for the kernel part of the user page-table
187 * when PTI is enabled. We need them to map the per-process LDT into the
188 * user-space page-table.
189 */
190#define PREALLOCATED_USER_PMDS (static_cpu_has(X86_FEATURE_PTI) ? \
191 KERNEL_PGD_PTRS : 0)
192
185void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd) 193void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
186{ 194{
187 paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT); 195 paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT);
@@ -202,14 +210,14 @@ void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
202 210
203/* No need to prepopulate any pagetable entries in non-PAE modes. */ 211/* No need to prepopulate any pagetable entries in non-PAE modes. */
204#define PREALLOCATED_PMDS 0 212#define PREALLOCATED_PMDS 0
205 213#define PREALLOCATED_USER_PMDS 0
206#endif /* CONFIG_X86_PAE */ 214#endif /* CONFIG_X86_PAE */
207 215
208static void free_pmds(struct mm_struct *mm, pmd_t *pmds[]) 216static void free_pmds(struct mm_struct *mm, pmd_t *pmds[], int count)
209{ 217{
210 int i; 218 int i;
211 219
212 for(i = 0; i < PREALLOCATED_PMDS; i++) 220 for (i = 0; i < count; i++)
213 if (pmds[i]) { 221 if (pmds[i]) {
214 pgtable_pmd_page_dtor(virt_to_page(pmds[i])); 222 pgtable_pmd_page_dtor(virt_to_page(pmds[i]));
215 free_page((unsigned long)pmds[i]); 223 free_page((unsigned long)pmds[i]);
@@ -217,7 +225,7 @@ static void free_pmds(struct mm_struct *mm, pmd_t *pmds[])
217 } 225 }
218} 226}
219 227
220static int preallocate_pmds(struct mm_struct *mm, pmd_t *pmds[]) 228static int preallocate_pmds(struct mm_struct *mm, pmd_t *pmds[], int count)
221{ 229{
222 int i; 230 int i;
223 bool failed = false; 231 bool failed = false;
@@ -226,7 +234,7 @@ static int preallocate_pmds(struct mm_struct *mm, pmd_t *pmds[])
226 if (mm == &init_mm) 234 if (mm == &init_mm)
227 gfp &= ~__GFP_ACCOUNT; 235 gfp &= ~__GFP_ACCOUNT;
228 236
229 for(i = 0; i < PREALLOCATED_PMDS; i++) { 237 for (i = 0; i < count; i++) {
230 pmd_t *pmd = (pmd_t *)__get_free_page(gfp); 238 pmd_t *pmd = (pmd_t *)__get_free_page(gfp);
231 if (!pmd) 239 if (!pmd)
232 failed = true; 240 failed = true;
@@ -241,7 +249,7 @@ static int preallocate_pmds(struct mm_struct *mm, pmd_t *pmds[])
241 } 249 }
242 250
243 if (failed) { 251 if (failed) {
244 free_pmds(mm, pmds); 252 free_pmds(mm, pmds, count);
245 return -ENOMEM; 253 return -ENOMEM;
246 } 254 }
247 255
@@ -254,23 +262,38 @@ static int preallocate_pmds(struct mm_struct *mm, pmd_t *pmds[])
254 * preallocate which never got a corresponding vma will need to be 262 * preallocate which never got a corresponding vma will need to be
255 * freed manually. 263 * freed manually.
256 */ 264 */
265static void mop_up_one_pmd(struct mm_struct *mm, pgd_t *pgdp)
266{
267 pgd_t pgd = *pgdp;
268
269 if (pgd_val(pgd) != 0) {
270 pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd);
271
272 *pgdp = native_make_pgd(0);
273
274 paravirt_release_pmd(pgd_val(pgd) >> PAGE_SHIFT);
275 pmd_free(mm, pmd);
276 mm_dec_nr_pmds(mm);
277 }
278}
279
257static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp) 280static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
258{ 281{
259 int i; 282 int i;
260 283
261 for(i = 0; i < PREALLOCATED_PMDS; i++) { 284 for (i = 0; i < PREALLOCATED_PMDS; i++)
262 pgd_t pgd = pgdp[i]; 285 mop_up_one_pmd(mm, &pgdp[i]);
263 286
264 if (pgd_val(pgd) != 0) { 287#ifdef CONFIG_PAGE_TABLE_ISOLATION
265 pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd);
266 288
267 pgdp[i] = native_make_pgd(0); 289 if (!static_cpu_has(X86_FEATURE_PTI))
290 return;
268 291
269 paravirt_release_pmd(pgd_val(pgd) >> PAGE_SHIFT); 292 pgdp = kernel_to_user_pgdp(pgdp);
270 pmd_free(mm, pmd); 293
271 mm_dec_nr_pmds(mm); 294 for (i = 0; i < PREALLOCATED_USER_PMDS; i++)
272 } 295 mop_up_one_pmd(mm, &pgdp[i + KERNEL_PGD_BOUNDARY]);
273 } 296#endif
274} 297}
275 298
276static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[]) 299static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[])
@@ -296,6 +319,38 @@ static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[])
296 } 319 }
297} 320}
298 321
322#ifdef CONFIG_PAGE_TABLE_ISOLATION
323static void pgd_prepopulate_user_pmd(struct mm_struct *mm,
324 pgd_t *k_pgd, pmd_t *pmds[])
325{
326 pgd_t *s_pgd = kernel_to_user_pgdp(swapper_pg_dir);
327 pgd_t *u_pgd = kernel_to_user_pgdp(k_pgd);
328 p4d_t *u_p4d;
329 pud_t *u_pud;
330 int i;
331
332 u_p4d = p4d_offset(u_pgd, 0);
333 u_pud = pud_offset(u_p4d, 0);
334
335 s_pgd += KERNEL_PGD_BOUNDARY;
336 u_pud += KERNEL_PGD_BOUNDARY;
337
338 for (i = 0; i < PREALLOCATED_USER_PMDS; i++, u_pud++, s_pgd++) {
339 pmd_t *pmd = pmds[i];
340
341 memcpy(pmd, (pmd_t *)pgd_page_vaddr(*s_pgd),
342 sizeof(pmd_t) * PTRS_PER_PMD);
343
344 pud_populate(mm, u_pud, pmd);
345 }
346
347}
348#else
349static void pgd_prepopulate_user_pmd(struct mm_struct *mm,
350 pgd_t *k_pgd, pmd_t *pmds[])
351{
352}
353#endif
299/* 354/*
300 * Xen paravirt assumes pgd table should be in one page. 64 bit kernel also 355 * Xen paravirt assumes pgd table should be in one page. 64 bit kernel also
301 * assumes that pgd should be in one page. 356 * assumes that pgd should be in one page.
@@ -329,9 +384,6 @@ static int __init pgd_cache_init(void)
329 */ 384 */
330 pgd_cache = kmem_cache_create("pgd_cache", PGD_SIZE, PGD_ALIGN, 385 pgd_cache = kmem_cache_create("pgd_cache", PGD_SIZE, PGD_ALIGN,
331 SLAB_PANIC, NULL); 386 SLAB_PANIC, NULL);
332 if (!pgd_cache)
333 return -ENOMEM;
334
335 return 0; 387 return 0;
336} 388}
337core_initcall(pgd_cache_init); 389core_initcall(pgd_cache_init);
@@ -343,7 +395,8 @@ static inline pgd_t *_pgd_alloc(void)
343 * We allocate one page for pgd. 395 * We allocate one page for pgd.
344 */ 396 */
345 if (!SHARED_KERNEL_PMD) 397 if (!SHARED_KERNEL_PMD)
346 return (pgd_t *)__get_free_page(PGALLOC_GFP); 398 return (pgd_t *)__get_free_pages(PGALLOC_GFP,
399 PGD_ALLOCATION_ORDER);
347 400
348 /* 401 /*
349 * Now PAE kernel is not running as a Xen domain. We can allocate 402 * Now PAE kernel is not running as a Xen domain. We can allocate
@@ -355,7 +408,7 @@ static inline pgd_t *_pgd_alloc(void)
355static inline void _pgd_free(pgd_t *pgd) 408static inline void _pgd_free(pgd_t *pgd)
356{ 409{
357 if (!SHARED_KERNEL_PMD) 410 if (!SHARED_KERNEL_PMD)
358 free_page((unsigned long)pgd); 411 free_pages((unsigned long)pgd, PGD_ALLOCATION_ORDER);
359 else 412 else
360 kmem_cache_free(pgd_cache, pgd); 413 kmem_cache_free(pgd_cache, pgd);
361} 414}
@@ -375,6 +428,7 @@ static inline void _pgd_free(pgd_t *pgd)
375pgd_t *pgd_alloc(struct mm_struct *mm) 428pgd_t *pgd_alloc(struct mm_struct *mm)
376{ 429{
377 pgd_t *pgd; 430 pgd_t *pgd;
431 pmd_t *u_pmds[PREALLOCATED_USER_PMDS];
378 pmd_t *pmds[PREALLOCATED_PMDS]; 432 pmd_t *pmds[PREALLOCATED_PMDS];
379 433
380 pgd = _pgd_alloc(); 434 pgd = _pgd_alloc();
@@ -384,12 +438,15 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
384 438
385 mm->pgd = pgd; 439 mm->pgd = pgd;
386 440
387 if (preallocate_pmds(mm, pmds) != 0) 441 if (preallocate_pmds(mm, pmds, PREALLOCATED_PMDS) != 0)
388 goto out_free_pgd; 442 goto out_free_pgd;
389 443
390 if (paravirt_pgd_alloc(mm) != 0) 444 if (preallocate_pmds(mm, u_pmds, PREALLOCATED_USER_PMDS) != 0)
391 goto out_free_pmds; 445 goto out_free_pmds;
392 446
447 if (paravirt_pgd_alloc(mm) != 0)
448 goto out_free_user_pmds;
449
393 /* 450 /*
394 * Make sure that pre-populating the pmds is atomic with 451 * Make sure that pre-populating the pmds is atomic with
395 * respect to anything walking the pgd_list, so that they 452 * respect to anything walking the pgd_list, so that they
@@ -399,13 +456,16 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
399 456
400 pgd_ctor(mm, pgd); 457 pgd_ctor(mm, pgd);
401 pgd_prepopulate_pmd(mm, pgd, pmds); 458 pgd_prepopulate_pmd(mm, pgd, pmds);
459 pgd_prepopulate_user_pmd(mm, pgd, u_pmds);
402 460
403 spin_unlock(&pgd_lock); 461 spin_unlock(&pgd_lock);
404 462
405 return pgd; 463 return pgd;
406 464
465out_free_user_pmds:
466 free_pmds(mm, u_pmds, PREALLOCATED_USER_PMDS);
407out_free_pmds: 467out_free_pmds:
408 free_pmds(mm, pmds); 468 free_pmds(mm, pmds, PREALLOCATED_PMDS);
409out_free_pgd: 469out_free_pgd:
410 _pgd_free(pgd); 470 _pgd_free(pgd);
411out: 471out:
@@ -719,28 +779,50 @@ int pmd_clear_huge(pmd_t *pmd)
719 return 0; 779 return 0;
720} 780}
721 781
782#ifdef CONFIG_X86_64
722/** 783/**
723 * pud_free_pmd_page - Clear pud entry and free pmd page. 784 * pud_free_pmd_page - Clear pud entry and free pmd page.
724 * @pud: Pointer to a PUD. 785 * @pud: Pointer to a PUD.
786 * @addr: Virtual address associated with pud.
725 * 787 *
726 * Context: The pud range has been unmaped and TLB purged. 788 * Context: The pud range has been unmapped and TLB purged.
727 * Return: 1 if clearing the entry succeeded. 0 otherwise. 789 * Return: 1 if clearing the entry succeeded. 0 otherwise.
790 *
791 * NOTE: Callers must allow a single page allocation.
728 */ 792 */
729int pud_free_pmd_page(pud_t *pud) 793int pud_free_pmd_page(pud_t *pud, unsigned long addr)
730{ 794{
731 pmd_t *pmd; 795 pmd_t *pmd, *pmd_sv;
796 pte_t *pte;
732 int i; 797 int i;
733 798
734 if (pud_none(*pud)) 799 if (pud_none(*pud))
735 return 1; 800 return 1;
736 801
737 pmd = (pmd_t *)pud_page_vaddr(*pud); 802 pmd = (pmd_t *)pud_page_vaddr(*pud);
803 pmd_sv = (pmd_t *)__get_free_page(GFP_KERNEL);
804 if (!pmd_sv)
805 return 0;
738 806
739 for (i = 0; i < PTRS_PER_PMD; i++) 807 for (i = 0; i < PTRS_PER_PMD; i++) {
740 if (!pmd_free_pte_page(&pmd[i])) 808 pmd_sv[i] = pmd[i];
741 return 0; 809 if (!pmd_none(pmd[i]))
810 pmd_clear(&pmd[i]);
811 }
742 812
743 pud_clear(pud); 813 pud_clear(pud);
814
815 /* INVLPG to clear all paging-structure caches */
816 flush_tlb_kernel_range(addr, addr + PAGE_SIZE-1);
817
818 for (i = 0; i < PTRS_PER_PMD; i++) {
819 if (!pmd_none(pmd_sv[i])) {
820 pte = (pte_t *)pmd_page_vaddr(pmd_sv[i]);
821 free_page((unsigned long)pte);
822 }
823 }
824
825 free_page((unsigned long)pmd_sv);
744 free_page((unsigned long)pmd); 826 free_page((unsigned long)pmd);
745 827
746 return 1; 828 return 1;
@@ -749,11 +831,12 @@ int pud_free_pmd_page(pud_t *pud)
749/** 831/**
750 * pmd_free_pte_page - Clear pmd entry and free pte page. 832 * pmd_free_pte_page - Clear pmd entry and free pte page.
751 * @pmd: Pointer to a PMD. 833 * @pmd: Pointer to a PMD.
834 * @addr: Virtual address associated with pmd.
752 * 835 *
753 * Context: The pmd range has been unmaped and TLB purged. 836 * Context: The pmd range has been unmapped and TLB purged.
754 * Return: 1 if clearing the entry succeeded. 0 otherwise. 837 * Return: 1 if clearing the entry succeeded. 0 otherwise.
755 */ 838 */
756int pmd_free_pte_page(pmd_t *pmd) 839int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
757{ 840{
758 pte_t *pte; 841 pte_t *pte;
759 842
@@ -762,8 +845,30 @@ int pmd_free_pte_page(pmd_t *pmd)
762 845
763 pte = (pte_t *)pmd_page_vaddr(*pmd); 846 pte = (pte_t *)pmd_page_vaddr(*pmd);
764 pmd_clear(pmd); 847 pmd_clear(pmd);
848
849 /* INVLPG to clear all paging-structure caches */
850 flush_tlb_kernel_range(addr, addr + PAGE_SIZE-1);
851
765 free_page((unsigned long)pte); 852 free_page((unsigned long)pte);
766 853
767 return 1; 854 return 1;
768} 855}
856
857#else /* !CONFIG_X86_64 */
858
859int pud_free_pmd_page(pud_t *pud, unsigned long addr)
860{
861 return pud_none(*pud);
862}
863
864/*
865 * Disable free page handling on x86-PAE. This assures that ioremap()
866 * does not update sync'd pmd entries. See vmalloc_sync_one().
867 */
868int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
869{
870 return pmd_none(*pmd);
871}
872
873#endif /* CONFIG_X86_64 */
769#endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */ 874#endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */
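
With the new count parameter, preallocate_pmds()/free_pmds() serve both the kernel PMDs and, under PTI, the extra user-visible PMDs, and pgd_alloc() has to unwind them in reverse order of allocation. A minimal sketch of that pairing, assuming it lives in the same file as the static helpers; illustrative only:

	static int example_prealloc_both(struct mm_struct *mm,
					 pmd_t *pmds[], pmd_t *u_pmds[])
	{
		if (preallocate_pmds(mm, pmds, PREALLOCATED_PMDS) != 0)
			return -ENOMEM;

		if (preallocate_pmds(mm, u_pmds, PREALLOCATED_USER_PMDS) != 0) {
			/* Unwind in reverse order, exactly as pgd_alloc() does. */
			free_pmds(mm, pmds, PREALLOCATED_PMDS);
			return -ENOMEM;
		}

		return 0;
	}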
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index 4d418e705878..d58b4aba9510 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -54,6 +54,16 @@
54#define __GFP_NOTRACK 0 54#define __GFP_NOTRACK 0
55#endif 55#endif
56 56
57/*
58 * Define the page-table levels we clone for user-space on 32
59 * and 64 bit.
60 */
61#ifdef CONFIG_X86_64
62#define PTI_LEVEL_KERNEL_IMAGE PTI_CLONE_PMD
63#else
64#define PTI_LEVEL_KERNEL_IMAGE PTI_CLONE_PTE
65#endif
66
57static void __init pti_print_if_insecure(const char *reason) 67static void __init pti_print_if_insecure(const char *reason)
58{ 68{
59 if (boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN)) 69 if (boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
@@ -117,7 +127,7 @@ enable:
117 setup_force_cpu_cap(X86_FEATURE_PTI); 127 setup_force_cpu_cap(X86_FEATURE_PTI);
118} 128}
119 129
120pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd) 130pgd_t __pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd)
121{ 131{
122 /* 132 /*
123 * Changes to the high (kernel) portion of the kernelmode page 133 * Changes to the high (kernel) portion of the kernelmode page
@@ -176,7 +186,7 @@ static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
176 186
177 if (pgd_none(*pgd)) { 187 if (pgd_none(*pgd)) {
178 unsigned long new_p4d_page = __get_free_page(gfp); 188 unsigned long new_p4d_page = __get_free_page(gfp);
179 if (!new_p4d_page) 189 if (WARN_ON_ONCE(!new_p4d_page))
180 return NULL; 190 return NULL;
181 191
182 set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page))); 192 set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
@@ -195,13 +205,17 @@ static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
195static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address) 205static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
196{ 206{
197 gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); 207 gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
198 p4d_t *p4d = pti_user_pagetable_walk_p4d(address); 208 p4d_t *p4d;
199 pud_t *pud; 209 pud_t *pud;
200 210
211 p4d = pti_user_pagetable_walk_p4d(address);
212 if (!p4d)
213 return NULL;
214
201 BUILD_BUG_ON(p4d_large(*p4d) != 0); 215 BUILD_BUG_ON(p4d_large(*p4d) != 0);
202 if (p4d_none(*p4d)) { 216 if (p4d_none(*p4d)) {
203 unsigned long new_pud_page = __get_free_page(gfp); 217 unsigned long new_pud_page = __get_free_page(gfp);
204 if (!new_pud_page) 218 if (WARN_ON_ONCE(!new_pud_page))
205 return NULL; 219 return NULL;
206 220
207 set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page))); 221 set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page)));
@@ -215,7 +229,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
215 } 229 }
216 if (pud_none(*pud)) { 230 if (pud_none(*pud)) {
217 unsigned long new_pmd_page = __get_free_page(gfp); 231 unsigned long new_pmd_page = __get_free_page(gfp);
218 if (!new_pmd_page) 232 if (WARN_ON_ONCE(!new_pmd_page))
219 return NULL; 233 return NULL;
220 234
221 set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page))); 235 set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
@@ -224,7 +238,6 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
224 return pmd_offset(pud, address); 238 return pmd_offset(pud, address);
225} 239}
226 240
227#ifdef CONFIG_X86_VSYSCALL_EMULATION
228/* 241/*
229 * Walk the shadow copy of the page tables (optionally) trying to allocate 242 * Walk the shadow copy of the page tables (optionally) trying to allocate
230 * page table pages on the way down. Does not support large pages. 243 * page table pages on the way down. Does not support large pages.
@@ -237,9 +250,13 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
237static __init pte_t *pti_user_pagetable_walk_pte(unsigned long address) 250static __init pte_t *pti_user_pagetable_walk_pte(unsigned long address)
238{ 251{
239 gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); 252 gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
240 pmd_t *pmd = pti_user_pagetable_walk_pmd(address); 253 pmd_t *pmd;
241 pte_t *pte; 254 pte_t *pte;
242 255
256 pmd = pti_user_pagetable_walk_pmd(address);
257 if (!pmd)
258 return NULL;
259
243 /* We can't do anything sensible if we hit a large mapping. */ 260 /* We can't do anything sensible if we hit a large mapping. */
244 if (pmd_large(*pmd)) { 261 if (pmd_large(*pmd)) {
245 WARN_ON(1); 262 WARN_ON(1);
@@ -262,6 +279,7 @@ static __init pte_t *pti_user_pagetable_walk_pte(unsigned long address)
262 return pte; 279 return pte;
263} 280}
264 281
282#ifdef CONFIG_X86_VSYSCALL_EMULATION
265static void __init pti_setup_vsyscall(void) 283static void __init pti_setup_vsyscall(void)
266{ 284{
267 pte_t *pte, *target_pte; 285 pte_t *pte, *target_pte;
@@ -282,8 +300,14 @@ static void __init pti_setup_vsyscall(void)
282static void __init pti_setup_vsyscall(void) { } 300static void __init pti_setup_vsyscall(void) { }
283#endif 301#endif
284 302
303enum pti_clone_level {
304 PTI_CLONE_PMD,
305 PTI_CLONE_PTE,
306};
307
285static void 308static void
286pti_clone_pmds(unsigned long start, unsigned long end, pmdval_t clear) 309pti_clone_pgtable(unsigned long start, unsigned long end,
310 enum pti_clone_level level)
287{ 311{
288 unsigned long addr; 312 unsigned long addr;
289 313
@@ -291,59 +315,105 @@ pti_clone_pmds(unsigned long start, unsigned long end, pmdval_t clear)
291 * Clone the populated PMDs which cover start to end. These PMD areas 315 * Clone the populated PMDs which cover start to end. These PMD areas
292 * can have holes. 316 * can have holes.
293 */ 317 */
294 for (addr = start; addr < end; addr += PMD_SIZE) { 318 for (addr = start; addr < end;) {
319 pte_t *pte, *target_pte;
295 pmd_t *pmd, *target_pmd; 320 pmd_t *pmd, *target_pmd;
296 pgd_t *pgd; 321 pgd_t *pgd;
297 p4d_t *p4d; 322 p4d_t *p4d;
298 pud_t *pud; 323 pud_t *pud;
299 324
325 /* Overflow check */
326 if (addr < start)
327 break;
328
300 pgd = pgd_offset_k(addr); 329 pgd = pgd_offset_k(addr);
301 if (WARN_ON(pgd_none(*pgd))) 330 if (WARN_ON(pgd_none(*pgd)))
302 return; 331 return;
303 p4d = p4d_offset(pgd, addr); 332 p4d = p4d_offset(pgd, addr);
304 if (WARN_ON(p4d_none(*p4d))) 333 if (WARN_ON(p4d_none(*p4d)))
305 return; 334 return;
335
306 pud = pud_offset(p4d, addr); 336 pud = pud_offset(p4d, addr);
307 if (pud_none(*pud)) 337 if (pud_none(*pud)) {
338 addr += PUD_SIZE;
308 continue; 339 continue;
340 }
341
309 pmd = pmd_offset(pud, addr); 342 pmd = pmd_offset(pud, addr);
310 if (pmd_none(*pmd)) 343 if (pmd_none(*pmd)) {
344 addr += PMD_SIZE;
311 continue; 345 continue;
346 }
312 347
313 target_pmd = pti_user_pagetable_walk_pmd(addr); 348 if (pmd_large(*pmd) || level == PTI_CLONE_PMD) {
314 if (WARN_ON(!target_pmd)) 349 target_pmd = pti_user_pagetable_walk_pmd(addr);
315 return; 350 if (WARN_ON(!target_pmd))
316 351 return;
317 /* 352
318 * Only clone present PMDs. This ensures only setting 353 /*
319 * _PAGE_GLOBAL on present PMDs. This should only be 354 * Only clone present PMDs. This ensures only setting
320 * called on well-known addresses anyway, so a non- 355 * _PAGE_GLOBAL on present PMDs. This should only be
321 * present PMD would be a surprise. 356 * called on well-known addresses anyway, so a non-
322 */ 357 * present PMD would be a surprise.
323 if (WARN_ON(!(pmd_flags(*pmd) & _PAGE_PRESENT))) 358 */
324 return; 359 if (WARN_ON(!(pmd_flags(*pmd) & _PAGE_PRESENT)))
325 360 return;
326 /* 361
327 * Setting 'target_pmd' below creates a mapping in both 362 /*
328 * the user and kernel page tables. It is effectively 363 * Setting 'target_pmd' below creates a mapping in both
329 * global, so set it as global in both copies. Note: 364 * the user and kernel page tables. It is effectively
330 * the X86_FEATURE_PGE check is not _required_ because 365 * global, so set it as global in both copies. Note:
331 * the CPU ignores _PAGE_GLOBAL when PGE is not 366 * the X86_FEATURE_PGE check is not _required_ because
332 * supported. The check keeps consistentency with 367 * the CPU ignores _PAGE_GLOBAL when PGE is not
333 * code that only set this bit when supported. 368 * supported. The check keeps consistency with
334 */ 369 * code that only set this bit when supported.
335 if (boot_cpu_has(X86_FEATURE_PGE)) 370 */
336 *pmd = pmd_set_flags(*pmd, _PAGE_GLOBAL); 371 if (boot_cpu_has(X86_FEATURE_PGE))
337 372 *pmd = pmd_set_flags(*pmd, _PAGE_GLOBAL);
338 /* 373
339 * Copy the PMD. That is, the kernelmode and usermode 374 /*
340 * tables will share the last-level page tables of this 375 * Copy the PMD. That is, the kernelmode and usermode
341 * address range 376 * tables will share the last-level page tables of this
342 */ 377 * address range
343 *target_pmd = pmd_clear_flags(*pmd, clear); 378 */
379 *target_pmd = *pmd;
380
381 addr += PMD_SIZE;
382
383 } else if (level == PTI_CLONE_PTE) {
384
385 /* Walk the page-table down to the pte level */
386 pte = pte_offset_kernel(pmd, addr);
387 if (pte_none(*pte)) {
388 addr += PAGE_SIZE;
389 continue;
390 }
391
392 /* Only clone present PTEs */
393 if (WARN_ON(!(pte_flags(*pte) & _PAGE_PRESENT)))
394 return;
395
396 /* Allocate PTE in the user page-table */
397 target_pte = pti_user_pagetable_walk_pte(addr);
398 if (WARN_ON(!target_pte))
399 return;
400
401 /* Set GLOBAL bit in both PTEs */
402 if (boot_cpu_has(X86_FEATURE_PGE))
403 *pte = pte_set_flags(*pte, _PAGE_GLOBAL);
404
405 /* Clone the PTE */
406 *target_pte = *pte;
407
408 addr += PAGE_SIZE;
409
410 } else {
411 BUG();
412 }
344 } 413 }
345} 414}
346 415
416#ifdef CONFIG_X86_64
347/* 417/*
348 * Clone a single p4d (i.e. a top-level entry on 4-level systems and a 418 * Clone a single p4d (i.e. a top-level entry on 4-level systems and a
349 * next-level entry on 5-level systems. 419 * next-level entry on 5-level systems.
@@ -354,6 +424,9 @@ static void __init pti_clone_p4d(unsigned long addr)
354 pgd_t *kernel_pgd; 424 pgd_t *kernel_pgd;
355 425
356 user_p4d = pti_user_pagetable_walk_p4d(addr); 426 user_p4d = pti_user_pagetable_walk_p4d(addr);
427 if (!user_p4d)
428 return;
429
357 kernel_pgd = pgd_offset_k(addr); 430 kernel_pgd = pgd_offset_k(addr);
358 kernel_p4d = p4d_offset(kernel_pgd, addr); 431 kernel_p4d = p4d_offset(kernel_pgd, addr);
359 *user_p4d = *kernel_p4d; 432 *user_p4d = *kernel_p4d;
@@ -367,6 +440,25 @@ static void __init pti_clone_user_shared(void)
367 pti_clone_p4d(CPU_ENTRY_AREA_BASE); 440 pti_clone_p4d(CPU_ENTRY_AREA_BASE);
368} 441}
369 442
443#else /* CONFIG_X86_64 */
444
445/*
446 * On 32 bit PAE systems with 1GB of Kernel address space there is only
447 * one pgd/p4d for the whole kernel. Cloning that would map the whole
448 * address space into the user page-tables, making PTI useless. So clone
449 * the page-table on the PMD level to prevent that.
450 */
451static void __init pti_clone_user_shared(void)
452{
453 unsigned long start, end;
454
455 start = CPU_ENTRY_AREA_BASE;
456 end = start + (PAGE_SIZE * CPU_ENTRY_AREA_PAGES);
457
458 pti_clone_pgtable(start, end, PTI_CLONE_PMD);
459}
460#endif /* CONFIG_X86_64 */
461
370/* 462/*
371 * Clone the ESPFIX P4D into the user space visible page table 463 * Clone the ESPFIX P4D into the user space visible page table
372 */ 464 */
@@ -380,11 +472,11 @@ static void __init pti_setup_espfix64(void)
380/* 472/*
381 * Clone the populated PMDs of the entry and irqentry text and force it RO. 473 * Clone the populated PMDs of the entry and irqentry text and force it RO.
382 */ 474 */
383static void __init pti_clone_entry_text(void) 475static void pti_clone_entry_text(void)
384{ 476{
385 pti_clone_pmds((unsigned long) __entry_text_start, 477 pti_clone_pgtable((unsigned long) __entry_text_start,
386 (unsigned long) __irqentry_text_end, 478 (unsigned long) __irqentry_text_end,
387 _PAGE_RW); 479 PTI_CLONE_PMD);
388} 480}
389 481
390/* 482/*
@@ -435,10 +527,17 @@ static inline bool pti_kernel_image_global_ok(void)
435} 527}
436 528
437/* 529/*
530 * This is the only user for these and it is not arch-generic
531 * like the other set_memory.h functions. Just extern them.
532 */
533extern int set_memory_nonglobal(unsigned long addr, int numpages);
534extern int set_memory_global(unsigned long addr, int numpages);
535
536/*
438 * For some configurations, map all of kernel text into the user page 537 * For some configurations, map all of kernel text into the user page
439 * tables. This reduces TLB misses, especially on non-PCID systems. 538 * tables. This reduces TLB misses, especially on non-PCID systems.
440 */ 539 */
441void pti_clone_kernel_text(void) 540static void pti_clone_kernel_text(void)
442{ 541{
443 /* 542 /*
444 * rodata is part of the kernel image and is normally 543 * rodata is part of the kernel image and is normally
@@ -446,7 +545,8 @@ void pti_clone_kernel_text(void)
446 * clone the areas past rodata, they might contain secrets. 545 * clone the areas past rodata, they might contain secrets.
447 */ 546 */
448 unsigned long start = PFN_ALIGN(_text); 547 unsigned long start = PFN_ALIGN(_text);
449 unsigned long end = (unsigned long)__end_rodata_hpage_align; 548 unsigned long end_clone = (unsigned long)__end_rodata_aligned;
549 unsigned long end_global = PFN_ALIGN((unsigned long)__stop___ex_table);
450 550
451 if (!pti_kernel_image_global_ok()) 551 if (!pti_kernel_image_global_ok())
452 return; 552 return;
@@ -458,14 +558,18 @@ void pti_clone_kernel_text(void)
458 * pti_set_kernel_image_nonglobal() did to clear the 558 * pti_set_kernel_image_nonglobal() did to clear the
459 * global bit. 559 * global bit.
460 */ 560 */
461 pti_clone_pmds(start, end, _PAGE_RW); 561 pti_clone_pgtable(start, end_clone, PTI_LEVEL_KERNEL_IMAGE);
562
563 /*
564 * pti_clone_pgtable() will set the global bit in any PMDs
565 * that it clones, but we also need to get any PTEs in
566 * the last level for areas that are not huge-page-aligned.
567 */
568
569 /* Set the global bit for normal non-__init kernel text: */
570 set_memory_global(start, (end_global - start) >> PAGE_SHIFT);
462} 571}
463 572
464/*
465 * This is the only user for it and it is not arch-generic like
466 * the other set_memory.h functions. Just extern it.
467 */
468extern int set_memory_nonglobal(unsigned long addr, int numpages);
469void pti_set_kernel_image_nonglobal(void) 573void pti_set_kernel_image_nonglobal(void)
470{ 574{
471 /* 575 /*
@@ -477,9 +581,11 @@ void pti_set_kernel_image_nonglobal(void)
477 unsigned long start = PFN_ALIGN(_text); 581 unsigned long start = PFN_ALIGN(_text);
478 unsigned long end = ALIGN((unsigned long)_end, PMD_PAGE_SIZE); 582 unsigned long end = ALIGN((unsigned long)_end, PMD_PAGE_SIZE);
479 583
480 if (pti_kernel_image_global_ok()) 584 /*
481 return; 585 * This clears _PAGE_GLOBAL from the entire kernel image.
482 586 * pti_clone_kernel_text() may put _PAGE_GLOBAL back for
587 * areas that are mapped to userspace.
588 */
483 set_memory_nonglobal(start, (end - start) >> PAGE_SHIFT); 589 set_memory_nonglobal(start, (end - start) >> PAGE_SHIFT);
484} 590}
485 591
@@ -493,6 +599,28 @@ void __init pti_init(void)
493 599
494 pr_info("enabled\n"); 600 pr_info("enabled\n");
495 601
602#ifdef CONFIG_X86_32
603 /*
604 * We check for X86_FEATURE_PCID here. But the init-code will
605 * clear the feature flag on 32 bit because the feature is not
606 * supported on 32 bit anyway. To print the warning we need to
607 * check with cpuid directly again.
608 */
609 if (cpuid_ecx(0x1) & BIT(17)) {
610 /* Use printk to work around pr_fmt() */
611 printk(KERN_WARNING "\n");
612 printk(KERN_WARNING "************************************************************\n");
613 printk(KERN_WARNING "** WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! **\n");
614 printk(KERN_WARNING "** **\n");
615 printk(KERN_WARNING "** You are using 32-bit PTI on a 64-bit PCID-capable CPU. **\n");
616 printk(KERN_WARNING "** Your performance will increase dramatically if you **\n");
617 printk(KERN_WARNING "** switch to a 64-bit kernel! **\n");
618 printk(KERN_WARNING "** **\n");
619 printk(KERN_WARNING "** WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! **\n");
620 printk(KERN_WARNING "************************************************************\n");
621 }
622#endif
623
496 pti_clone_user_shared(); 624 pti_clone_user_shared();
497 625
498 /* Undo all global bits from the init pagetables in head_64.S: */ 626 /* Undo all global bits from the init pagetables in head_64.S: */
@@ -502,3 +630,22 @@ void __init pti_init(void)
502 pti_setup_espfix64(); 630 pti_setup_espfix64();
503 pti_setup_vsyscall(); 631 pti_setup_vsyscall();
504} 632}
633
634/*
635 * Finalize the kernel mappings in the userspace page-table. Some of the
636 * mappings for the kernel image might have changed since pti_init()
637 * cloned them. This is because parts of the kernel image have been
638 * mapped RO and/or NX. These changes need to be cloned again to the
639 * userspace page-table.
640 */
641void pti_finalize(void)
642{
643 /*
644 * We need to clone everything (again) that maps parts of the
645 * kernel image.
646 */
647 pti_clone_entry_text();
648 pti_clone_kernel_text();
649
650 debug_checkwx_user();
651}
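
pti_finalize() only makes sense after mark_rodata_ro() has applied the final RO/NX protections, since it re-clones entry text and kernel text into the user page-table and then checks that table for W+X. A minimal sketch of that ordering; the caller shown is an assumption for illustration, the actual hook-up lives outside this hunk:

	static void example_mark_readonly(void)
	{
		mark_rodata_ro();	/* apply RO/NX, free the image gaps          */
		pti_finalize();		/* clone the final protections into the user
					 * page-table, then debug_checkwx_user()     */
	}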
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 6eb1f34c3c85..752dbf4e0e50 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -7,6 +7,7 @@
7#include <linux/export.h> 7#include <linux/export.h>
8#include <linux/cpu.h> 8#include <linux/cpu.h>
9#include <linux/debugfs.h> 9#include <linux/debugfs.h>
10#include <linux/gfp.h>
10 11
11#include <asm/tlbflush.h> 12#include <asm/tlbflush.h>
12#include <asm/mmu_context.h> 13#include <asm/mmu_context.h>
@@ -35,7 +36,7 @@
35 * necessary invalidation by clearing out the 'ctx_id' which 36 * necessary invalidation by clearing out the 'ctx_id' which
36 * forces a TLB flush when the context is loaded. 37 * forces a TLB flush when the context is loaded.
37 */ 38 */
38void clear_asid_other(void) 39static void clear_asid_other(void)
39{ 40{
40 u16 asid; 41 u16 asid;
41 42
@@ -185,8 +186,11 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
185{ 186{
186 struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm); 187 struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
187 u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid); 188 u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
189 bool was_lazy = this_cpu_read(cpu_tlbstate.is_lazy);
188 unsigned cpu = smp_processor_id(); 190 unsigned cpu = smp_processor_id();
189 u64 next_tlb_gen; 191 u64 next_tlb_gen;
192 bool need_flush;
193 u16 new_asid;
190 194
191 /* 195 /*
192 * NB: The scheduler will call us with prev == next when switching 196 * NB: The scheduler will call us with prev == next when switching
@@ -240,20 +244,41 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
240 next->context.ctx_id); 244 next->context.ctx_id);
241 245
242 /* 246 /*
243 * We don't currently support having a real mm loaded without 247 * Even in lazy TLB mode, the CPU should stay set in the
244 * our cpu set in mm_cpumask(). We have all the bookkeeping 248 * mm_cpumask. The TLB shootdown code can figure out from
245 * in place to figure out whether we would need to flush 249 * from cpu_tlbstate.is_lazy whether or not to send an IPI.
246 * if our cpu were cleared in mm_cpumask(), but we don't
247 * currently use it.
248 */ 250 */
249 if (WARN_ON_ONCE(real_prev != &init_mm && 251 if (WARN_ON_ONCE(real_prev != &init_mm &&
250 !cpumask_test_cpu(cpu, mm_cpumask(next)))) 252 !cpumask_test_cpu(cpu, mm_cpumask(next))))
251 cpumask_set_cpu(cpu, mm_cpumask(next)); 253 cpumask_set_cpu(cpu, mm_cpumask(next));
252 254
253 return; 255 /*
256 * If the CPU is not in lazy TLB mode, we are just switching
257 * from one thread in a process to another thread in the same
258 * process. No TLB flush required.
259 */
260 if (!was_lazy)
261 return;
262
263 /*
264 * Read the tlb_gen to check whether a flush is needed.
265 * If the TLB is up to date, just use it.
266 * The barrier synchronizes with the tlb_gen increment in
267 * the TLB shootdown code.
268 */
269 smp_mb();
270 next_tlb_gen = atomic64_read(&next->context.tlb_gen);
271 if (this_cpu_read(cpu_tlbstate.ctxs[prev_asid].tlb_gen) ==
272 next_tlb_gen)
273 return;
274
275 /*
276 * TLB contents went out of date while we were in lazy
277 * mode. Fall through to the TLB switching code below.
278 */
279 new_asid = prev_asid;
280 need_flush = true;
254 } else { 281 } else {
255 u16 new_asid;
256 bool need_flush;
257 u64 last_ctx_id = this_cpu_read(cpu_tlbstate.last_ctx_id); 282 u64 last_ctx_id = this_cpu_read(cpu_tlbstate.last_ctx_id);
258 283
259 /* 284 /*
@@ -285,53 +310,60 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
285 sync_current_stack_to_mm(next); 310 sync_current_stack_to_mm(next);
286 } 311 }
287 312
288 /* Stop remote flushes for the previous mm */ 313 /*
289 VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(real_prev)) && 314 * Stop remote flushes for the previous mm.
290 real_prev != &init_mm); 315 * Skip kernel threads; we never send init_mm TLB flushing IPIs,
291 cpumask_clear_cpu(cpu, mm_cpumask(real_prev)); 316 * but the bitmap manipulation can cause cache line contention.
317 */
318 if (real_prev != &init_mm) {
319 VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu,
320 mm_cpumask(real_prev)));
321 cpumask_clear_cpu(cpu, mm_cpumask(real_prev));
322 }
292 323
293 /* 324 /*
294 * Start remote flushes and then read tlb_gen. 325 * Start remote flushes and then read tlb_gen.
295 */ 326 */
296 cpumask_set_cpu(cpu, mm_cpumask(next)); 327 if (next != &init_mm)
328 cpumask_set_cpu(cpu, mm_cpumask(next));
297 next_tlb_gen = atomic64_read(&next->context.tlb_gen); 329 next_tlb_gen = atomic64_read(&next->context.tlb_gen);
298 330
299 choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush); 331 choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush);
332 }
300 333
301 if (need_flush) { 334 if (need_flush) {
302 this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id); 335 this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
303 this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen); 336 this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
304 load_new_mm_cr3(next->pgd, new_asid, true); 337 load_new_mm_cr3(next->pgd, new_asid, true);
305
306 /*
307 * NB: This gets called via leave_mm() in the idle path
308 * where RCU functions differently. Tracing normally
309 * uses RCU, so we need to use the _rcuidle variant.
310 *
311 * (There is no good reason for this. The idle code should
312 * be rearranged to call this before rcu_idle_enter().)
313 */
314 trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
315 } else {
316 /* The new ASID is already up to date. */
317 load_new_mm_cr3(next->pgd, new_asid, false);
318
319 /* See above wrt _rcuidle. */
320 trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
321 }
322 338
323 /* 339 /*
324 * Record last user mm's context id, so we can avoid 340 * NB: This gets called via leave_mm() in the idle path
325 * flushing branch buffer with IBPB if we switch back 341 * where RCU functions differently. Tracing normally
326 * to the same user. 342 * uses RCU, so we need to use the _rcuidle variant.
343 *
344 * (There is no good reason for this. The idle code should
345 * be rearranged to call this before rcu_idle_enter().)
327 */ 346 */
328 if (next != &init_mm) 347 trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
329 this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id); 348 } else {
349 /* The new ASID is already up to date. */
350 load_new_mm_cr3(next->pgd, new_asid, false);
330 351
331 this_cpu_write(cpu_tlbstate.loaded_mm, next); 352 /* See above wrt _rcuidle. */
332 this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid); 353 trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
333 } 354 }
334 355
356 /*
357 * Record last user mm's context id, so we can avoid
358 * flushing branch buffer with IBPB if we switch back
359 * to the same user.
360 */
361 if (next != &init_mm)
362 this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id);
363
364 this_cpu_write(cpu_tlbstate.loaded_mm, next);
365 this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid);
366
335 load_mm_cr4(next); 367 load_mm_cr4(next);
336 switch_ldt(real_prev, next); 368 switch_ldt(real_prev, next);
337} 369}
@@ -354,20 +386,7 @@ void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
354 if (this_cpu_read(cpu_tlbstate.loaded_mm) == &init_mm) 386 if (this_cpu_read(cpu_tlbstate.loaded_mm) == &init_mm)
355 return; 387 return;
356 388
357 if (tlb_defer_switch_to_init_mm()) { 389 this_cpu_write(cpu_tlbstate.is_lazy, true);
358 /*
359 * There's a significant optimization that may be possible
360 * here. We have accurate enough TLB flush tracking that we
361 * don't need to maintain coherence of TLB per se when we're
362 * lazy. We do, however, need to maintain coherence of
363 * paging-structure caches. We could, in principle, leave our
364 * old mm loaded and only switch to init_mm when
365 * tlb_remove_page() happens.
366 */
367 this_cpu_write(cpu_tlbstate.is_lazy, true);
368 } else {
369 switch_mm(NULL, &init_mm, NULL);
370 }
371} 390}
372 391
373/* 392/*
@@ -454,6 +473,9 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
454 * paging-structure cache to avoid speculatively reading 473 * paging-structure cache to avoid speculatively reading
455 * garbage into our TLB. Since switching to init_mm is barely 474 * garbage into our TLB. Since switching to init_mm is barely
456 * slower than a minimal flush, just switch to init_mm. 475 * slower than a minimal flush, just switch to init_mm.
476 *
477 * This should be rare, with native_flush_tlb_others skipping
478 * IPIs to lazy TLB mode CPUs.
457 */ 479 */
458 switch_mm_irqs_off(NULL, &init_mm, NULL); 480 switch_mm_irqs_off(NULL, &init_mm, NULL);
459 return; 481 return;
@@ -560,6 +582,9 @@ static void flush_tlb_func_remote(void *info)
560void native_flush_tlb_others(const struct cpumask *cpumask, 582void native_flush_tlb_others(const struct cpumask *cpumask,
561 const struct flush_tlb_info *info) 583 const struct flush_tlb_info *info)
562{ 584{
585 cpumask_var_t lazymask;
586 unsigned int cpu;
587
563 count_vm_tlb_event(NR_TLB_REMOTE_FLUSH); 588 count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
564 if (info->end == TLB_FLUSH_ALL) 589 if (info->end == TLB_FLUSH_ALL)
565 trace_tlb_flush(TLB_REMOTE_SEND_IPI, TLB_FLUSH_ALL); 590 trace_tlb_flush(TLB_REMOTE_SEND_IPI, TLB_FLUSH_ALL);
@@ -583,8 +608,6 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
583 * that UV should be updated so that smp_call_function_many(), 608 * that UV should be updated so that smp_call_function_many(),
584 * etc, are optimal on UV. 609 * etc, are optimal on UV.
585 */ 610 */
586 unsigned int cpu;
587
588 cpu = smp_processor_id(); 611 cpu = smp_processor_id();
589 cpumask = uv_flush_tlb_others(cpumask, info); 612 cpumask = uv_flush_tlb_others(cpumask, info);
590 if (cpumask) 613 if (cpumask)
@@ -592,8 +615,29 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
592 (void *)info, 1); 615 (void *)info, 1);
593 return; 616 return;
594 } 617 }
595 smp_call_function_many(cpumask, flush_tlb_func_remote, 618
619 /*
620 * A temporary cpumask is used in order to skip sending IPIs
621 * to CPUs in lazy TLB state, while keeping them in mm_cpumask(mm).
622 * If the allocation fails, simply IPI every CPU in mm_cpumask.
623 */
624 if (!alloc_cpumask_var(&lazymask, GFP_ATOMIC)) {
625 smp_call_function_many(cpumask, flush_tlb_func_remote,
596 (void *)info, 1); 626 (void *)info, 1);
627 return;
628 }
629
630 cpumask_copy(lazymask, cpumask);
631
632 for_each_cpu(cpu, lazymask) {
633 if (per_cpu(cpu_tlbstate.is_lazy, cpu))
634 cpumask_clear_cpu(cpu, lazymask);
635 }
636
637 smp_call_function_many(lazymask, flush_tlb_func_remote,
638 (void *)info, 1);
639
640 free_cpumask_var(lazymask);
597} 641}
598 642
599/* 643/*
@@ -646,6 +690,68 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
646 put_cpu(); 690 put_cpu();
647} 691}
648 692
693void tlb_flush_remove_tables_local(void *arg)
694{
695 struct mm_struct *mm = arg;
696
697 if (this_cpu_read(cpu_tlbstate.loaded_mm) == mm &&
698 this_cpu_read(cpu_tlbstate.is_lazy)) {
699 /*
700 * We're in lazy mode. We need to at least flush our
701 * paging-structure cache to avoid speculatively reading
702 * garbage into our TLB. Since switching to init_mm is barely
703 * slower than a minimal flush, just switch to init_mm.
704 */
705 switch_mm_irqs_off(NULL, &init_mm, NULL);
706 }
707}
708
709static void mm_fill_lazy_tlb_cpu_mask(struct mm_struct *mm,
710 struct cpumask *lazy_cpus)
711{
712 int cpu;
713
714 for_each_cpu(cpu, mm_cpumask(mm)) {
715 if (!per_cpu(cpu_tlbstate.is_lazy, cpu))
716 cpumask_set_cpu(cpu, lazy_cpus);
717 }
718}
719
720void tlb_flush_remove_tables(struct mm_struct *mm)
721{
722 int cpu = get_cpu();
723 cpumask_var_t lazy_cpus;
724
725 if (cpumask_any_but(mm_cpumask(mm), cpu) >= nr_cpu_ids) {
726 put_cpu();
727 return;
728 }
729
730 if (!zalloc_cpumask_var(&lazy_cpus, GFP_ATOMIC)) {
731 /*
732 * If the cpumask allocation fails, do a brute force flush
733 * on all the CPUs that have this mm loaded.
734 */
735 smp_call_function_many(mm_cpumask(mm),
736 tlb_flush_remove_tables_local, (void *)mm, 1);
737 put_cpu();
738 return;
739 }
740
741 /*
742 * CPUs with !is_lazy either received a TLB flush IPI while the user
743 * pages in this address range were unmapped, or have context switched
744 * and reloaded %CR3 since then.
745 *
746 * Shootdown IPIs at page table freeing time only need to be sent to
747 * CPUs that may have out of date TLB contents.
748 */
749 mm_fill_lazy_tlb_cpu_mask(mm, lazy_cpus);
750 smp_call_function_many(lazy_cpus,
751 tlb_flush_remove_tables_local, (void *)mm, 1);
752 free_cpumask_var(lazy_cpus);
753 put_cpu();
754}
649 755
650static void do_flush_tlb_all(void *info) 756static void do_flush_tlb_all(void *info)
651{ 757{
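
The tlb.c changes above make lazy TLB mode lazier: flush IPIs are no longer sent to CPUs parked in lazy mode, and such a CPU instead checks, when it next switches to a real mm, whether its TLB went stale by comparing generation counters (the smp_mb()/tlb_gen read added to switch_mm_irqs_off above). A minimal user-space sketch of that comparison, using illustrative names (my_tlb_gen, mm_tlb_gen) rather than the kernel's exact fields:

/*
 * Sketch only: a CPU leaving lazy TLB mode compares the generation it
 * last synced to against the mm's current generation; a mismatch means
 * flushes were requested while it was lazy, so the TLB must be flushed.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool lazy_cpu_needs_flush(uint64_t my_tlb_gen, uint64_t mm_tlb_gen)
{
	return my_tlb_gen != mm_tlb_gen;
}

int main(void)
{
	uint64_t mm_tlb_gen = 3;	/* bumped by every remote flush request */
	uint64_t my_tlb_gen = 3;	/* what this CPU last caught up to */

	printf("%d\n", lazy_cpu_needs_flush(my_tlb_gen, mm_tlb_gen));	/* 0 */
	mm_tlb_gen++;			/* a flush arrived while we were lazy */
	printf("%d\n", lazy_cpu_needs_flush(my_tlb_gen, mm_tlb_gen));	/* 1 */
	return 0;
}

Only when the generations differ does the code fall through to the flushing path (need_flush = true) instead of reusing the still-valid TLB contents.
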
diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c
index 55799873ebe5..8f6cc71e0848 100644
--- a/arch/x86/net/bpf_jit_comp32.c
+++ b/arch/x86/net/bpf_jit_comp32.c
@@ -1441,8 +1441,8 @@ static void emit_prologue(u8 **pprog, u32 stack_depth)
1441 1441
1442 /* sub esp,STACK_SIZE */ 1442 /* sub esp,STACK_SIZE */
1443 EMIT2_off32(0x81, 0xEC, STACK_SIZE); 1443 EMIT2_off32(0x81, 0xEC, STACK_SIZE);
1444 /* sub ebp,SCRATCH_SIZE+4+12*/ 1444 /* sub ebp,SCRATCH_SIZE+12*/
1445 EMIT3(0x83, add_1reg(0xE8, IA32_EBP), SCRATCH_SIZE + 16); 1445 EMIT3(0x83, add_1reg(0xE8, IA32_EBP), SCRATCH_SIZE + 12);
1446 /* xor ebx,ebx */ 1446 /* xor ebx,ebx */
1447 EMIT2(0x31, add_2reg(0xC0, IA32_EBX, IA32_EBX)); 1447 EMIT2(0x31, add_2reg(0xC0, IA32_EBX, IA32_EBX));
1448 1448
@@ -1475,8 +1475,8 @@ static void emit_epilogue(u8 **pprog, u32 stack_depth)
1475 /* mov edx,dword ptr [ebp+off]*/ 1475 /* mov edx,dword ptr [ebp+off]*/
1476 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(r0[1])); 1476 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(r0[1]));
1477 1477
1478 /* add ebp,SCRATCH_SIZE+4+12*/ 1478 /* add ebp,SCRATCH_SIZE+12*/
1479 EMIT3(0x83, add_1reg(0xC0, IA32_EBP), SCRATCH_SIZE + 16); 1479 EMIT3(0x83, add_1reg(0xC0, IA32_EBP), SCRATCH_SIZE + 12);
1480 1480
1481 /* mov ebx,dword ptr [ebp-12]*/ 1481 /* mov ebx,dword ptr [ebp-12]*/
1482 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX), -12); 1482 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX), -12);
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 5f2eb3231607..ee5d08f25ce4 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -636,6 +636,8 @@ void efi_switch_mm(struct mm_struct *mm)
636#ifdef CONFIG_EFI_MIXED 636#ifdef CONFIG_EFI_MIXED
637extern efi_status_t efi64_thunk(u32, ...); 637extern efi_status_t efi64_thunk(u32, ...);
638 638
639static DEFINE_SPINLOCK(efi_runtime_lock);
640
639#define runtime_service32(func) \ 641#define runtime_service32(func) \
640({ \ 642({ \
641 u32 table = (u32)(unsigned long)efi.systab; \ 643 u32 table = (u32)(unsigned long)efi.systab; \
@@ -657,17 +659,14 @@ extern efi_status_t efi64_thunk(u32, ...);
657#define efi_thunk(f, ...) \ 659#define efi_thunk(f, ...) \
658({ \ 660({ \
659 efi_status_t __s; \ 661 efi_status_t __s; \
660 unsigned long __flags; \
661 u32 __func; \ 662 u32 __func; \
662 \ 663 \
663 local_irq_save(__flags); \
664 arch_efi_call_virt_setup(); \ 664 arch_efi_call_virt_setup(); \
665 \ 665 \
666 __func = runtime_service32(f); \ 666 __func = runtime_service32(f); \
667 __s = efi64_thunk(__func, __VA_ARGS__); \ 667 __s = efi64_thunk(__func, __VA_ARGS__); \
668 \ 668 \
669 arch_efi_call_virt_teardown(); \ 669 arch_efi_call_virt_teardown(); \
670 local_irq_restore(__flags); \
671 \ 670 \
672 __s; \ 671 __s; \
673}) 672})
@@ -702,14 +701,17 @@ static efi_status_t efi_thunk_get_time(efi_time_t *tm, efi_time_cap_t *tc)
702{ 701{
703 efi_status_t status; 702 efi_status_t status;
704 u32 phys_tm, phys_tc; 703 u32 phys_tm, phys_tc;
704 unsigned long flags;
705 705
706 spin_lock(&rtc_lock); 706 spin_lock(&rtc_lock);
707 spin_lock_irqsave(&efi_runtime_lock, flags);
707 708
708 phys_tm = virt_to_phys_or_null(tm); 709 phys_tm = virt_to_phys_or_null(tm);
709 phys_tc = virt_to_phys_or_null(tc); 710 phys_tc = virt_to_phys_or_null(tc);
710 711
711 status = efi_thunk(get_time, phys_tm, phys_tc); 712 status = efi_thunk(get_time, phys_tm, phys_tc);
712 713
714 spin_unlock_irqrestore(&efi_runtime_lock, flags);
713 spin_unlock(&rtc_lock); 715 spin_unlock(&rtc_lock);
714 716
715 return status; 717 return status;
@@ -719,13 +721,16 @@ static efi_status_t efi_thunk_set_time(efi_time_t *tm)
719{ 721{
720 efi_status_t status; 722 efi_status_t status;
721 u32 phys_tm; 723 u32 phys_tm;
724 unsigned long flags;
722 725
723 spin_lock(&rtc_lock); 726 spin_lock(&rtc_lock);
727 spin_lock_irqsave(&efi_runtime_lock, flags);
724 728
725 phys_tm = virt_to_phys_or_null(tm); 729 phys_tm = virt_to_phys_or_null(tm);
726 730
727 status = efi_thunk(set_time, phys_tm); 731 status = efi_thunk(set_time, phys_tm);
728 732
733 spin_unlock_irqrestore(&efi_runtime_lock, flags);
729 spin_unlock(&rtc_lock); 734 spin_unlock(&rtc_lock);
730 735
731 return status; 736 return status;
@@ -737,8 +742,10 @@ efi_thunk_get_wakeup_time(efi_bool_t *enabled, efi_bool_t *pending,
737{ 742{
738 efi_status_t status; 743 efi_status_t status;
739 u32 phys_enabled, phys_pending, phys_tm; 744 u32 phys_enabled, phys_pending, phys_tm;
745 unsigned long flags;
740 746
741 spin_lock(&rtc_lock); 747 spin_lock(&rtc_lock);
748 spin_lock_irqsave(&efi_runtime_lock, flags);
742 749
743 phys_enabled = virt_to_phys_or_null(enabled); 750 phys_enabled = virt_to_phys_or_null(enabled);
744 phys_pending = virt_to_phys_or_null(pending); 751 phys_pending = virt_to_phys_or_null(pending);
@@ -747,6 +754,7 @@ efi_thunk_get_wakeup_time(efi_bool_t *enabled, efi_bool_t *pending,
747 status = efi_thunk(get_wakeup_time, phys_enabled, 754 status = efi_thunk(get_wakeup_time, phys_enabled,
748 phys_pending, phys_tm); 755 phys_pending, phys_tm);
749 756
757 spin_unlock_irqrestore(&efi_runtime_lock, flags);
750 spin_unlock(&rtc_lock); 758 spin_unlock(&rtc_lock);
751 759
752 return status; 760 return status;
@@ -757,13 +765,16 @@ efi_thunk_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm)
757{ 765{
758 efi_status_t status; 766 efi_status_t status;
759 u32 phys_tm; 767 u32 phys_tm;
768 unsigned long flags;
760 769
761 spin_lock(&rtc_lock); 770 spin_lock(&rtc_lock);
771 spin_lock_irqsave(&efi_runtime_lock, flags);
762 772
763 phys_tm = virt_to_phys_or_null(tm); 773 phys_tm = virt_to_phys_or_null(tm);
764 774
765 status = efi_thunk(set_wakeup_time, enabled, phys_tm); 775 status = efi_thunk(set_wakeup_time, enabled, phys_tm);
766 776
777 spin_unlock_irqrestore(&efi_runtime_lock, flags);
767 spin_unlock(&rtc_lock); 778 spin_unlock(&rtc_lock);
768 779
769 return status; 780 return status;
@@ -781,6 +792,9 @@ efi_thunk_get_variable(efi_char16_t *name, efi_guid_t *vendor,
781 efi_status_t status; 792 efi_status_t status;
782 u32 phys_name, phys_vendor, phys_attr; 793 u32 phys_name, phys_vendor, phys_attr;
783 u32 phys_data_size, phys_data; 794 u32 phys_data_size, phys_data;
795 unsigned long flags;
796
797 spin_lock_irqsave(&efi_runtime_lock, flags);
784 798
785 phys_data_size = virt_to_phys_or_null(data_size); 799 phys_data_size = virt_to_phys_or_null(data_size);
786 phys_vendor = virt_to_phys_or_null(vendor); 800 phys_vendor = virt_to_phys_or_null(vendor);
@@ -791,6 +805,8 @@ efi_thunk_get_variable(efi_char16_t *name, efi_guid_t *vendor,
791 status = efi_thunk(get_variable, phys_name, phys_vendor, 805 status = efi_thunk(get_variable, phys_name, phys_vendor,
792 phys_attr, phys_data_size, phys_data); 806 phys_attr, phys_data_size, phys_data);
793 807
808 spin_unlock_irqrestore(&efi_runtime_lock, flags);
809
794 return status; 810 return status;
795} 811}
796 812
@@ -800,6 +816,34 @@ efi_thunk_set_variable(efi_char16_t *name, efi_guid_t *vendor,
800{ 816{
801 u32 phys_name, phys_vendor, phys_data; 817 u32 phys_name, phys_vendor, phys_data;
802 efi_status_t status; 818 efi_status_t status;
819 unsigned long flags;
820
821 spin_lock_irqsave(&efi_runtime_lock, flags);
822
823 phys_name = virt_to_phys_or_null_size(name, efi_name_size(name));
824 phys_vendor = virt_to_phys_or_null(vendor);
825 phys_data = virt_to_phys_or_null_size(data, data_size);
826
827 /* If data_size is > sizeof(u32) we've got problems */
828 status = efi_thunk(set_variable, phys_name, phys_vendor,
829 attr, data_size, phys_data);
830
831 spin_unlock_irqrestore(&efi_runtime_lock, flags);
832
833 return status;
834}
835
836static efi_status_t
837efi_thunk_set_variable_nonblocking(efi_char16_t *name, efi_guid_t *vendor,
838 u32 attr, unsigned long data_size,
839 void *data)
840{
841 u32 phys_name, phys_vendor, phys_data;
842 efi_status_t status;
843 unsigned long flags;
844
845 if (!spin_trylock_irqsave(&efi_runtime_lock, flags))
846 return EFI_NOT_READY;
803 847
804 phys_name = virt_to_phys_or_null_size(name, efi_name_size(name)); 848 phys_name = virt_to_phys_or_null_size(name, efi_name_size(name));
805 phys_vendor = virt_to_phys_or_null(vendor); 849 phys_vendor = virt_to_phys_or_null(vendor);
@@ -809,6 +853,8 @@ efi_thunk_set_variable(efi_char16_t *name, efi_guid_t *vendor,
809 status = efi_thunk(set_variable, phys_name, phys_vendor, 853 status = efi_thunk(set_variable, phys_name, phys_vendor,
810 attr, data_size, phys_data); 854 attr, data_size, phys_data);
811 855
856 spin_unlock_irqrestore(&efi_runtime_lock, flags);
857
812 return status; 858 return status;
813} 859}
814 860
@@ -819,6 +865,9 @@ efi_thunk_get_next_variable(unsigned long *name_size,
819{ 865{
820 efi_status_t status; 866 efi_status_t status;
821 u32 phys_name_size, phys_name, phys_vendor; 867 u32 phys_name_size, phys_name, phys_vendor;
868 unsigned long flags;
869
870 spin_lock_irqsave(&efi_runtime_lock, flags);
822 871
823 phys_name_size = virt_to_phys_or_null(name_size); 872 phys_name_size = virt_to_phys_or_null(name_size);
824 phys_vendor = virt_to_phys_or_null(vendor); 873 phys_vendor = virt_to_phys_or_null(vendor);
@@ -827,6 +876,8 @@ efi_thunk_get_next_variable(unsigned long *name_size,
827 status = efi_thunk(get_next_variable, phys_name_size, 876 status = efi_thunk(get_next_variable, phys_name_size,
828 phys_name, phys_vendor); 877 phys_name, phys_vendor);
829 878
879 spin_unlock_irqrestore(&efi_runtime_lock, flags);
880
830 return status; 881 return status;
831} 882}
832 883
@@ -835,10 +886,15 @@ efi_thunk_get_next_high_mono_count(u32 *count)
835{ 886{
836 efi_status_t status; 887 efi_status_t status;
837 u32 phys_count; 888 u32 phys_count;
889 unsigned long flags;
890
891 spin_lock_irqsave(&efi_runtime_lock, flags);
838 892
839 phys_count = virt_to_phys_or_null(count); 893 phys_count = virt_to_phys_or_null(count);
840 status = efi_thunk(get_next_high_mono_count, phys_count); 894 status = efi_thunk(get_next_high_mono_count, phys_count);
841 895
896 spin_unlock_irqrestore(&efi_runtime_lock, flags);
897
842 return status; 898 return status;
843} 899}
844 900
@@ -847,10 +903,15 @@ efi_thunk_reset_system(int reset_type, efi_status_t status,
847 unsigned long data_size, efi_char16_t *data) 903 unsigned long data_size, efi_char16_t *data)
848{ 904{
849 u32 phys_data; 905 u32 phys_data;
906 unsigned long flags;
907
908 spin_lock_irqsave(&efi_runtime_lock, flags);
850 909
851 phys_data = virt_to_phys_or_null_size(data, data_size); 910 phys_data = virt_to_phys_or_null_size(data, data_size);
852 911
853 efi_thunk(reset_system, reset_type, status, data_size, phys_data); 912 efi_thunk(reset_system, reset_type, status, data_size, phys_data);
913
914 spin_unlock_irqrestore(&efi_runtime_lock, flags);
854} 915}
855 916
856static efi_status_t 917static efi_status_t
@@ -872,10 +933,13 @@ efi_thunk_query_variable_info(u32 attr, u64 *storage_space,
872{ 933{
873 efi_status_t status; 934 efi_status_t status;
874 u32 phys_storage, phys_remaining, phys_max; 935 u32 phys_storage, phys_remaining, phys_max;
936 unsigned long flags;
875 937
876 if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION) 938 if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION)
877 return EFI_UNSUPPORTED; 939 return EFI_UNSUPPORTED;
878 940
941 spin_lock_irqsave(&efi_runtime_lock, flags);
942
879 phys_storage = virt_to_phys_or_null(storage_space); 943 phys_storage = virt_to_phys_or_null(storage_space);
880 phys_remaining = virt_to_phys_or_null(remaining_space); 944 phys_remaining = virt_to_phys_or_null(remaining_space);
881 phys_max = virt_to_phys_or_null(max_variable_size); 945 phys_max = virt_to_phys_or_null(max_variable_size);
@@ -883,6 +947,35 @@ efi_thunk_query_variable_info(u32 attr, u64 *storage_space,
883 status = efi_thunk(query_variable_info, attr, phys_storage, 947 status = efi_thunk(query_variable_info, attr, phys_storage,
884 phys_remaining, phys_max); 948 phys_remaining, phys_max);
885 949
950 spin_unlock_irqrestore(&efi_runtime_lock, flags);
951
952 return status;
953}
954
955static efi_status_t
956efi_thunk_query_variable_info_nonblocking(u32 attr, u64 *storage_space,
957 u64 *remaining_space,
958 u64 *max_variable_size)
959{
960 efi_status_t status;
961 u32 phys_storage, phys_remaining, phys_max;
962 unsigned long flags;
963
964 if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION)
965 return EFI_UNSUPPORTED;
966
967 if (!spin_trylock_irqsave(&efi_runtime_lock, flags))
968 return EFI_NOT_READY;
969
970 phys_storage = virt_to_phys_or_null(storage_space);
971 phys_remaining = virt_to_phys_or_null(remaining_space);
972 phys_max = virt_to_phys_or_null(max_variable_size);
973
974 status = efi_thunk(query_variable_info, attr, phys_storage,
975 phys_remaining, phys_max);
976
977 spin_unlock_irqrestore(&efi_runtime_lock, flags);
978
886 return status; 979 return status;
887} 980}
888 981
@@ -908,9 +1001,11 @@ void efi_thunk_runtime_setup(void)
908 efi.get_variable = efi_thunk_get_variable; 1001 efi.get_variable = efi_thunk_get_variable;
909 efi.get_next_variable = efi_thunk_get_next_variable; 1002 efi.get_next_variable = efi_thunk_get_next_variable;
910 efi.set_variable = efi_thunk_set_variable; 1003 efi.set_variable = efi_thunk_set_variable;
1004 efi.set_variable_nonblocking = efi_thunk_set_variable_nonblocking;
911 efi.get_next_high_mono_count = efi_thunk_get_next_high_mono_count; 1005 efi.get_next_high_mono_count = efi_thunk_get_next_high_mono_count;
912 efi.reset_system = efi_thunk_reset_system; 1006 efi.reset_system = efi_thunk_reset_system;
913 efi.query_variable_info = efi_thunk_query_variable_info; 1007 efi.query_variable_info = efi_thunk_query_variable_info;
1008 efi.query_variable_info_nonblocking = efi_thunk_query_variable_info_nonblocking;
914 efi.update_capsule = efi_thunk_update_capsule; 1009 efi.update_capsule = efi_thunk_update_capsule;
915 efi.query_capsule_caps = efi_thunk_query_capsule_caps; 1010 efi.query_capsule_caps = efi_thunk_query_capsule_caps;
916} 1011}
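
Taken together, the efi_64.c hunks replace the per-call local_irq_save()/restore in efi_thunk() with a global efi_runtime_lock spinlock, and add *_nonblocking variants that use spin_trylock_irqsave() and bail out with EFI_NOT_READY instead of waiting. A rough user-space analogue of that blocking vs. non-blocking split, with a pthread mutex standing in for the spinlock (all names below are invented for the example):

/*
 * User-space analogue, not kernel code: the ordinary path blocks on the
 * lock, while the *_nonblocking variant trylocks and reports "not ready"
 * instead of waiting (compare EFI_NOT_READY in the hunks above).
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t runtime_lock = PTHREAD_MUTEX_INITIALIZER;

static int do_runtime_call(void)
{
	pthread_mutex_lock(&runtime_lock);
	/* ... issue the firmware call while serialized ... */
	pthread_mutex_unlock(&runtime_lock);
	return 0;
}

static int do_runtime_call_nonblocking(void)
{
	if (pthread_mutex_trylock(&runtime_lock) != 0)
		return -1;	/* caller retries later, like EFI_NOT_READY */
	/* ... issue the firmware call ... */
	pthread_mutex_unlock(&runtime_lock);
	return 0;
}

int main(void)
{
	printf("blocking: %d, nonblocking: %d\n",
	       do_runtime_call(), do_runtime_call_nonblocking());
	return 0;
}

Callers that cannot afford to wait use the non-blocking entry point and handle the not-ready case themselves, which is what efi_delete_dummy_variable() now does in the quirks.c hunk below.
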
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index 36c1f8b9f7e0..844d31cb8a0c 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -105,12 +105,11 @@ early_param("efi_no_storage_paranoia", setup_storage_paranoia);
105*/ 105*/
106void efi_delete_dummy_variable(void) 106void efi_delete_dummy_variable(void)
107{ 107{
108 efi.set_variable((efi_char16_t *)efi_dummy_name, 108 efi.set_variable_nonblocking((efi_char16_t *)efi_dummy_name,
109 &EFI_DUMMY_GUID, 109 &EFI_DUMMY_GUID,
110 EFI_VARIABLE_NON_VOLATILE | 110 EFI_VARIABLE_NON_VOLATILE |
111 EFI_VARIABLE_BOOTSERVICE_ACCESS | 111 EFI_VARIABLE_BOOTSERVICE_ACCESS |
112 EFI_VARIABLE_RUNTIME_ACCESS, 112 EFI_VARIABLE_RUNTIME_ACCESS, 0, NULL);
113 0, NULL);
114} 113}
115 114
116/* 115/*
@@ -249,7 +248,8 @@ void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size)
249 int num_entries; 248 int num_entries;
250 void *new; 249 void *new;
251 250
252 if (efi_mem_desc_lookup(addr, &md)) { 251 if (efi_mem_desc_lookup(addr, &md) ||
252 md.type != EFI_BOOT_SERVICES_DATA) {
253 pr_err("Failed to lookup EFI memory descriptor for %pa\n", &addr); 253 pr_err("Failed to lookup EFI memory descriptor for %pa\n", &addr);
254 return; 254 return;
255 } 255 }
diff --git a/arch/x86/platform/intel-mid/Makefile b/arch/x86/platform/intel-mid/Makefile
index fa021dfab088..5cf886c867c2 100644
--- a/arch/x86/platform/intel-mid/Makefile
+++ b/arch/x86/platform/intel-mid/Makefile
@@ -1,4 +1,4 @@
1obj-$(CONFIG_X86_INTEL_MID) += intel-mid.o intel_mid_vrtc.o mfld.o mrfld.o pwr.o 1obj-$(CONFIG_X86_INTEL_MID) += intel-mid.o intel_mid_vrtc.o pwr.o
2 2
3# SFI specific code 3# SFI specific code
4ifdef CONFIG_X86_INTEL_MID 4ifdef CONFIG_X86_INTEL_MID
diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c
index 2ebdf31d9996..56f66eafb94f 100644
--- a/arch/x86/platform/intel-mid/intel-mid.c
+++ b/arch/x86/platform/intel-mid/intel-mid.c
@@ -36,8 +36,6 @@
36#include <asm/apb_timer.h> 36#include <asm/apb_timer.h>
37#include <asm/reboot.h> 37#include <asm/reboot.h>
38 38
39#include "intel_mid_weak_decls.h"
40
41/* 39/*
42 * the clockevent devices on Moorestown/Medfield can be APBT or LAPIC clock, 40 * the clockevent devices on Moorestown/Medfield can be APBT or LAPIC clock,
43 * cmdline option x86_intel_mid_timer can be used to override the configuration 41 * cmdline option x86_intel_mid_timer can be used to override the configuration
@@ -61,10 +59,6 @@
61 59
62enum intel_mid_timer_options intel_mid_timer_options; 60enum intel_mid_timer_options intel_mid_timer_options;
63 61
64/* intel_mid_ops to store sub arch ops */
65static struct intel_mid_ops *intel_mid_ops;
66/* getter function for sub arch ops*/
67static void *(*get_intel_mid_ops[])(void) = INTEL_MID_OPS_INIT;
68enum intel_mid_cpu_type __intel_mid_cpu_chip; 62enum intel_mid_cpu_type __intel_mid_cpu_chip;
69EXPORT_SYMBOL_GPL(__intel_mid_cpu_chip); 63EXPORT_SYMBOL_GPL(__intel_mid_cpu_chip);
70 64
@@ -82,11 +76,6 @@ static void intel_mid_reboot(void)
82 intel_scu_ipc_simple_command(IPCMSG_COLD_RESET, 0); 76 intel_scu_ipc_simple_command(IPCMSG_COLD_RESET, 0);
83} 77}
84 78
85static unsigned long __init intel_mid_calibrate_tsc(void)
86{
87 return 0;
88}
89
90static void __init intel_mid_setup_bp_timer(void) 79static void __init intel_mid_setup_bp_timer(void)
91{ 80{
92 apbt_time_init(); 81 apbt_time_init();
@@ -133,6 +122,7 @@ static void intel_mid_arch_setup(void)
133 case 0x3C: 122 case 0x3C:
134 case 0x4A: 123 case 0x4A:
135 __intel_mid_cpu_chip = INTEL_MID_CPU_CHIP_TANGIER; 124 __intel_mid_cpu_chip = INTEL_MID_CPU_CHIP_TANGIER;
125 x86_platform.legacy.rtc = 1;
136 break; 126 break;
137 case 0x27: 127 case 0x27:
138 default: 128 default:
@@ -140,17 +130,7 @@ static void intel_mid_arch_setup(void)
140 break; 130 break;
141 } 131 }
142 132
143 if (__intel_mid_cpu_chip < MAX_CPU_OPS(get_intel_mid_ops))
144 intel_mid_ops = get_intel_mid_ops[__intel_mid_cpu_chip]();
145 else {
146 intel_mid_ops = get_intel_mid_ops[INTEL_MID_CPU_CHIP_PENWELL]();
147 pr_info("ARCH: Unknown SoC, assuming Penwell!\n");
148 }
149
150out: 133out:
151 if (intel_mid_ops->arch_setup)
152 intel_mid_ops->arch_setup();
153
154 /* 134 /*
155 * Intel MID platforms are using explicitly defined regulators. 135 * Intel MID platforms are using explicitly defined regulators.
156 * 136 *
@@ -191,7 +171,6 @@ void __init x86_intel_mid_early_setup(void)
191 171
192 x86_cpuinit.setup_percpu_clockev = apbt_setup_secondary_clock; 172 x86_cpuinit.setup_percpu_clockev = apbt_setup_secondary_clock;
193 173
194 x86_platform.calibrate_tsc = intel_mid_calibrate_tsc;
195 x86_platform.get_nmi_reason = intel_mid_get_nmi_reason; 174 x86_platform.get_nmi_reason = intel_mid_get_nmi_reason;
196 175
197 x86_init.pci.arch_init = intel_mid_pci_init; 176 x86_init.pci.arch_init = intel_mid_pci_init;
diff --git a/arch/x86/platform/intel-mid/intel_mid_weak_decls.h b/arch/x86/platform/intel-mid/intel_mid_weak_decls.h
deleted file mode 100644
index 3c1c3866d82b..000000000000
--- a/arch/x86/platform/intel-mid/intel_mid_weak_decls.h
+++ /dev/null
@@ -1,18 +0,0 @@
1/*
2 * intel_mid_weak_decls.h: Weak declarations of intel-mid.c
3 *
4 * (C) Copyright 2013 Intel Corporation
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; version 2
9 * of the License.
10 */
11
12
13/* For every CPU addition a new get_<cpuname>_ops interface needs
14 * to be added.
15 */
16extern void *get_penwell_ops(void);
17extern void *get_cloverview_ops(void);
18extern void *get_tangier_ops(void);
diff --git a/arch/x86/platform/intel-mid/mfld.c b/arch/x86/platform/intel-mid/mfld.c
deleted file mode 100644
index e42978d4deaf..000000000000
--- a/arch/x86/platform/intel-mid/mfld.c
+++ /dev/null
@@ -1,70 +0,0 @@
1/*
2 * mfld.c: Intel Medfield platform setup code
3 *
4 * (C) Copyright 2013 Intel Corporation
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; version 2
9 * of the License.
10 */
11
12#include <linux/init.h>
13
14#include <asm/apic.h>
15#include <asm/intel-mid.h>
16#include <asm/intel_mid_vrtc.h>
17
18#include "intel_mid_weak_decls.h"
19
20static unsigned long __init mfld_calibrate_tsc(void)
21{
22 unsigned long fast_calibrate;
23 u32 lo, hi, ratio, fsb;
24
25 rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
26 pr_debug("IA32 perf status is 0x%x, 0x%0x\n", lo, hi);
27 ratio = (hi >> 8) & 0x1f;
28 pr_debug("ratio is %d\n", ratio);
29 if (!ratio) {
30 pr_err("read a zero ratio, should be incorrect!\n");
31 pr_err("force tsc ratio to 16 ...\n");
32 ratio = 16;
33 }
34 rdmsr(MSR_FSB_FREQ, lo, hi);
35 if ((lo & 0x7) == 0x7)
36 fsb = FSB_FREQ_83SKU;
37 else
38 fsb = FSB_FREQ_100SKU;
39 fast_calibrate = ratio * fsb;
40 pr_debug("read penwell tsc %lu khz\n", fast_calibrate);
41 lapic_timer_frequency = fsb * 1000 / HZ;
42
43 /*
44 * TSC on Intel Atom SoCs is reliable and of known frequency.
45 * See tsc_msr.c for details.
46 */
47 setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
48 setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
49
50 return fast_calibrate;
51}
52
53static void __init penwell_arch_setup(void)
54{
55 x86_platform.calibrate_tsc = mfld_calibrate_tsc;
56}
57
58static struct intel_mid_ops penwell_ops = {
59 .arch_setup = penwell_arch_setup,
60};
61
62void *get_penwell_ops(void)
63{
64 return &penwell_ops;
65}
66
67void *get_cloverview_ops(void)
68{
69 return &penwell_ops;
70}
diff --git a/arch/x86/platform/intel-mid/mrfld.c b/arch/x86/platform/intel-mid/mrfld.c
deleted file mode 100644
index ae7bdeb0e507..000000000000
--- a/arch/x86/platform/intel-mid/mrfld.c
+++ /dev/null
@@ -1,105 +0,0 @@
1/*
2 * Intel Merrifield platform specific setup code
3 *
4 * (C) Copyright 2013 Intel Corporation
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; version 2
9 * of the License.
10 */
11
12#include <linux/init.h>
13
14#include <asm/apic.h>
15#include <asm/intel-mid.h>
16
17#include "intel_mid_weak_decls.h"
18
19static unsigned long __init tangier_calibrate_tsc(void)
20{
21 unsigned long fast_calibrate;
22 u32 lo, hi, ratio, fsb, bus_freq;
23
24 /* *********************** */
25 /* Compute TSC:Ratio * FSB */
26 /* *********************** */
27
28 /* Compute Ratio */
29 rdmsr(MSR_PLATFORM_INFO, lo, hi);
30 pr_debug("IA32 PLATFORM_INFO is 0x%x : %x\n", hi, lo);
31
32 ratio = (lo >> 8) & 0xFF;
33 pr_debug("ratio is %d\n", ratio);
34 if (!ratio) {
35 pr_err("Read a zero ratio, force tsc ratio to 4 ...\n");
36 ratio = 4;
37 }
38
39 /* Compute FSB */
40 rdmsr(MSR_FSB_FREQ, lo, hi);
41 pr_debug("Actual FSB frequency detected by SOC 0x%x : %x\n",
42 hi, lo);
43
44 bus_freq = lo & 0x7;
45 pr_debug("bus_freq = 0x%x\n", bus_freq);
46
47 if (bus_freq == 0)
48 fsb = FSB_FREQ_100SKU;
49 else if (bus_freq == 1)
50 fsb = FSB_FREQ_100SKU;
51 else if (bus_freq == 2)
52 fsb = FSB_FREQ_133SKU;
53 else if (bus_freq == 3)
54 fsb = FSB_FREQ_167SKU;
55 else if (bus_freq == 4)
56 fsb = FSB_FREQ_83SKU;
57 else if (bus_freq == 5)
58 fsb = FSB_FREQ_400SKU;
59 else if (bus_freq == 6)
60 fsb = FSB_FREQ_267SKU;
61 else if (bus_freq == 7)
62 fsb = FSB_FREQ_333SKU;
63 else {
64 BUG();
65 pr_err("Invalid bus_freq! Setting to minimal value!\n");
66 fsb = FSB_FREQ_100SKU;
67 }
68
69 /* TSC = FSB Freq * Resolved HFM Ratio */
70 fast_calibrate = ratio * fsb;
71 pr_debug("calculate tangier tsc %lu KHz\n", fast_calibrate);
72
73 /* ************************************ */
74 /* Calculate Local APIC Timer Frequency */
75 /* ************************************ */
76 lapic_timer_frequency = (fsb * 1000) / HZ;
77
78 pr_debug("Setting lapic_timer_frequency = %d\n",
79 lapic_timer_frequency);
80
81 /*
82 * TSC on Intel Atom SoCs is reliable and of known frequency.
83 * See tsc_msr.c for details.
84 */
85 setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
86 setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
87
88 return fast_calibrate;
89}
90
91static void __init tangier_arch_setup(void)
92{
93 x86_platform.calibrate_tsc = tangier_calibrate_tsc;
94 x86_platform.legacy.rtc = 1;
95}
96
97/* tangier arch ops */
98static struct intel_mid_ops tangier_ops = {
99 .arch_setup = tangier_arch_setup,
100};
101
102void *get_tangier_ops(void)
103{
104 return &tangier_ops;
105}
diff --git a/arch/x86/platform/olpc/olpc.c b/arch/x86/platform/olpc/olpc.c
index 7c3077e58fa0..f0e920fb98ad 100644
--- a/arch/x86/platform/olpc/olpc.c
+++ b/arch/x86/platform/olpc/olpc.c
@@ -311,10 +311,8 @@ static int __init add_xo1_platform_devices(void)
311 return PTR_ERR(pdev); 311 return PTR_ERR(pdev);
312 312
313 pdev = platform_device_register_simple("olpc-xo1", -1, NULL, 0); 313 pdev = platform_device_register_simple("olpc-xo1", -1, NULL, 0);
314 if (IS_ERR(pdev))
315 return PTR_ERR(pdev);
316 314
317 return 0; 315 return PTR_ERR_OR_ZERO(pdev);
318} 316}
319 317
320static int olpc_xo1_ec_probe(struct platform_device *pdev) 318static int olpc_xo1_ec_probe(struct platform_device *pdev)
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index ca446da48fd2..e26dfad507c8 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -1607,8 +1607,6 @@ static int parse_tunables_write(struct bau_control *bcp, char *instr,
1607 *tunables[cnt].tunp = val; 1607 *tunables[cnt].tunp = val;
1608 continue; 1608 continue;
1609 } 1609 }
1610 if (q == p)
1611 break;
1612 } 1610 }
1613 return 0; 1611 return 0;
1614} 1612}
diff --git a/arch/x86/power/hibernate_asm_64.S b/arch/x86/power/hibernate_asm_64.S
index ce8da3a0412c..fd369a6e9ff8 100644
--- a/arch/x86/power/hibernate_asm_64.S
+++ b/arch/x86/power/hibernate_asm_64.S
@@ -137,7 +137,7 @@ ENTRY(restore_registers)
137 /* Saved in save_processor_state. */ 137 /* Saved in save_processor_state. */
138 lgdt saved_context_gdt_desc(%rax) 138 lgdt saved_context_gdt_desc(%rax)
139 139
140 xorq %rax, %rax 140 xorl %eax, %eax
141 141
142 /* tell the hibernation core that we've just restored the memory */ 142 /* tell the hibernation core that we've just restored the memory */
143 movq %rax, in_suspend(%rip) 143 movq %rax, in_suspend(%rip)
diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
index 220e97841e49..3a6c8ebc8032 100644
--- a/arch/x86/tools/relocs.c
+++ b/arch/x86/tools/relocs.c
@@ -67,6 +67,7 @@ static const char * const sym_regex_kernel[S_NSYMTYPES] = {
67 "__tracedata_(start|end)|" 67 "__tracedata_(start|end)|"
68 "__(start|stop)_notes|" 68 "__(start|stop)_notes|"
69 "__end_rodata|" 69 "__end_rodata|"
70 "__end_rodata_aligned|"
70 "__initramfs_start|" 71 "__initramfs_start|"
71 "(jiffies|jiffies_64)|" 72 "(jiffies|jiffies_64)|"
72#if ELF_BITS == 64 73#if ELF_BITS == 64
diff --git a/arch/x86/um/vdso/.gitignore b/arch/x86/um/vdso/.gitignore
index 9cac6d072199..f8b69d84238e 100644
--- a/arch/x86/um/vdso/.gitignore
+++ b/arch/x86/um/vdso/.gitignore
@@ -1,2 +1 @@
1vdso-syms.lds
2vdso.lds vdso.lds
diff --git a/arch/x86/um/vdso/Makefile b/arch/x86/um/vdso/Makefile
index b2d6967262b2..822ccdba93ad 100644
--- a/arch/x86/um/vdso/Makefile
+++ b/arch/x86/um/vdso/Makefile
@@ -53,22 +53,6 @@ $(vobjs): KBUILD_CFLAGS += $(CFL)
53CFLAGS_REMOVE_vdso-note.o = -pg -fprofile-arcs -ftest-coverage 53CFLAGS_REMOVE_vdso-note.o = -pg -fprofile-arcs -ftest-coverage
54CFLAGS_REMOVE_um_vdso.o = -pg -fprofile-arcs -ftest-coverage 54CFLAGS_REMOVE_um_vdso.o = -pg -fprofile-arcs -ftest-coverage
55 55
56targets += vdso-syms.lds
57extra-$(VDSO64-y) += vdso-syms.lds
58
59#
60# Match symbols in the DSO that look like VDSO*; produce a file of constants.
61#
62sed-vdsosym := -e 's/^00*/0/' \
63 -e 's/^\([0-9a-fA-F]*\) . \(VDSO[a-zA-Z0-9_]*\)$$/\2 = 0x\1;/p'
64quiet_cmd_vdsosym = VDSOSYM $@
65define cmd_vdsosym
66 $(NM) $< | LC_ALL=C sed -n $(sed-vdsosym) | LC_ALL=C sort > $@
67endef
68
69$(obj)/%-syms.lds: $(obj)/%.so.dbg FORCE
70 $(call if_changed,vdsosym)
71
72# 56#
73# The DSO images are built using a special linker script. 57# The DSO images are built using a special linker script.
74# 58#
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 439a94bf89ad..105a57d73701 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -119,6 +119,27 @@ static void __init xen_banner(void)
119 version >> 16, version & 0xffff, extra.extraversion, 119 version >> 16, version & 0xffff, extra.extraversion,
120 xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); 120 xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "");
121} 121}
122
123static void __init xen_pv_init_platform(void)
124{
125 set_fixmap(FIX_PARAVIRT_BOOTMAP, xen_start_info->shared_info);
126 HYPERVISOR_shared_info = (void *)fix_to_virt(FIX_PARAVIRT_BOOTMAP);
127
128 /* xen clock uses per-cpu vcpu_info, need to init it for boot cpu */
129 xen_vcpu_info_reset(0);
130
131 /* pvclock is in shared info area */
132 xen_init_time_ops();
133}
134
135static void __init xen_pv_guest_late_init(void)
136{
137#ifndef CONFIG_SMP
138 /* Setup shared vcpu info for non-smp configurations */
139 xen_setup_vcpu_info_placement();
140#endif
141}
142
122/* Check if running on Xen version (major, minor) or later */ 143/* Check if running on Xen version (major, minor) or later */
123bool 144bool
124xen_running_on_version_or_later(unsigned int major, unsigned int minor) 145xen_running_on_version_or_later(unsigned int major, unsigned int minor)
@@ -947,34 +968,8 @@ static void xen_write_msr(unsigned int msr, unsigned low, unsigned high)
947 xen_write_msr_safe(msr, low, high); 968 xen_write_msr_safe(msr, low, high);
948} 969}
949 970
950void xen_setup_shared_info(void)
951{
952 set_fixmap(FIX_PARAVIRT_BOOTMAP, xen_start_info->shared_info);
953
954 HYPERVISOR_shared_info =
955 (struct shared_info *)fix_to_virt(FIX_PARAVIRT_BOOTMAP);
956
957 xen_setup_mfn_list_list();
958
959 if (system_state == SYSTEM_BOOTING) {
960#ifndef CONFIG_SMP
961 /*
962 * In UP this is as good a place as any to set up shared info.
963 * Limit this to boot only, at restore vcpu setup is done via
964 * xen_vcpu_restore().
965 */
966 xen_setup_vcpu_info_placement();
967#endif
968 /*
969 * Now that shared info is set up we can start using routines
970 * that point to pvclock area.
971 */
972 xen_init_time_ops();
973 }
974}
975
976/* This is called once we have the cpu_possible_mask */ 971/* This is called once we have the cpu_possible_mask */
977void __ref xen_setup_vcpu_info_placement(void) 972void __init xen_setup_vcpu_info_placement(void)
978{ 973{
979 int cpu; 974 int cpu;
980 975
@@ -1228,6 +1223,8 @@ asmlinkage __visible void __init xen_start_kernel(void)
1228 x86_init.irqs.intr_mode_init = x86_init_noop; 1223 x86_init.irqs.intr_mode_init = x86_init_noop;
1229 x86_init.oem.arch_setup = xen_arch_setup; 1224 x86_init.oem.arch_setup = xen_arch_setup;
1230 x86_init.oem.banner = xen_banner; 1225 x86_init.oem.banner = xen_banner;
1226 x86_init.hyper.init_platform = xen_pv_init_platform;
1227 x86_init.hyper.guest_late_init = xen_pv_guest_late_init;
1231 1228
1232 /* 1229 /*
1233 * Set up some pagetable state before starting to set any ptes. 1230 * Set up some pagetable state before starting to set any ptes.
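
The enlighten_pv.c changes fold the old xen_setup_shared_info() logic into two x86_init.hyper callbacks, init_platform and guest_late_init, which the common x86 boot code invokes at fixed points. A toy sketch of that hook-table pattern (the struct and function names here are made up for illustration, not the kernel's):

/*
 * Toy model of an x86_init-style hook table: the guest fills in the
 * callbacks it cares about and generic code calls them in a fixed order.
 */
#include <stdio.h>

struct hyper_hooks {
	void (*init_platform)(void);
	void (*guest_late_init)(void);
};

static void noop(void) { }
static void demo_init_platform(void) { puts("init_platform"); }
static void demo_guest_late_init(void) { puts("guest_late_init"); }

static struct hyper_hooks hooks = { noop, noop };

int main(void)
{
	/* The guest overrides only the hooks it needs. */
	hooks.init_platform = demo_init_platform;
	hooks.guest_late_init = demo_guest_late_init;

	hooks.init_platform();		/* early platform setup */
	/* ... rest of boot ... */
	hooks.guest_late_init();	/* late, once more of the kernel is up */
	return 0;
}

With no-op defaults a guest only overrides what it needs, mirroring the two assignments added to xen_start_kernel() above.
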
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 2c30cabfda90..52206ad81e4b 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -1230,8 +1230,7 @@ static void __init xen_pagetable_p2m_free(void)
1230 * We roundup to the PMD, which means that if anybody at this stage is 1230 * We roundup to the PMD, which means that if anybody at this stage is
1231 * using the __ka address of xen_start_info or 1231 * using the __ka address of xen_start_info or
1232 * xen_start_info->shared_info they are going to crash. Fortunately 1232 * xen_start_info->shared_info they are going to crash. Fortunately
1233 * we have already revectored in xen_setup_kernel_pagetable and in 1233 * we have already revectored in xen_setup_kernel_pagetable.
1234 * xen_setup_shared_info.
1235 */ 1234 */
1236 size = roundup(size, PMD_SIZE); 1235 size = roundup(size, PMD_SIZE);
1237 1236
@@ -1292,8 +1291,7 @@ static void __init xen_pagetable_init(void)
1292 1291
1293 /* Remap memory freed due to conflicts with E820 map */ 1292 /* Remap memory freed due to conflicts with E820 map */
1294 xen_remap_memory(); 1293 xen_remap_memory();
1295 1294 xen_setup_mfn_list_list();
1296 xen_setup_shared_info();
1297} 1295}
1298static void xen_write_cr2(unsigned long cr2) 1296static void xen_write_cr2(unsigned long cr2)
1299{ 1297{
diff --git a/arch/x86/xen/suspend_pv.c b/arch/x86/xen/suspend_pv.c
index a2e0f110af56..8303b58c79a9 100644
--- a/arch/x86/xen/suspend_pv.c
+++ b/arch/x86/xen/suspend_pv.c
@@ -27,8 +27,9 @@ void xen_pv_pre_suspend(void)
27void xen_pv_post_suspend(int suspend_cancelled) 27void xen_pv_post_suspend(int suspend_cancelled)
28{ 28{
29 xen_build_mfn_list_list(); 29 xen_build_mfn_list_list();
30 30 set_fixmap(FIX_PARAVIRT_BOOTMAP, xen_start_info->shared_info);
31 xen_setup_shared_info(); 31 HYPERVISOR_shared_info = (void *)fix_to_virt(FIX_PARAVIRT_BOOTMAP);
32 xen_setup_mfn_list_list();
32 33
33 if (suspend_cancelled) { 34 if (suspend_cancelled) {
34 xen_start_info->store_mfn = 35 xen_start_info->store_mfn =
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index e0f1bcf01d63..c84f1e039d84 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -31,6 +31,8 @@
31/* Xen may fire a timer up to this many ns early */ 31/* Xen may fire a timer up to this many ns early */
32#define TIMER_SLOP 100000 32#define TIMER_SLOP 100000
33 33
34static u64 xen_sched_clock_offset __read_mostly;
35
34/* Get the TSC speed from Xen */ 36/* Get the TSC speed from Xen */
35static unsigned long xen_tsc_khz(void) 37static unsigned long xen_tsc_khz(void)
36{ 38{
@@ -40,7 +42,7 @@ static unsigned long xen_tsc_khz(void)
40 return pvclock_tsc_khz(info); 42 return pvclock_tsc_khz(info);
41} 43}
42 44
43u64 xen_clocksource_read(void) 45static u64 xen_clocksource_read(void)
44{ 46{
45 struct pvclock_vcpu_time_info *src; 47 struct pvclock_vcpu_time_info *src;
46 u64 ret; 48 u64 ret;
@@ -57,6 +59,11 @@ static u64 xen_clocksource_get_cycles(struct clocksource *cs)
57 return xen_clocksource_read(); 59 return xen_clocksource_read();
58} 60}
59 61
62static u64 xen_sched_clock(void)
63{
64 return xen_clocksource_read() - xen_sched_clock_offset;
65}
66
60static void xen_read_wallclock(struct timespec64 *ts) 67static void xen_read_wallclock(struct timespec64 *ts)
61{ 68{
62 struct shared_info *s = HYPERVISOR_shared_info; 69 struct shared_info *s = HYPERVISOR_shared_info;
@@ -367,7 +374,7 @@ void xen_timer_resume(void)
367} 374}
368 375
369static const struct pv_time_ops xen_time_ops __initconst = { 376static const struct pv_time_ops xen_time_ops __initconst = {
370 .sched_clock = xen_clocksource_read, 377 .sched_clock = xen_sched_clock,
371 .steal_clock = xen_steal_clock, 378 .steal_clock = xen_steal_clock,
372}; 379};
373 380
@@ -503,8 +510,9 @@ static void __init xen_time_init(void)
503 pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier); 510 pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier);
504} 511}
505 512
506void __ref xen_init_time_ops(void) 513void __init xen_init_time_ops(void)
507{ 514{
515 xen_sched_clock_offset = xen_clocksource_read();
508 pv_time_ops = xen_time_ops; 516 pv_time_ops = xen_time_ops;
509 517
510 x86_init.timers.timer_init = xen_time_init; 518 x86_init.timers.timer_init = xen_time_init;
@@ -542,11 +550,11 @@ void __init xen_hvm_init_time_ops(void)
542 return; 550 return;
543 551
544 if (!xen_feature(XENFEAT_hvm_safe_pvclock)) { 552 if (!xen_feature(XENFEAT_hvm_safe_pvclock)) {
545 printk(KERN_INFO "Xen doesn't support pvclock on HVM," 553 pr_info("Xen doesn't support pvclock on HVM, disable pv timer");
546 "disable pv timer\n");
547 return; 554 return;
548 } 555 }
549 556
557 xen_sched_clock_offset = xen_clocksource_read();
550 pv_time_ops = xen_time_ops; 558 pv_time_ops = xen_time_ops;
551 x86_init.timers.setup_percpu_clockev = xen_time_init; 559 x86_init.timers.setup_percpu_clockev = xen_time_init;
552 x86_cpuinit.setup_percpu_clockev = xen_hvm_setup_cpu_clockevents; 560 x86_cpuinit.setup_percpu_clockev = xen_hvm_setup_cpu_clockevents;
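
The time.c change records xen_clocksource_read() once in xen_sched_clock_offset when the time ops are installed, and the pv sched_clock now returns the clocksource value minus that offset, so the scheduler clock starts near zero at init rather than at whatever the Xen clocksource already reads. A small user-space sketch of the same offset trick (raw_clock() is a stand-in for xen_clocksource_read(), purely illustrative):

/*
 * Sketch of the offset trick: sample the raw clock once at init and
 * subtract it afterwards so the derived clock starts near zero.
 */
#include <stdint.h>
#include <stdio.h>
#include <time.h>

static uint64_t raw_clock(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000000ULL + (uint64_t)ts.tv_nsec;
}

static uint64_t sched_clock_offset;

static uint64_t sched_clock(void)
{
	return raw_clock() - sched_clock_offset;
}

int main(void)
{
	sched_clock_offset = raw_clock();	/* done once, at "init" time */
	printf("%llu ns since init\n", (unsigned long long)sched_clock());
	return 0;
}
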
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 3b34745d0a52..e78684597f57 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -31,7 +31,6 @@ extern struct shared_info xen_dummy_shared_info;
31extern struct shared_info *HYPERVISOR_shared_info; 31extern struct shared_info *HYPERVISOR_shared_info;
32 32
33void xen_setup_mfn_list_list(void); 33void xen_setup_mfn_list_list(void);
34void xen_setup_shared_info(void);
35void xen_build_mfn_list_list(void); 34void xen_build_mfn_list_list(void);
36void xen_setup_machphys_mapping(void); 35void xen_setup_machphys_mapping(void);
37void xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); 36void xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn);
@@ -68,12 +67,11 @@ void xen_init_irq_ops(void);
68void xen_setup_timer(int cpu); 67void xen_setup_timer(int cpu);
69void xen_setup_runstate_info(int cpu); 68void xen_setup_runstate_info(int cpu);
70void xen_teardown_timer(int cpu); 69void xen_teardown_timer(int cpu);
71u64 xen_clocksource_read(void);
72void xen_setup_cpu_clockevents(void); 70void xen_setup_cpu_clockevents(void);
73void xen_save_time_memory_area(void); 71void xen_save_time_memory_area(void);
74void xen_restore_time_memory_area(void); 72void xen_restore_time_memory_area(void);
75void __ref xen_init_time_ops(void); 73void xen_init_time_ops(void);
76void __init xen_hvm_init_time_ops(void); 74void xen_hvm_init_time_ops(void);
77 75
78irqreturn_t xen_debug_interrupt(int irq, void *dev_id); 76irqreturn_t xen_debug_interrupt(int irq, void *dev_id);
79 77
diff --git a/arch/xtensa/include/asm/atomic.h b/arch/xtensa/include/asm/atomic.h
index e7a23f2a519a..7de0149e1cf7 100644
--- a/arch/xtensa/include/asm/atomic.h
+++ b/arch/xtensa/include/asm/atomic.h
@@ -197,107 +197,9 @@ ATOMIC_OPS(xor)
197#undef ATOMIC_OP_RETURN 197#undef ATOMIC_OP_RETURN
198#undef ATOMIC_OP 198#undef ATOMIC_OP
199 199
200/**
201 * atomic_sub_and_test - subtract value from variable and test result
202 * @i: integer value to subtract
203 * @v: pointer of type atomic_t
204 *
205 * Atomically subtracts @i from @v and returns
206 * true if the result is zero, or false for all
207 * other cases.
208 */
209#define atomic_sub_and_test(i,v) (atomic_sub_return((i),(v)) == 0)
210
211/**
212 * atomic_inc - increment atomic variable
213 * @v: pointer of type atomic_t
214 *
215 * Atomically increments @v by 1.
216 */
217#define atomic_inc(v) atomic_add(1,(v))
218
219/**
220 * atomic_inc - increment atomic variable
221 * @v: pointer of type atomic_t
222 *
223 * Atomically increments @v by 1.
224 */
225#define atomic_inc_return(v) atomic_add_return(1,(v))
226
227/**
228 * atomic_dec - decrement atomic variable
229 * @v: pointer of type atomic_t
230 *
231 * Atomically decrements @v by 1.
232 */
233#define atomic_dec(v) atomic_sub(1,(v))
234
235/**
236 * atomic_dec_return - decrement atomic variable
237 * @v: pointer of type atomic_t
238 *
239 * Atomically decrements @v by 1.
240 */
241#define atomic_dec_return(v) atomic_sub_return(1,(v))
242
243/**
244 * atomic_dec_and_test - decrement and test
245 * @v: pointer of type atomic_t
246 *
247 * Atomically decrements @v by 1 and
248 * returns true if the result is 0, or false for all other
249 * cases.
250 */
251#define atomic_dec_and_test(v) (atomic_sub_return(1,(v)) == 0)
252
253/**
254 * atomic_inc_and_test - increment and test
255 * @v: pointer of type atomic_t
256 *
257 * Atomically increments @v by 1
258 * and returns true if the result is zero, or false for all
259 * other cases.
260 */
261#define atomic_inc_and_test(v) (atomic_add_return(1,(v)) == 0)
262
263/**
264 * atomic_add_negative - add and test if negative
265 * @v: pointer of type atomic_t
266 * @i: integer value to add
267 *
268 * Atomically adds @i to @v and returns true
269 * if the result is negative, or false when
270 * result is greater than or equal to zero.
271 */
272#define atomic_add_negative(i,v) (atomic_add_return((i),(v)) < 0)
273
274#define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n))) 200#define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n)))
275#define atomic_xchg(v, new) (xchg(&((v)->counter), new)) 201#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
276 202
277/**
278 * __atomic_add_unless - add unless the number is a given value
279 * @v: pointer of type atomic_t
280 * @a: the amount to add to v...
281 * @u: ...unless v is equal to u.
282 *
283 * Atomically adds @a to @v, so long as it was not @u.
284 * Returns the old value of @v.
285 */
286static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
287{
288 int c, old;
289 c = atomic_read(v);
290 for (;;) {
291 if (unlikely(c == (u)))
292 break;
293 old = atomic_cmpxchg((v), c, c + (a));
294 if (likely(old == c))
295 break;
296 c = old;
297 }
298 return c;
299}
300
301#endif /* __KERNEL__ */ 203#endif /* __KERNEL__ */
302 204
303#endif /* _XTENSA_ATOMIC_H */ 205#endif /* _XTENSA_ATOMIC_H */
diff --git a/block/blk-core.c b/block/blk-core.c
index f84a9b7b6f5a..ee33590f54eb 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2155,11 +2155,12 @@ static inline bool bio_check_ro(struct bio *bio, struct hd_struct *part)
2155 if (part->policy && op_is_write(bio_op(bio))) { 2155 if (part->policy && op_is_write(bio_op(bio))) {
2156 char b[BDEVNAME_SIZE]; 2156 char b[BDEVNAME_SIZE];
2157 2157
2158 printk(KERN_ERR 2158 WARN_ONCE(1,
2159 "generic_make_request: Trying to write " 2159 "generic_make_request: Trying to write "
2160 "to read-only block-device %s (partno %d)\n", 2160 "to read-only block-device %s (partno %d)\n",
2161 bio_devname(bio, b), part->partno); 2161 bio_devname(bio, b), part->partno);
2162 return true; 2162 /* Older lvm-tools actually trigger this */
2163 return false;
2163 } 2164 }
2164 2165
2165 return false; 2166 return false;
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 09b2ee6694fb..3de0836163c2 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -271,7 +271,7 @@ static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
271 * test and set the bit before assigning ->rqs[]. 271 * test and set the bit before assigning ->rqs[].
272 */ 272 */
273 rq = tags->rqs[bitnr]; 273 rq = tags->rqs[bitnr];
274 if (rq && blk_mq_rq_state(rq) == MQ_RQ_IN_FLIGHT) 274 if (rq && blk_mq_request_started(rq))
275 iter_data->fn(rq, iter_data->data, reserved); 275 iter_data->fn(rq, iter_data->data, reserved);
276 276
277 return true; 277 return true;
diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c
index f8fecfec5df9..9706613eecf9 100644
--- a/drivers/acpi/acpi_lpss.c
+++ b/drivers/acpi/acpi_lpss.c
@@ -879,6 +879,7 @@ static void acpi_lpss_dismiss(struct device *dev)
879#define LPSS_GPIODEF0_DMA_LLP BIT(13) 879#define LPSS_GPIODEF0_DMA_LLP BIT(13)
880 880
881static DEFINE_MUTEX(lpss_iosf_mutex); 881static DEFINE_MUTEX(lpss_iosf_mutex);
882static bool lpss_iosf_d3_entered;
882 883
883static void lpss_iosf_enter_d3_state(void) 884static void lpss_iosf_enter_d3_state(void)
884{ 885{
@@ -921,6 +922,9 @@ static void lpss_iosf_enter_d3_state(void)
921 922
922 iosf_mbi_modify(LPSS_IOSF_UNIT_LPIOEP, MBI_CR_WRITE, 923 iosf_mbi_modify(LPSS_IOSF_UNIT_LPIOEP, MBI_CR_WRITE,
923 LPSS_IOSF_GPIODEF0, value1, mask1); 924 LPSS_IOSF_GPIODEF0, value1, mask1);
925
926 lpss_iosf_d3_entered = true;
927
924exit: 928exit:
925 mutex_unlock(&lpss_iosf_mutex); 929 mutex_unlock(&lpss_iosf_mutex);
926} 930}
@@ -935,6 +939,11 @@ static void lpss_iosf_exit_d3_state(void)
935 939
936 mutex_lock(&lpss_iosf_mutex); 940 mutex_lock(&lpss_iosf_mutex);
937 941
942 if (!lpss_iosf_d3_entered)
943 goto exit;
944
945 lpss_iosf_d3_entered = false;
946
938 iosf_mbi_modify(LPSS_IOSF_UNIT_LPIOEP, MBI_CR_WRITE, 947 iosf_mbi_modify(LPSS_IOSF_UNIT_LPIOEP, MBI_CR_WRITE,
939 LPSS_IOSF_GPIODEF0, value1, mask1); 948 LPSS_IOSF_GPIODEF0, value1, mask1);
940 949
@@ -944,13 +953,13 @@ static void lpss_iosf_exit_d3_state(void)
944 iosf_mbi_modify(LPSS_IOSF_UNIT_LPIO1, MBI_CFG_WRITE, 953 iosf_mbi_modify(LPSS_IOSF_UNIT_LPIO1, MBI_CFG_WRITE,
945 LPSS_IOSF_PMCSR, value2, mask2); 954 LPSS_IOSF_PMCSR, value2, mask2);
946 955
956exit:
947 mutex_unlock(&lpss_iosf_mutex); 957 mutex_unlock(&lpss_iosf_mutex);
948} 958}
949 959
950static int acpi_lpss_suspend(struct device *dev, bool runtime) 960static int acpi_lpss_suspend(struct device *dev, bool wakeup)
951{ 961{
952 struct lpss_private_data *pdata = acpi_driver_data(ACPI_COMPANION(dev)); 962 struct lpss_private_data *pdata = acpi_driver_data(ACPI_COMPANION(dev));
953 bool wakeup = runtime || device_may_wakeup(dev);
954 int ret; 963 int ret;
955 964
956 if (pdata->dev_desc->flags & LPSS_SAVE_CTX) 965 if (pdata->dev_desc->flags & LPSS_SAVE_CTX)
@@ -963,14 +972,14 @@ static int acpi_lpss_suspend(struct device *dev, bool runtime)
963 * wrong status for devices being about to be powered off. See 972 * wrong status for devices being about to be powered off. See
964 * lpss_iosf_enter_d3_state() for further information. 973 * lpss_iosf_enter_d3_state() for further information.
965 */ 974 */
966 if ((runtime || !pm_suspend_via_firmware()) && 975 if (acpi_target_system_state() == ACPI_STATE_S0 &&
967 lpss_quirks & LPSS_QUIRK_ALWAYS_POWER_ON && iosf_mbi_available()) 976 lpss_quirks & LPSS_QUIRK_ALWAYS_POWER_ON && iosf_mbi_available())
968 lpss_iosf_enter_d3_state(); 977 lpss_iosf_enter_d3_state();
969 978
970 return ret; 979 return ret;
971} 980}
972 981
973static int acpi_lpss_resume(struct device *dev, bool runtime) 982static int acpi_lpss_resume(struct device *dev)
974{ 983{
975 struct lpss_private_data *pdata = acpi_driver_data(ACPI_COMPANION(dev)); 984 struct lpss_private_data *pdata = acpi_driver_data(ACPI_COMPANION(dev));
976 int ret; 985 int ret;
@@ -979,8 +988,7 @@ static int acpi_lpss_resume(struct device *dev, bool runtime)
979 * This call is kept first to be in symmetry with 988 * This call is kept first to be in symmetry with
980 * acpi_lpss_runtime_suspend() one. 989 * acpi_lpss_runtime_suspend() one.
981 */ 990 */
982 if ((runtime || !pm_resume_via_firmware()) && 991 if (lpss_quirks & LPSS_QUIRK_ALWAYS_POWER_ON && iosf_mbi_available())
983 lpss_quirks & LPSS_QUIRK_ALWAYS_POWER_ON && iosf_mbi_available())
984 lpss_iosf_exit_d3_state(); 992 lpss_iosf_exit_d3_state();
985 993
986 ret = acpi_dev_resume(dev); 994 ret = acpi_dev_resume(dev);
@@ -1004,12 +1012,12 @@ static int acpi_lpss_suspend_late(struct device *dev)
1004 return 0; 1012 return 0;
1005 1013
1006 ret = pm_generic_suspend_late(dev); 1014 ret = pm_generic_suspend_late(dev);
1007 return ret ? ret : acpi_lpss_suspend(dev, false); 1015 return ret ? ret : acpi_lpss_suspend(dev, device_may_wakeup(dev));
1008} 1016}
1009 1017
1010static int acpi_lpss_resume_early(struct device *dev) 1018static int acpi_lpss_resume_early(struct device *dev)
1011{ 1019{
1012 int ret = acpi_lpss_resume(dev, false); 1020 int ret = acpi_lpss_resume(dev);
1013 1021
1014 return ret ? ret : pm_generic_resume_early(dev); 1022 return ret ? ret : pm_generic_resume_early(dev);
1015} 1023}
@@ -1024,7 +1032,7 @@ static int acpi_lpss_runtime_suspend(struct device *dev)
1024 1032
1025static int acpi_lpss_runtime_resume(struct device *dev) 1033static int acpi_lpss_runtime_resume(struct device *dev)
1026{ 1034{
1027 int ret = acpi_lpss_resume(dev, true); 1035 int ret = acpi_lpss_resume(dev);
1028 1036
1029 return ret ? ret : pm_generic_runtime_resume(dev); 1037 return ret ? ret : pm_generic_runtime_resume(dev);
1030} 1038}
diff --git a/drivers/acpi/acpica/psloop.c b/drivers/acpi/acpica/psloop.c
index ee840be150b5..44f35ab3347d 100644
--- a/drivers/acpi/acpica/psloop.c
+++ b/drivers/acpi/acpica/psloop.c
@@ -709,15 +709,20 @@ acpi_status acpi_ps_parse_loop(struct acpi_walk_state *walk_state)
709 } else 709 } else
710 if ((walk_state-> 710 if ((walk_state->
711 parse_flags & ACPI_PARSE_MODULE_LEVEL) 711 parse_flags & ACPI_PARSE_MODULE_LEVEL)
712 && status != AE_CTRL_TRANSFER
712 && ACPI_FAILURE(status)) { 713 && ACPI_FAILURE(status)) {
713 /* 714 /*
714 * ACPI_PARSE_MODULE_LEVEL means that we are loading a table by 715 * ACPI_PARSE_MODULE_LEVEL flag means that we are currently
715 * executing it as a control method. However, if we encounter 716 * loading a table by executing it as a control method.
716 * an error while loading the table, we need to keep trying to 717 * However, if we encounter an error while loading the table,
717 * load the table rather than aborting the table load. Set the 718 * we need to keep trying to load the table rather than
718 * status to AE_OK to proceed with the table load. If we get a 719 * aborting the table load (setting the status to AE_OK
719 * failure at this point, it means that the dispatcher got an 720 * continues the table load). If we get a failure at this
720 * error while processing Op (most likely an AML operand error. 721 * point, it means that the dispatcher got an error while
722 * processing Op (most likely an AML operand error) or a
723 * control method was called from module level and the
724 * dispatcher returned AE_CTRL_TRANSFER. In the latter case,
725 * leave the status alone, there's nothing wrong with it.
721 */ 726 */
722 status = AE_OK; 727 status = AE_OK;
723 } 728 }
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index fa0729c1e776..d81c653b9bf6 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -61,7 +61,7 @@ static int atomic_inc_return_safe(atomic_t *v)
61{ 61{
62 unsigned int counter; 62 unsigned int counter;
63 63
64 counter = (unsigned int)__atomic_add_unless(v, 1, 0); 64 counter = (unsigned int)atomic_fetch_add_unless(v, 1, 0);
65 if (counter <= (unsigned int)INT_MAX) 65 if (counter <= (unsigned int)INT_MAX)
66 return (int)counter; 66 return (int)counter;
67 67
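The rbd hunk above is a straight rename from __atomic_add_unless() to atomic_fetch_add_unless() with unchanged semantics. As a rough plain-C model (a sketch, not the kernel implementation and not atomic), atomic_fetch_add_unless(v, a, u) returns the value it observed and performs the addition only when that value differs from u:

	/*
	 * Plain-C model of atomic_fetch_add_unless() semantics
	 * (illustrative only; the real helper is atomic).
	 */
	static int fetch_add_unless_model(int *v, int a, int u)
	{
		int old = *v;		/* value observed before the operation   */

		if (old != u)
			*v = old + a;	/* add only when the old value != u      */

		return old;		/* the observed value is always returned */
	}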
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 7436b2d27fa3..a390c6d4f72d 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -298,7 +298,8 @@ static void reset_bdev(struct zram *zram)
298 zram->backing_dev = NULL; 298 zram->backing_dev = NULL;
299 zram->old_block_size = 0; 299 zram->old_block_size = 0;
300 zram->bdev = NULL; 300 zram->bdev = NULL;
301 301 zram->disk->queue->backing_dev_info->capabilities |=
302 BDI_CAP_SYNCHRONOUS_IO;
302 kvfree(zram->bitmap); 303 kvfree(zram->bitmap);
303 zram->bitmap = NULL; 304 zram->bitmap = NULL;
304} 305}
@@ -400,6 +401,18 @@ static ssize_t backing_dev_store(struct device *dev,
400 zram->backing_dev = backing_dev; 401 zram->backing_dev = backing_dev;
401 zram->bitmap = bitmap; 402 zram->bitmap = bitmap;
402 zram->nr_pages = nr_pages; 403 zram->nr_pages = nr_pages;
404 /*
 405	 * With the writeback feature, zram does asynchronous IO, so it is no
 406	 * longer a synchronous device; remove the synchronous io flag. Otherwise,
 407	 * the upper layer (e.g., swap) could wait for IO completion instead of
 408	 * submitting and returning, which would make the system sluggish.
 409	 * Furthermore, when the IO function returns (e.g., swap_readpage), the
 410	 * upper layer expects the IO to be done and may free the page, while the
 411	 * IO is in fact still in flight, which can eventually cause a
 412	 * use-after-free once the IO really completes.
413 */
414 zram->disk->queue->backing_dev_info->capabilities &=
415 ~BDI_CAP_SYNCHRONOUS_IO;
403 up_write(&zram->init_lock); 416 up_write(&zram->init_lock);
404 417
405 pr_info("setup backing device %s\n", file_name); 418 pr_info("setup backing device %s\n", file_name);
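As context for the comment above, a minimal sketch of the contract behind BDI_CAP_SYNCHRONOUS_IO (illustrative only, not the actual swap path, and the helper name is hypothetical): a caller may only assume the IO completed inline while the bit is set, which is why backing_dev_store() clears it once a writeback device is attached.

	#include <linux/backing-dev.h>

	/*
	 * Sketch of the check an upper layer can key off.  When this
	 * returns false (as it will after backing_dev_store() above),
	 * the caller must wait for IO completion instead of assuming
	 * it happened inline.
	 */
	static bool dev_completes_io_inline(struct backing_dev_info *bdi)
	{
		return bdi->capabilities & BDI_CAP_SYNCHRONOUS_IO;
	}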
diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile
index 00caf37e52f9..c070cc7992e9 100644
--- a/drivers/clocksource/Makefile
+++ b/drivers/clocksource/Makefile
@@ -49,7 +49,7 @@ obj-$(CONFIG_CLKSRC_SAMSUNG_PWM) += samsung_pwm_timer.o
49obj-$(CONFIG_FSL_FTM_TIMER) += fsl_ftm_timer.o 49obj-$(CONFIG_FSL_FTM_TIMER) += fsl_ftm_timer.o
50obj-$(CONFIG_VF_PIT_TIMER) += vf_pit_timer.o 50obj-$(CONFIG_VF_PIT_TIMER) += vf_pit_timer.o
51obj-$(CONFIG_CLKSRC_QCOM) += qcom-timer.o 51obj-$(CONFIG_CLKSRC_QCOM) += qcom-timer.o
52obj-$(CONFIG_MTK_TIMER) += mtk_timer.o 52obj-$(CONFIG_MTK_TIMER) += timer-mediatek.o
53obj-$(CONFIG_CLKSRC_PISTACHIO) += time-pistachio.o 53obj-$(CONFIG_CLKSRC_PISTACHIO) += time-pistachio.o
54obj-$(CONFIG_CLKSRC_TI_32K) += timer-ti-32k.o 54obj-$(CONFIG_CLKSRC_TI_32K) += timer-ti-32k.o
55obj-$(CONFIG_CLKSRC_NPS) += timer-nps.o 55obj-$(CONFIG_CLKSRC_NPS) += timer-nps.o
diff --git a/drivers/clocksource/mtk_timer.c b/drivers/clocksource/mtk_timer.c
deleted file mode 100644
index f9b724fd9950..000000000000
--- a/drivers/clocksource/mtk_timer.c
+++ /dev/null
@@ -1,268 +0,0 @@
1/*
2 * Mediatek SoCs General-Purpose Timer handling.
3 *
4 * Copyright (C) 2014 Matthias Brugger
5 *
6 * Matthias Brugger <matthias.bgg@gmail.com>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 */
18
19#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21#include <linux/clk.h>
22#include <linux/clockchips.h>
23#include <linux/interrupt.h>
24#include <linux/irq.h>
25#include <linux/irqreturn.h>
26#include <linux/of.h>
27#include <linux/of_address.h>
28#include <linux/of_irq.h>
29#include <linux/sched_clock.h>
30#include <linux/slab.h>
31
32#define GPT_IRQ_EN_REG 0x00
33#define GPT_IRQ_ENABLE(val) BIT((val) - 1)
34#define GPT_IRQ_ACK_REG 0x08
35#define GPT_IRQ_ACK(val) BIT((val) - 1)
36
37#define TIMER_CTRL_REG(val) (0x10 * (val))
38#define TIMER_CTRL_OP(val) (((val) & 0x3) << 4)
39#define TIMER_CTRL_OP_ONESHOT (0)
40#define TIMER_CTRL_OP_REPEAT (1)
41#define TIMER_CTRL_OP_FREERUN (3)
42#define TIMER_CTRL_CLEAR (2)
43#define TIMER_CTRL_ENABLE (1)
44#define TIMER_CTRL_DISABLE (0)
45
46#define TIMER_CLK_REG(val) (0x04 + (0x10 * (val)))
47#define TIMER_CLK_SRC(val) (((val) & 0x1) << 4)
48#define TIMER_CLK_SRC_SYS13M (0)
49#define TIMER_CLK_SRC_RTC32K (1)
50#define TIMER_CLK_DIV1 (0x0)
51#define TIMER_CLK_DIV2 (0x1)
52
53#define TIMER_CNT_REG(val) (0x08 + (0x10 * (val)))
54#define TIMER_CMP_REG(val) (0x0C + (0x10 * (val)))
55
56#define GPT_CLK_EVT 1
57#define GPT_CLK_SRC 2
58
59struct mtk_clock_event_device {
60 void __iomem *gpt_base;
61 u32 ticks_per_jiffy;
62 struct clock_event_device dev;
63};
64
65static void __iomem *gpt_sched_reg __read_mostly;
66
67static u64 notrace mtk_read_sched_clock(void)
68{
69 return readl_relaxed(gpt_sched_reg);
70}
71
72static inline struct mtk_clock_event_device *to_mtk_clk(
73 struct clock_event_device *c)
74{
75 return container_of(c, struct mtk_clock_event_device, dev);
76}
77
78static void mtk_clkevt_time_stop(struct mtk_clock_event_device *evt, u8 timer)
79{
80 u32 val;
81
82 val = readl(evt->gpt_base + TIMER_CTRL_REG(timer));
83 writel(val & ~TIMER_CTRL_ENABLE, evt->gpt_base +
84 TIMER_CTRL_REG(timer));
85}
86
87static void mtk_clkevt_time_setup(struct mtk_clock_event_device *evt,
88 unsigned long delay, u8 timer)
89{
90 writel(delay, evt->gpt_base + TIMER_CMP_REG(timer));
91}
92
93static void mtk_clkevt_time_start(struct mtk_clock_event_device *evt,
94 bool periodic, u8 timer)
95{
96 u32 val;
97
98 /* Acknowledge interrupt */
99 writel(GPT_IRQ_ACK(timer), evt->gpt_base + GPT_IRQ_ACK_REG);
100
101 val = readl(evt->gpt_base + TIMER_CTRL_REG(timer));
102
103 /* Clear 2 bit timer operation mode field */
104 val &= ~TIMER_CTRL_OP(0x3);
105
106 if (periodic)
107 val |= TIMER_CTRL_OP(TIMER_CTRL_OP_REPEAT);
108 else
109 val |= TIMER_CTRL_OP(TIMER_CTRL_OP_ONESHOT);
110
111 writel(val | TIMER_CTRL_ENABLE | TIMER_CTRL_CLEAR,
112 evt->gpt_base + TIMER_CTRL_REG(timer));
113}
114
115static int mtk_clkevt_shutdown(struct clock_event_device *clk)
116{
117 mtk_clkevt_time_stop(to_mtk_clk(clk), GPT_CLK_EVT);
118 return 0;
119}
120
121static int mtk_clkevt_set_periodic(struct clock_event_device *clk)
122{
123 struct mtk_clock_event_device *evt = to_mtk_clk(clk);
124
125 mtk_clkevt_time_stop(evt, GPT_CLK_EVT);
126 mtk_clkevt_time_setup(evt, evt->ticks_per_jiffy, GPT_CLK_EVT);
127 mtk_clkevt_time_start(evt, true, GPT_CLK_EVT);
128 return 0;
129}
130
131static int mtk_clkevt_next_event(unsigned long event,
132 struct clock_event_device *clk)
133{
134 struct mtk_clock_event_device *evt = to_mtk_clk(clk);
135
136 mtk_clkevt_time_stop(evt, GPT_CLK_EVT);
137 mtk_clkevt_time_setup(evt, event, GPT_CLK_EVT);
138 mtk_clkevt_time_start(evt, false, GPT_CLK_EVT);
139
140 return 0;
141}
142
143static irqreturn_t mtk_timer_interrupt(int irq, void *dev_id)
144{
145 struct mtk_clock_event_device *evt = dev_id;
146
147 /* Acknowledge timer0 irq */
148 writel(GPT_IRQ_ACK(GPT_CLK_EVT), evt->gpt_base + GPT_IRQ_ACK_REG);
149 evt->dev.event_handler(&evt->dev);
150
151 return IRQ_HANDLED;
152}
153
154static void
155__init mtk_timer_setup(struct mtk_clock_event_device *evt, u8 timer, u8 option)
156{
157 writel(TIMER_CTRL_CLEAR | TIMER_CTRL_DISABLE,
158 evt->gpt_base + TIMER_CTRL_REG(timer));
159
160 writel(TIMER_CLK_SRC(TIMER_CLK_SRC_SYS13M) | TIMER_CLK_DIV1,
161 evt->gpt_base + TIMER_CLK_REG(timer));
162
163 writel(0x0, evt->gpt_base + TIMER_CMP_REG(timer));
164
165 writel(TIMER_CTRL_OP(option) | TIMER_CTRL_ENABLE,
166 evt->gpt_base + TIMER_CTRL_REG(timer));
167}
168
169static void mtk_timer_enable_irq(struct mtk_clock_event_device *evt, u8 timer)
170{
171 u32 val;
172
173 /* Disable all interrupts */
174 writel(0x0, evt->gpt_base + GPT_IRQ_EN_REG);
175
176 /* Acknowledge all spurious pending interrupts */
177 writel(0x3f, evt->gpt_base + GPT_IRQ_ACK_REG);
178
179 val = readl(evt->gpt_base + GPT_IRQ_EN_REG);
180 writel(val | GPT_IRQ_ENABLE(timer),
181 evt->gpt_base + GPT_IRQ_EN_REG);
182}
183
184static int __init mtk_timer_init(struct device_node *node)
185{
186 struct mtk_clock_event_device *evt;
187 struct resource res;
188 unsigned long rate = 0;
189 struct clk *clk;
190
191 evt = kzalloc(sizeof(*evt), GFP_KERNEL);
192 if (!evt)
193 return -ENOMEM;
194
195 evt->dev.name = "mtk_tick";
196 evt->dev.rating = 300;
197 evt->dev.features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT;
198 evt->dev.set_state_shutdown = mtk_clkevt_shutdown;
199 evt->dev.set_state_periodic = mtk_clkevt_set_periodic;
200 evt->dev.set_state_oneshot = mtk_clkevt_shutdown;
201 evt->dev.tick_resume = mtk_clkevt_shutdown;
202 evt->dev.set_next_event = mtk_clkevt_next_event;
203 evt->dev.cpumask = cpu_possible_mask;
204
205 evt->gpt_base = of_io_request_and_map(node, 0, "mtk-timer");
206 if (IS_ERR(evt->gpt_base)) {
207 pr_err("Can't get resource\n");
208 goto err_kzalloc;
209 }
210
211 evt->dev.irq = irq_of_parse_and_map(node, 0);
212 if (evt->dev.irq <= 0) {
213 pr_err("Can't parse IRQ\n");
214 goto err_mem;
215 }
216
217 clk = of_clk_get(node, 0);
218 if (IS_ERR(clk)) {
219 pr_err("Can't get timer clock\n");
220 goto err_irq;
221 }
222
223 if (clk_prepare_enable(clk)) {
224 pr_err("Can't prepare clock\n");
225 goto err_clk_put;
226 }
227 rate = clk_get_rate(clk);
228
229 if (request_irq(evt->dev.irq, mtk_timer_interrupt,
230 IRQF_TIMER | IRQF_IRQPOLL, "mtk_timer", evt)) {
231 pr_err("failed to setup irq %d\n", evt->dev.irq);
232 goto err_clk_disable;
233 }
234
235 evt->ticks_per_jiffy = DIV_ROUND_UP(rate, HZ);
236
237 /* Configure clock source */
238 mtk_timer_setup(evt, GPT_CLK_SRC, TIMER_CTRL_OP_FREERUN);
239 clocksource_mmio_init(evt->gpt_base + TIMER_CNT_REG(GPT_CLK_SRC),
240 node->name, rate, 300, 32, clocksource_mmio_readl_up);
241 gpt_sched_reg = evt->gpt_base + TIMER_CNT_REG(GPT_CLK_SRC);
242 sched_clock_register(mtk_read_sched_clock, 32, rate);
243
244 /* Configure clock event */
245 mtk_timer_setup(evt, GPT_CLK_EVT, TIMER_CTRL_OP_REPEAT);
246 clockevents_config_and_register(&evt->dev, rate, 0x3,
247 0xffffffff);
248
249 mtk_timer_enable_irq(evt, GPT_CLK_EVT);
250
251 return 0;
252
253err_clk_disable:
254 clk_disable_unprepare(clk);
255err_clk_put:
256 clk_put(clk);
257err_irq:
258 irq_dispose_mapping(evt->dev.irq);
259err_mem:
260 iounmap(evt->gpt_base);
261 of_address_to_resource(node, 0, &res);
262 release_mem_region(res.start, resource_size(&res));
263err_kzalloc:
264 kfree(evt);
265
266 return -EINVAL;
267}
268TIMER_OF_DECLARE(mtk_mt6577, "mediatek,mt6577-timer", mtk_timer_init);
diff --git a/drivers/clocksource/tegra20_timer.c b/drivers/clocksource/tegra20_timer.c
index c337a8100a7b..aa624885e0e2 100644
--- a/drivers/clocksource/tegra20_timer.c
+++ b/drivers/clocksource/tegra20_timer.c
@@ -230,7 +230,7 @@ static int __init tegra20_init_timer(struct device_node *np)
230 return ret; 230 return ret;
231 } 231 }
232 232
233 tegra_clockevent.cpumask = cpu_all_mask; 233 tegra_clockevent.cpumask = cpu_possible_mask;
234 tegra_clockevent.irq = tegra_timer_irq.irq; 234 tegra_clockevent.irq = tegra_timer_irq.irq;
235 clockevents_config_and_register(&tegra_clockevent, 1000000, 235 clockevents_config_and_register(&tegra_clockevent, 1000000,
236 0x1, 0x1fffffff); 236 0x1, 0x1fffffff);
@@ -259,6 +259,6 @@ static int __init tegra20_init_rtc(struct device_node *np)
259 else 259 else
260 clk_prepare_enable(clk); 260 clk_prepare_enable(clk);
261 261
262 return register_persistent_clock(NULL, tegra_read_persistent_clock64); 262 return register_persistent_clock(tegra_read_persistent_clock64);
263} 263}
264TIMER_OF_DECLARE(tegra20_rtc, "nvidia,tegra20-rtc", tegra20_init_rtc); 264TIMER_OF_DECLARE(tegra20_rtc, "nvidia,tegra20-rtc", tegra20_init_rtc);
diff --git a/drivers/clocksource/timer-atcpit100.c b/drivers/clocksource/timer-atcpit100.c
index 5e23d7b4a722..b4bd2f5b801d 100644
--- a/drivers/clocksource/timer-atcpit100.c
+++ b/drivers/clocksource/timer-atcpit100.c
@@ -185,7 +185,7 @@ static struct timer_of to = {
185 .set_state_oneshot = atcpit100_clkevt_set_oneshot, 185 .set_state_oneshot = atcpit100_clkevt_set_oneshot,
186 .tick_resume = atcpit100_clkevt_shutdown, 186 .tick_resume = atcpit100_clkevt_shutdown,
187 .set_next_event = atcpit100_clkevt_next_event, 187 .set_next_event = atcpit100_clkevt_next_event,
188 .cpumask = cpu_all_mask, 188 .cpumask = cpu_possible_mask,
189 }, 189 },
190 190
191 .of_irq = { 191 .of_irq = {
diff --git a/drivers/clocksource/timer-keystone.c b/drivers/clocksource/timer-keystone.c
index 0eee03250cfc..f5b2eda30bf3 100644
--- a/drivers/clocksource/timer-keystone.c
+++ b/drivers/clocksource/timer-keystone.c
@@ -211,7 +211,7 @@ static int __init keystone_timer_init(struct device_node *np)
211 event_dev->set_state_shutdown = keystone_shutdown; 211 event_dev->set_state_shutdown = keystone_shutdown;
212 event_dev->set_state_periodic = keystone_set_periodic; 212 event_dev->set_state_periodic = keystone_set_periodic;
213 event_dev->set_state_oneshot = keystone_shutdown; 213 event_dev->set_state_oneshot = keystone_shutdown;
214 event_dev->cpumask = cpu_all_mask; 214 event_dev->cpumask = cpu_possible_mask;
215 event_dev->owner = THIS_MODULE; 215 event_dev->owner = THIS_MODULE;
216 event_dev->name = TIMER_NAME; 216 event_dev->name = TIMER_NAME;
217 event_dev->irq = irq; 217 event_dev->irq = irq;
diff --git a/drivers/clocksource/timer-mediatek.c b/drivers/clocksource/timer-mediatek.c
new file mode 100644
index 000000000000..eb10321f8517
--- /dev/null
+++ b/drivers/clocksource/timer-mediatek.c
@@ -0,0 +1,328 @@
1/*
2 * Mediatek SoCs General-Purpose Timer handling.
3 *
4 * Copyright (C) 2014 Matthias Brugger
5 *
6 * Matthias Brugger <matthias.bgg@gmail.com>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 */
18
19#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21#include <linux/clockchips.h>
22#include <linux/clocksource.h>
23#include <linux/interrupt.h>
24#include <linux/irqreturn.h>
25#include <linux/sched_clock.h>
26#include <linux/slab.h>
27#include "timer-of.h"
28
29#define TIMER_CLK_EVT (1)
30#define TIMER_CLK_SRC (2)
31
32#define TIMER_SYNC_TICKS (3)
33
34/* gpt */
35#define GPT_IRQ_EN_REG 0x00
36#define GPT_IRQ_ENABLE(val) BIT((val) - 1)
37#define GPT_IRQ_ACK_REG 0x08
38#define GPT_IRQ_ACK(val) BIT((val) - 1)
39
40#define GPT_CTRL_REG(val) (0x10 * (val))
41#define GPT_CTRL_OP(val) (((val) & 0x3) << 4)
42#define GPT_CTRL_OP_ONESHOT (0)
43#define GPT_CTRL_OP_REPEAT (1)
44#define GPT_CTRL_OP_FREERUN (3)
45#define GPT_CTRL_CLEAR (2)
46#define GPT_CTRL_ENABLE (1)
47#define GPT_CTRL_DISABLE (0)
48
49#define GPT_CLK_REG(val) (0x04 + (0x10 * (val)))
50#define GPT_CLK_SRC(val) (((val) & 0x1) << 4)
51#define GPT_CLK_SRC_SYS13M (0)
52#define GPT_CLK_SRC_RTC32K (1)
53#define GPT_CLK_DIV1 (0x0)
54#define GPT_CLK_DIV2 (0x1)
55
56#define GPT_CNT_REG(val) (0x08 + (0x10 * (val)))
57#define GPT_CMP_REG(val) (0x0C + (0x10 * (val)))
58
59/* system timer */
60#define SYST_BASE (0x40)
61
62#define SYST_CON (SYST_BASE + 0x0)
63#define SYST_VAL (SYST_BASE + 0x4)
64
65#define SYST_CON_REG(to) (timer_of_base(to) + SYST_CON)
66#define SYST_VAL_REG(to) (timer_of_base(to) + SYST_VAL)
67
68/*
69 * SYST_CON_EN: Clock enable. Shall be set to
70 * - Start timer countdown.
 71 * - Allow timeout ticks to be updated.
72 * - Allow changing interrupt functions.
73 *
74 * SYST_CON_IRQ_EN: Set to allow interrupt.
75 *
76 * SYST_CON_IRQ_CLR: Set to clear interrupt.
77 */
78#define SYST_CON_EN BIT(0)
79#define SYST_CON_IRQ_EN BIT(1)
80#define SYST_CON_IRQ_CLR BIT(4)
81
82static void __iomem *gpt_sched_reg __read_mostly;
83
84static void mtk_syst_ack_irq(struct timer_of *to)
85{
86 /* Clear and disable interrupt */
87 writel(SYST_CON_IRQ_CLR | SYST_CON_EN, SYST_CON_REG(to));
88}
89
90static irqreturn_t mtk_syst_handler(int irq, void *dev_id)
91{
92 struct clock_event_device *clkevt = dev_id;
93 struct timer_of *to = to_timer_of(clkevt);
94
95 mtk_syst_ack_irq(to);
96 clkevt->event_handler(clkevt);
97
98 return IRQ_HANDLED;
99}
100
101static int mtk_syst_clkevt_next_event(unsigned long ticks,
102 struct clock_event_device *clkevt)
103{
104 struct timer_of *to = to_timer_of(clkevt);
105
106 /* Enable clock to allow timeout tick update later */
107 writel(SYST_CON_EN, SYST_CON_REG(to));
108
109 /*
110 * Write new timeout ticks. Timer shall start countdown
111 * after timeout ticks are updated.
112 */
113 writel(ticks, SYST_VAL_REG(to));
114
115 /* Enable interrupt */
116 writel(SYST_CON_EN | SYST_CON_IRQ_EN, SYST_CON_REG(to));
117
118 return 0;
119}
120
121static int mtk_syst_clkevt_shutdown(struct clock_event_device *clkevt)
122{
123 /* Disable timer */
124 writel(0, SYST_CON_REG(to_timer_of(clkevt)));
125
126 return 0;
127}
128
129static int mtk_syst_clkevt_resume(struct clock_event_device *clkevt)
130{
131 return mtk_syst_clkevt_shutdown(clkevt);
132}
133
134static int mtk_syst_clkevt_oneshot(struct clock_event_device *clkevt)
135{
136 return 0;
137}
138
139static u64 notrace mtk_gpt_read_sched_clock(void)
140{
141 return readl_relaxed(gpt_sched_reg);
142}
143
144static void mtk_gpt_clkevt_time_stop(struct timer_of *to, u8 timer)
145{
146 u32 val;
147
148 val = readl(timer_of_base(to) + GPT_CTRL_REG(timer));
149 writel(val & ~GPT_CTRL_ENABLE, timer_of_base(to) +
150 GPT_CTRL_REG(timer));
151}
152
153static void mtk_gpt_clkevt_time_setup(struct timer_of *to,
154 unsigned long delay, u8 timer)
155{
156 writel(delay, timer_of_base(to) + GPT_CMP_REG(timer));
157}
158
159static void mtk_gpt_clkevt_time_start(struct timer_of *to,
160 bool periodic, u8 timer)
161{
162 u32 val;
163
164 /* Acknowledge interrupt */
165 writel(GPT_IRQ_ACK(timer), timer_of_base(to) + GPT_IRQ_ACK_REG);
166
167 val = readl(timer_of_base(to) + GPT_CTRL_REG(timer));
168
169 /* Clear 2 bit timer operation mode field */
170 val &= ~GPT_CTRL_OP(0x3);
171
172 if (periodic)
173 val |= GPT_CTRL_OP(GPT_CTRL_OP_REPEAT);
174 else
175 val |= GPT_CTRL_OP(GPT_CTRL_OP_ONESHOT);
176
177 writel(val | GPT_CTRL_ENABLE | GPT_CTRL_CLEAR,
178 timer_of_base(to) + GPT_CTRL_REG(timer));
179}
180
181static int mtk_gpt_clkevt_shutdown(struct clock_event_device *clk)
182{
183 mtk_gpt_clkevt_time_stop(to_timer_of(clk), TIMER_CLK_EVT);
184
185 return 0;
186}
187
188static int mtk_gpt_clkevt_set_periodic(struct clock_event_device *clk)
189{
190 struct timer_of *to = to_timer_of(clk);
191
192 mtk_gpt_clkevt_time_stop(to, TIMER_CLK_EVT);
193 mtk_gpt_clkevt_time_setup(to, to->of_clk.period, TIMER_CLK_EVT);
194 mtk_gpt_clkevt_time_start(to, true, TIMER_CLK_EVT);
195
196 return 0;
197}
198
199static int mtk_gpt_clkevt_next_event(unsigned long event,
200 struct clock_event_device *clk)
201{
202 struct timer_of *to = to_timer_of(clk);
203
204 mtk_gpt_clkevt_time_stop(to, TIMER_CLK_EVT);
205 mtk_gpt_clkevt_time_setup(to, event, TIMER_CLK_EVT);
206 mtk_gpt_clkevt_time_start(to, false, TIMER_CLK_EVT);
207
208 return 0;
209}
210
211static irqreturn_t mtk_gpt_interrupt(int irq, void *dev_id)
212{
213 struct clock_event_device *clkevt = (struct clock_event_device *)dev_id;
214 struct timer_of *to = to_timer_of(clkevt);
215
216 /* Acknowledge timer0 irq */
217 writel(GPT_IRQ_ACK(TIMER_CLK_EVT), timer_of_base(to) + GPT_IRQ_ACK_REG);
218 clkevt->event_handler(clkevt);
219
220 return IRQ_HANDLED;
221}
222
223static void
224__init mtk_gpt_setup(struct timer_of *to, u8 timer, u8 option)
225{
226 writel(GPT_CTRL_CLEAR | GPT_CTRL_DISABLE,
227 timer_of_base(to) + GPT_CTRL_REG(timer));
228
229 writel(GPT_CLK_SRC(GPT_CLK_SRC_SYS13M) | GPT_CLK_DIV1,
230 timer_of_base(to) + GPT_CLK_REG(timer));
231
232 writel(0x0, timer_of_base(to) + GPT_CMP_REG(timer));
233
234 writel(GPT_CTRL_OP(option) | GPT_CTRL_ENABLE,
235 timer_of_base(to) + GPT_CTRL_REG(timer));
236}
237
238static void mtk_gpt_enable_irq(struct timer_of *to, u8 timer)
239{
240 u32 val;
241
242 /* Disable all interrupts */
243 writel(0x0, timer_of_base(to) + GPT_IRQ_EN_REG);
244
245 /* Acknowledge all spurious pending interrupts */
246 writel(0x3f, timer_of_base(to) + GPT_IRQ_ACK_REG);
247
248 val = readl(timer_of_base(to) + GPT_IRQ_EN_REG);
249 writel(val | GPT_IRQ_ENABLE(timer),
250 timer_of_base(to) + GPT_IRQ_EN_REG);
251}
252
253static struct timer_of to = {
254 .flags = TIMER_OF_IRQ | TIMER_OF_BASE | TIMER_OF_CLOCK,
255
256 .clkevt = {
257 .name = "mtk-clkevt",
258 .rating = 300,
259 .cpumask = cpu_possible_mask,
260 },
261
262 .of_irq = {
263 .flags = IRQF_TIMER | IRQF_IRQPOLL,
264 },
265};
266
267static int __init mtk_syst_init(struct device_node *node)
268{
269 int ret;
270
271 to.clkevt.features = CLOCK_EVT_FEAT_DYNIRQ | CLOCK_EVT_FEAT_ONESHOT;
272 to.clkevt.set_state_shutdown = mtk_syst_clkevt_shutdown;
273 to.clkevt.set_state_oneshot = mtk_syst_clkevt_oneshot;
274 to.clkevt.tick_resume = mtk_syst_clkevt_resume;
275 to.clkevt.set_next_event = mtk_syst_clkevt_next_event;
276 to.of_irq.handler = mtk_syst_handler;
277
278 ret = timer_of_init(node, &to);
279 if (ret)
280 goto err;
281
282 clockevents_config_and_register(&to.clkevt, timer_of_rate(&to),
283 TIMER_SYNC_TICKS, 0xffffffff);
284
285 return 0;
286err:
287 timer_of_cleanup(&to);
288 return ret;
289}
290
291static int __init mtk_gpt_init(struct device_node *node)
292{
293 int ret;
294
295 to.clkevt.features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT;
296 to.clkevt.set_state_shutdown = mtk_gpt_clkevt_shutdown;
297 to.clkevt.set_state_periodic = mtk_gpt_clkevt_set_periodic;
298 to.clkevt.set_state_oneshot = mtk_gpt_clkevt_shutdown;
299 to.clkevt.tick_resume = mtk_gpt_clkevt_shutdown;
300 to.clkevt.set_next_event = mtk_gpt_clkevt_next_event;
301 to.of_irq.handler = mtk_gpt_interrupt;
302
303 ret = timer_of_init(node, &to);
304 if (ret)
305 goto err;
306
307 /* Configure clock source */
308 mtk_gpt_setup(&to, TIMER_CLK_SRC, GPT_CTRL_OP_FREERUN);
309 clocksource_mmio_init(timer_of_base(&to) + GPT_CNT_REG(TIMER_CLK_SRC),
310 node->name, timer_of_rate(&to), 300, 32,
311 clocksource_mmio_readl_up);
312 gpt_sched_reg = timer_of_base(&to) + GPT_CNT_REG(TIMER_CLK_SRC);
313 sched_clock_register(mtk_gpt_read_sched_clock, 32, timer_of_rate(&to));
314
315 /* Configure clock event */
316 mtk_gpt_setup(&to, TIMER_CLK_EVT, GPT_CTRL_OP_REPEAT);
317 clockevents_config_and_register(&to.clkevt, timer_of_rate(&to),
318 TIMER_SYNC_TICKS, 0xffffffff);
319
320 mtk_gpt_enable_irq(&to, TIMER_CLK_EVT);
321
322 return 0;
323err:
324 timer_of_cleanup(&to);
325 return ret;
326}
327TIMER_OF_DECLARE(mtk_mt6577, "mediatek,mt6577-timer", mtk_gpt_init);
328TIMER_OF_DECLARE(mtk_mt6765, "mediatek,mt6765-timer", mtk_syst_init);
diff --git a/drivers/clocksource/timer-sprd.c b/drivers/clocksource/timer-sprd.c
index ef9ebeafb3ed..430cb99d8d79 100644
--- a/drivers/clocksource/timer-sprd.c
+++ b/drivers/clocksource/timer-sprd.c
@@ -156,4 +156,54 @@ static int __init sprd_timer_init(struct device_node *np)
156 return 0; 156 return 0;
157} 157}
158 158
159static struct timer_of suspend_to = {
160 .flags = TIMER_OF_BASE | TIMER_OF_CLOCK,
161};
162
163static u64 sprd_suspend_timer_read(struct clocksource *cs)
164{
165 return ~(u64)readl_relaxed(timer_of_base(&suspend_to) +
166 TIMER_VALUE_SHDW_LO) & cs->mask;
167}
168
169static int sprd_suspend_timer_enable(struct clocksource *cs)
170{
171 sprd_timer_update_counter(timer_of_base(&suspend_to),
172 TIMER_VALUE_LO_MASK);
173 sprd_timer_enable(timer_of_base(&suspend_to), TIMER_CTL_PERIOD_MODE);
174
175 return 0;
176}
177
178static void sprd_suspend_timer_disable(struct clocksource *cs)
179{
180 sprd_timer_disable(timer_of_base(&suspend_to));
181}
182
183static struct clocksource suspend_clocksource = {
184 .name = "sprd_suspend_timer",
185 .rating = 200,
186 .read = sprd_suspend_timer_read,
187 .enable = sprd_suspend_timer_enable,
188 .disable = sprd_suspend_timer_disable,
189 .mask = CLOCKSOURCE_MASK(32),
190 .flags = CLOCK_SOURCE_IS_CONTINUOUS | CLOCK_SOURCE_SUSPEND_NONSTOP,
191};
192
193static int __init sprd_suspend_timer_init(struct device_node *np)
194{
195 int ret;
196
197 ret = timer_of_init(np, &suspend_to);
198 if (ret)
199 return ret;
200
201 clocksource_register_hz(&suspend_clocksource,
202 timer_of_rate(&suspend_to));
203
204 return 0;
205}
206
159TIMER_OF_DECLARE(sc9860_timer, "sprd,sc9860-timer", sprd_timer_init); 207TIMER_OF_DECLARE(sc9860_timer, "sprd,sc9860-timer", sprd_timer_init);
208TIMER_OF_DECLARE(sc9860_persistent_timer, "sprd,sc9860-suspend-timer",
209 sprd_suspend_timer_init);
diff --git a/drivers/clocksource/timer-ti-32k.c b/drivers/clocksource/timer-ti-32k.c
index 880a861ab3c8..29e2e1a78a43 100644
--- a/drivers/clocksource/timer-ti-32k.c
+++ b/drivers/clocksource/timer-ti-32k.c
@@ -78,8 +78,7 @@ static struct ti_32k ti_32k_timer = {
78 .rating = 250, 78 .rating = 250,
79 .read = ti_32k_read_cycles, 79 .read = ti_32k_read_cycles,
80 .mask = CLOCKSOURCE_MASK(32), 80 .mask = CLOCKSOURCE_MASK(32),
81 .flags = CLOCK_SOURCE_IS_CONTINUOUS | 81 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
82 CLOCK_SOURCE_SUSPEND_NONSTOP,
83 }, 82 },
84}; 83};
85 84
diff --git a/drivers/clocksource/zevio-timer.c b/drivers/clocksource/zevio-timer.c
index a6a0338eea77..f74689334f7c 100644
--- a/drivers/clocksource/zevio-timer.c
+++ b/drivers/clocksource/zevio-timer.c
@@ -162,7 +162,7 @@ static int __init zevio_timer_add(struct device_node *node)
162 timer->clkevt.set_state_oneshot = zevio_timer_set_oneshot; 162 timer->clkevt.set_state_oneshot = zevio_timer_set_oneshot;
163 timer->clkevt.tick_resume = zevio_timer_set_oneshot; 163 timer->clkevt.tick_resume = zevio_timer_set_oneshot;
164 timer->clkevt.rating = 200; 164 timer->clkevt.rating = 200;
165 timer->clkevt.cpumask = cpu_all_mask; 165 timer->clkevt.cpumask = cpu_possible_mask;
166 timer->clkevt.features = CLOCK_EVT_FEAT_ONESHOT; 166 timer->clkevt.features = CLOCK_EVT_FEAT_ONESHOT;
167 timer->clkevt.irq = irqnr; 167 timer->clkevt.irq = irqnr;
168 168
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 3c3971256130..d4ed0022b0dd 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -311,12 +311,20 @@ static DEFINE_MUTEX(intel_pstate_limits_lock);
311 311
312#ifdef CONFIG_ACPI 312#ifdef CONFIG_ACPI
313 313
314static bool intel_pstate_get_ppc_enable_status(void) 314static bool intel_pstate_acpi_pm_profile_server(void)
315{ 315{
316 if (acpi_gbl_FADT.preferred_profile == PM_ENTERPRISE_SERVER || 316 if (acpi_gbl_FADT.preferred_profile == PM_ENTERPRISE_SERVER ||
317 acpi_gbl_FADT.preferred_profile == PM_PERFORMANCE_SERVER) 317 acpi_gbl_FADT.preferred_profile == PM_PERFORMANCE_SERVER)
318 return true; 318 return true;
319 319
320 return false;
321}
322
323static bool intel_pstate_get_ppc_enable_status(void)
324{
325 if (intel_pstate_acpi_pm_profile_server())
326 return true;
327
320 return acpi_ppc; 328 return acpi_ppc;
321} 329}
322 330
@@ -459,6 +467,11 @@ static inline void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *pol
459static inline void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy) 467static inline void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
460{ 468{
461} 469}
470
471static inline bool intel_pstate_acpi_pm_profile_server(void)
472{
473 return false;
474}
462#endif 475#endif
463 476
464static inline void update_turbo_state(void) 477static inline void update_turbo_state(void)
@@ -1841,7 +1854,7 @@ static int intel_pstate_init_cpu(unsigned int cpunum)
1841 intel_pstate_hwp_enable(cpu); 1854 intel_pstate_hwp_enable(cpu);
1842 1855
1843 id = x86_match_cpu(intel_pstate_hwp_boost_ids); 1856 id = x86_match_cpu(intel_pstate_hwp_boost_ids);
1844 if (id) 1857 if (id && intel_pstate_acpi_pm_profile_server())
1845 hwp_boost = true; 1858 hwp_boost = true;
1846 } 1859 }
1847 1860
diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c
index 1c6cbda56afe..09d823d36d3a 100644
--- a/drivers/crypto/padlock-aes.c
+++ b/drivers/crypto/padlock-aes.c
@@ -266,6 +266,8 @@ static inline void padlock_xcrypt_ecb(const u8 *input, u8 *output, void *key,
266 return; 266 return;
267 } 267 }
268 268
269 count -= initial;
270
269 if (initial) 271 if (initial)
270 asm volatile (".byte 0xf3,0x0f,0xa7,0xc8" /* rep xcryptecb */ 272 asm volatile (".byte 0xf3,0x0f,0xa7,0xc8" /* rep xcryptecb */
271 : "+S"(input), "+D"(output) 273 : "+S"(input), "+D"(output)
@@ -273,7 +275,7 @@ static inline void padlock_xcrypt_ecb(const u8 *input, u8 *output, void *key,
273 275
274 asm volatile (".byte 0xf3,0x0f,0xa7,0xc8" /* rep xcryptecb */ 276 asm volatile (".byte 0xf3,0x0f,0xa7,0xc8" /* rep xcryptecb */
275 : "+S"(input), "+D"(output) 277 : "+S"(input), "+D"(output)
276 : "d"(control_word), "b"(key), "c"(count - initial)); 278 : "d"(control_word), "b"(key), "c"(count));
277} 279}
278 280
279static inline u8 *padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key, 281static inline u8 *padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key,
@@ -284,6 +286,8 @@ static inline u8 *padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key,
284 if (count < cbc_fetch_blocks) 286 if (count < cbc_fetch_blocks)
285 return cbc_crypt(input, output, key, iv, control_word, count); 287 return cbc_crypt(input, output, key, iv, control_word, count);
286 288
289 count -= initial;
290
287 if (initial) 291 if (initial)
288 asm volatile (".byte 0xf3,0x0f,0xa7,0xd0" /* rep xcryptcbc */ 292 asm volatile (".byte 0xf3,0x0f,0xa7,0xd0" /* rep xcryptcbc */
289 : "+S" (input), "+D" (output), "+a" (iv) 293 : "+S" (input), "+D" (output), "+a" (iv)
@@ -291,7 +295,7 @@ static inline u8 *padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key,
291 295
292 asm volatile (".byte 0xf3,0x0f,0xa7,0xd0" /* rep xcryptcbc */ 296 asm volatile (".byte 0xf3,0x0f,0xa7,0xd0" /* rep xcryptcbc */
293 : "+S" (input), "+D" (output), "+a" (iv) 297 : "+S" (input), "+D" (output), "+a" (iv)
294 : "d" (control_word), "b" (key), "c" (count-initial)); 298 : "d" (control_word), "b" (key), "c" (count));
295 return iv; 299 return iv;
296} 300}
297 301
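The padlock change above hoists the subtraction so that the leading chunk and the remainder are both known before either xcrypt sequence runs. A plain-C model of the resulting split (a sketch only, no inline asm, names illustrative):

	#include <stddef.h>

	/*
	 * Model of the split: 'initial' blocks are processed first, then
	 * the remainder; together they cover exactly 'count' blocks.
	 */
	static void split_blocks_model(size_t count, size_t initial,
				       void (*crypt)(size_t nblocks))
	{
		count -= initial;	/* remainder, computed up front */

		if (initial)
			crypt(initial);	/* leading chunk                */

		crypt(count);		/* remaining blocks             */
	}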
diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig
index 781a4a337557..d8e159feb573 100644
--- a/drivers/firmware/efi/Kconfig
+++ b/drivers/firmware/efi/Kconfig
@@ -87,6 +87,18 @@ config EFI_RUNTIME_WRAPPERS
87config EFI_ARMSTUB 87config EFI_ARMSTUB
88 bool 88 bool
89 89
90config EFI_ARMSTUB_DTB_LOADER
91 bool "Enable the DTB loader"
92 depends on EFI_ARMSTUB
93 help
94 Select this config option to add support for the dtb= command
95 line parameter, allowing a device tree blob to be loaded into
96 memory from the EFI System Partition by the stub.
97
98 The device tree is typically provided by the platform or by
99 the bootloader, so this option is mostly for development
100 purposes only.
101
90config EFI_BOOTLOADER_CONTROL 102config EFI_BOOTLOADER_CONTROL
91 tristate "EFI Bootloader Control" 103 tristate "EFI Bootloader Control"
92 depends on EFI_VARS 104 depends on EFI_VARS
diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c
index 3bf0dca378a6..a7902fccdcfa 100644
--- a/drivers/firmware/efi/cper.c
+++ b/drivers/firmware/efi/cper.c
@@ -48,8 +48,21 @@ u64 cper_next_record_id(void)
48{ 48{
49 static atomic64_t seq; 49 static atomic64_t seq;
50 50
51 if (!atomic64_read(&seq)) 51 if (!atomic64_read(&seq)) {
52 atomic64_set(&seq, ((u64)get_seconds()) << 32); 52 time64_t time = ktime_get_real_seconds();
53
54 /*
55 * This code is unlikely to still be needed in year 2106,
56 * but just in case, let's use a few more bits for timestamps
57 * after y2038 to be sure they keep increasing monotonically
58 * for the next few hundred years...
59 */
60 if (time < 0x80000000)
61 atomic64_set(&seq, (ktime_get_real_seconds()) << 32);
62 else
63 atomic64_set(&seq, 0x8000000000000000ull |
64 ktime_get_real_seconds() << 24);
65 }
53 66
54 return atomic64_inc_return(&seq); 67 return atomic64_inc_return(&seq);
55} 68}
@@ -459,7 +472,7 @@ cper_estatus_print_section(const char *pfx, struct acpi_hest_generic_data *gdata
459 else 472 else
460 goto err_section_too_small; 473 goto err_section_too_small;
461#if defined(CONFIG_ARM64) || defined(CONFIG_ARM) 474#if defined(CONFIG_ARM64) || defined(CONFIG_ARM)
462 } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PROC_ARM)) { 475 } else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) {
463 struct cper_sec_proc_arm *arm_err = acpi_hest_get_payload(gdata); 476 struct cper_sec_proc_arm *arm_err = acpi_hest_get_payload(gdata);
464 477
465 printk("%ssection_type: ARM processor error\n", newpfx); 478 printk("%ssection_type: ARM processor error\n", newpfx);
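The new cper_next_record_id() seed follows the layout described in the comment above. A standalone model of just that computation (a sketch, not the kernel function):

	#include <stdint.h>

	/*
	 * Seed layout: "seconds << 32" while seconds < 0x80000000,
	 * otherwise the top bit is set and the shift drops to 24, so
	 * the seed keeps increasing monotonically well past year 2038.
	 */
	static uint64_t cper_seed_model(int64_t seconds)
	{
		if (seconds < 0x80000000)
			return (uint64_t)seconds << 32;

		return 0x8000000000000000ull | ((uint64_t)seconds << 24);
	}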
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index 232f4915223b..2a29dd9c986d 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -82,8 +82,11 @@ struct mm_struct efi_mm = {
82 .mmap_sem = __RWSEM_INITIALIZER(efi_mm.mmap_sem), 82 .mmap_sem = __RWSEM_INITIALIZER(efi_mm.mmap_sem),
83 .page_table_lock = __SPIN_LOCK_UNLOCKED(efi_mm.page_table_lock), 83 .page_table_lock = __SPIN_LOCK_UNLOCKED(efi_mm.page_table_lock),
84 .mmlist = LIST_HEAD_INIT(efi_mm.mmlist), 84 .mmlist = LIST_HEAD_INIT(efi_mm.mmlist),
85 .cpu_bitmap = { [BITS_TO_LONGS(NR_CPUS)] = 0},
85}; 86};
86 87
88struct workqueue_struct *efi_rts_wq;
89
87static bool disable_runtime; 90static bool disable_runtime;
88static int __init setup_noefi(char *arg) 91static int __init setup_noefi(char *arg)
89{ 92{
@@ -337,6 +340,18 @@ static int __init efisubsys_init(void)
337 if (!efi_enabled(EFI_BOOT)) 340 if (!efi_enabled(EFI_BOOT))
338 return 0; 341 return 0;
339 342
343 /*
344 * Since we process only one efi_runtime_service() at a time, an
345 * ordered workqueue (which creates only one execution context)
 346 * should suffice for all our needs.
347 */
348 efi_rts_wq = alloc_ordered_workqueue("efi_rts_wq", 0);
349 if (!efi_rts_wq) {
350 pr_err("Creating efi_rts_wq failed, EFI runtime services disabled.\n");
351 clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
352 return 0;
353 }
354
340 /* We register the efi directory at /sys/firmware/efi */ 355 /* We register the efi directory at /sys/firmware/efi */
341 efi_kobj = kobject_create_and_add("efi", firmware_kobj); 356 efi_kobj = kobject_create_and_add("efi", firmware_kobj);
342 if (!efi_kobj) { 357 if (!efi_kobj) {
@@ -388,7 +403,7 @@ subsys_initcall(efisubsys_init);
388 * and if so, populate the supplied memory descriptor with the appropriate 403 * and if so, populate the supplied memory descriptor with the appropriate
389 * data. 404 * data.
390 */ 405 */
391int __init efi_mem_desc_lookup(u64 phys_addr, efi_memory_desc_t *out_md) 406int efi_mem_desc_lookup(u64 phys_addr, efi_memory_desc_t *out_md)
392{ 407{
393 efi_memory_desc_t *md; 408 efi_memory_desc_t *md;
394 409
@@ -406,12 +421,6 @@ int __init efi_mem_desc_lookup(u64 phys_addr, efi_memory_desc_t *out_md)
406 u64 size; 421 u64 size;
407 u64 end; 422 u64 end;
408 423
409 if (!(md->attribute & EFI_MEMORY_RUNTIME) &&
410 md->type != EFI_BOOT_SERVICES_DATA &&
411 md->type != EFI_RUNTIME_SERVICES_DATA) {
412 continue;
413 }
414
415 size = md->num_pages << EFI_PAGE_SHIFT; 424 size = md->num_pages << EFI_PAGE_SHIFT;
416 end = md->phys_addr + size; 425 end = md->phys_addr + size;
417 if (phys_addr >= md->phys_addr && phys_addr < end) { 426 if (phys_addr >= md->phys_addr && phys_addr < end) {
diff --git a/drivers/firmware/efi/esrt.c b/drivers/firmware/efi/esrt.c
index 1ab80e06e7c5..5d06bd247d07 100644
--- a/drivers/firmware/efi/esrt.c
+++ b/drivers/firmware/efi/esrt.c
@@ -250,7 +250,10 @@ void __init efi_esrt_init(void)
250 return; 250 return;
251 251
252 rc = efi_mem_desc_lookup(efi.esrt, &md); 252 rc = efi_mem_desc_lookup(efi.esrt, &md);
253 if (rc < 0) { 253 if (rc < 0 ||
254 (!(md.attribute & EFI_MEMORY_RUNTIME) &&
255 md.type != EFI_BOOT_SERVICES_DATA &&
256 md.type != EFI_RUNTIME_SERVICES_DATA)) {
254 pr_warn("ESRT header is not in the memory map.\n"); 257 pr_warn("ESRT header is not in the memory map.\n");
255 return; 258 return;
256 } 259 }
@@ -326,7 +329,8 @@ void __init efi_esrt_init(void)
326 329
327 end = esrt_data + size; 330 end = esrt_data + size;
328 pr_info("Reserving ESRT space from %pa to %pa.\n", &esrt_data, &end); 331 pr_info("Reserving ESRT space from %pa to %pa.\n", &esrt_data, &end);
329 efi_mem_reserve(esrt_data, esrt_data_size); 332 if (md.type == EFI_BOOT_SERVICES_DATA)
333 efi_mem_reserve(esrt_data, esrt_data_size);
330 334
331 pr_debug("esrt-init: loaded.\n"); 335 pr_debug("esrt-init: loaded.\n");
332} 336}
diff --git a/drivers/firmware/efi/libstub/arm-stub.c b/drivers/firmware/efi/libstub/arm-stub.c
index 01a9d78ee415..6920033de6d4 100644
--- a/drivers/firmware/efi/libstub/arm-stub.c
+++ b/drivers/firmware/efi/libstub/arm-stub.c
@@ -40,31 +40,6 @@
40 40
41static u64 virtmap_base = EFI_RT_VIRTUAL_BASE; 41static u64 virtmap_base = EFI_RT_VIRTUAL_BASE;
42 42
43efi_status_t efi_open_volume(efi_system_table_t *sys_table_arg,
44 void *__image, void **__fh)
45{
46 efi_file_io_interface_t *io;
47 efi_loaded_image_t *image = __image;
48 efi_file_handle_t *fh;
49 efi_guid_t fs_proto = EFI_FILE_SYSTEM_GUID;
50 efi_status_t status;
51 void *handle = (void *)(unsigned long)image->device_handle;
52
53 status = sys_table_arg->boottime->handle_protocol(handle,
54 &fs_proto, (void **)&io);
55 if (status != EFI_SUCCESS) {
56 efi_printk(sys_table_arg, "Failed to handle fs_proto\n");
57 return status;
58 }
59
60 status = io->open_volume(io, &fh);
61 if (status != EFI_SUCCESS)
62 efi_printk(sys_table_arg, "Failed to open volume\n");
63
64 *__fh = fh;
65 return status;
66}
67
68void efi_char16_printk(efi_system_table_t *sys_table_arg, 43void efi_char16_printk(efi_system_table_t *sys_table_arg,
69 efi_char16_t *str) 44 efi_char16_t *str)
70{ 45{
@@ -202,9 +177,10 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table,
202 * 'dtb=' unless UEFI Secure Boot is disabled. We assume that secure 177 * 'dtb=' unless UEFI Secure Boot is disabled. We assume that secure
203 * boot is enabled if we can't determine its state. 178 * boot is enabled if we can't determine its state.
204 */ 179 */
205 if (secure_boot != efi_secureboot_mode_disabled && 180 if (!IS_ENABLED(CONFIG_EFI_ARMSTUB_DTB_LOADER) ||
206 strstr(cmdline_ptr, "dtb=")) { 181 secure_boot != efi_secureboot_mode_disabled) {
207 pr_efi(sys_table, "Ignoring DTB from command line.\n"); 182 if (strstr(cmdline_ptr, "dtb="))
183 pr_efi(sys_table, "Ignoring DTB from command line.\n");
208 } else { 184 } else {
209 status = handle_cmdline_files(sys_table, image, cmdline_ptr, 185 status = handle_cmdline_files(sys_table, image, cmdline_ptr,
210 "dtb=", 186 "dtb=",
diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c
index 50a9cab5a834..e94975f4655b 100644
--- a/drivers/firmware/efi/libstub/efi-stub-helper.c
+++ b/drivers/firmware/efi/libstub/efi-stub-helper.c
@@ -413,6 +413,34 @@ static efi_status_t efi_file_close(void *handle)
413 return efi_call_proto(efi_file_handle, close, handle); 413 return efi_call_proto(efi_file_handle, close, handle);
414} 414}
415 415
416static efi_status_t efi_open_volume(efi_system_table_t *sys_table_arg,
417 efi_loaded_image_t *image,
418 efi_file_handle_t **__fh)
419{
420 efi_file_io_interface_t *io;
421 efi_file_handle_t *fh;
422 efi_guid_t fs_proto = EFI_FILE_SYSTEM_GUID;
423 efi_status_t status;
424 void *handle = (void *)(unsigned long)efi_table_attr(efi_loaded_image,
425 device_handle,
426 image);
427
428 status = efi_call_early(handle_protocol, handle,
429 &fs_proto, (void **)&io);
430 if (status != EFI_SUCCESS) {
431 efi_printk(sys_table_arg, "Failed to handle fs_proto\n");
432 return status;
433 }
434
435 status = efi_call_proto(efi_file_io_interface, open_volume, io, &fh);
436 if (status != EFI_SUCCESS)
437 efi_printk(sys_table_arg, "Failed to open volume\n");
438 else
439 *__fh = fh;
440
441 return status;
442}
443
416/* 444/*
417 * Parse the ASCII string 'cmdline' for EFI options, denoted by the efi= 445 * Parse the ASCII string 'cmdline' for EFI options, denoted by the efi=
418 * option, e.g. efi=nochunk. 446 * option, e.g. efi=nochunk.
@@ -563,8 +591,7 @@ efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg,
563 591
564 /* Only open the volume once. */ 592 /* Only open the volume once. */
565 if (!i) { 593 if (!i) {
566 status = efi_open_volume(sys_table_arg, image, 594 status = efi_open_volume(sys_table_arg, image, &fh);
567 (void **)&fh);
568 if (status != EFI_SUCCESS) 595 if (status != EFI_SUCCESS)
569 goto free_files; 596 goto free_files;
570 } 597 }
diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h
index f59564b72ddc..32799cf039ef 100644
--- a/drivers/firmware/efi/libstub/efistub.h
+++ b/drivers/firmware/efi/libstub/efistub.h
@@ -36,9 +36,6 @@ extern int __pure is_quiet(void);
36 36
37void efi_char16_printk(efi_system_table_t *, efi_char16_t *); 37void efi_char16_printk(efi_system_table_t *, efi_char16_t *);
38 38
39efi_status_t efi_open_volume(efi_system_table_t *sys_table_arg, void *__image,
40 void **__fh);
41
42unsigned long get_dram_base(efi_system_table_t *sys_table_arg); 39unsigned long get_dram_base(efi_system_table_t *sys_table_arg);
43 40
44efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, 41efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table,
diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c
index ae54870b2788..aa66cbf23512 100644
--- a/drivers/firmware/efi/runtime-wrappers.c
+++ b/drivers/firmware/efi/runtime-wrappers.c
@@ -1,6 +1,15 @@
1/* 1/*
2 * runtime-wrappers.c - Runtime Services function call wrappers 2 * runtime-wrappers.c - Runtime Services function call wrappers
3 * 3 *
4 * Implementation summary:
5 * -----------------------
6 * 1. When user/kernel thread requests to execute efi_runtime_service(),
7 * enqueue work to efi_rts_wq.
8 * 2. Caller thread waits for completion until the work is finished
9 * because it's dependent on the return status and execution of
10 * efi_runtime_service().
11 * For instance, get_variable() and get_next_variable().
12 *
4 * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org> 13 * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
5 * 14 *
6 * Split off from arch/x86/platform/efi/efi.c 15 * Split off from arch/x86/platform/efi/efi.c
@@ -22,6 +31,9 @@
22#include <linux/mutex.h> 31#include <linux/mutex.h>
23#include <linux/semaphore.h> 32#include <linux/semaphore.h>
24#include <linux/stringify.h> 33#include <linux/stringify.h>
34#include <linux/workqueue.h>
35#include <linux/completion.h>
36
25#include <asm/efi.h> 37#include <asm/efi.h>
26 38
27/* 39/*
@@ -33,6 +45,76 @@
33#define __efi_call_virt(f, args...) \ 45#define __efi_call_virt(f, args...) \
34 __efi_call_virt_pointer(efi.systab->runtime, f, args) 46 __efi_call_virt_pointer(efi.systab->runtime, f, args)
35 47
48/* efi_runtime_service() function identifiers */
49enum efi_rts_ids {
50 GET_TIME,
51 SET_TIME,
52 GET_WAKEUP_TIME,
53 SET_WAKEUP_TIME,
54 GET_VARIABLE,
55 GET_NEXT_VARIABLE,
56 SET_VARIABLE,
57 QUERY_VARIABLE_INFO,
58 GET_NEXT_HIGH_MONO_COUNT,
59 UPDATE_CAPSULE,
60 QUERY_CAPSULE_CAPS,
61};
62
63/*
64 * efi_runtime_work: Details of EFI Runtime Service work
65 * @arg<1-5>: EFI Runtime Service function arguments
66 * @status: Status of executing EFI Runtime Service
67 * @efi_rts_id: EFI Runtime Service function identifier
68 * @efi_rts_comp: Struct used for handling completions
69 */
70struct efi_runtime_work {
71 void *arg1;
72 void *arg2;
73 void *arg3;
74 void *arg4;
75 void *arg5;
76 efi_status_t status;
77 struct work_struct work;
78 enum efi_rts_ids efi_rts_id;
79 struct completion efi_rts_comp;
80};
81
82/*
83 * efi_queue_work: Queue efi_runtime_service() and wait until it's done
84 * @rts: efi_runtime_service() function identifier
85 * @rts_arg<1-5>: efi_runtime_service() function arguments
86 *
87 * Accesses to efi_runtime_services() are serialized by a binary
88 * semaphore (efi_runtime_lock) and caller waits until the work is
89 * finished, hence _only_ one work is queued at a time and the caller
90 * thread waits for completion.
91 */
92#define efi_queue_work(_rts, _arg1, _arg2, _arg3, _arg4, _arg5) \
93({ \
94 struct efi_runtime_work efi_rts_work; \
95 efi_rts_work.status = EFI_ABORTED; \
96 \
97 init_completion(&efi_rts_work.efi_rts_comp); \
98 INIT_WORK_ONSTACK(&efi_rts_work.work, efi_call_rts); \
99 efi_rts_work.arg1 = _arg1; \
100 efi_rts_work.arg2 = _arg2; \
101 efi_rts_work.arg3 = _arg3; \
102 efi_rts_work.arg4 = _arg4; \
103 efi_rts_work.arg5 = _arg5; \
104 efi_rts_work.efi_rts_id = _rts; \
105 \
106 /* \
107 * queue_work() returns 0 if work was already on queue, \
108 * _ideally_ this should never happen. \
109 */ \
110 if (queue_work(efi_rts_wq, &efi_rts_work.work)) \
111 wait_for_completion(&efi_rts_work.efi_rts_comp); \
112 else \
113 pr_err("Failed to queue work to efi_rts_wq.\n"); \
114 \
115 efi_rts_work.status; \
116})
117
36void efi_call_virt_check_flags(unsigned long flags, const char *call) 118void efi_call_virt_check_flags(unsigned long flags, const char *call)
37{ 119{
38 unsigned long cur_flags, mismatch; 120 unsigned long cur_flags, mismatch;
@@ -90,13 +172,98 @@ void efi_call_virt_check_flags(unsigned long flags, const char *call)
90 */ 172 */
91static DEFINE_SEMAPHORE(efi_runtime_lock); 173static DEFINE_SEMAPHORE(efi_runtime_lock);
92 174
175/*
176 * Calls the appropriate efi_runtime_service() with the appropriate
177 * arguments.
178 *
179 * Semantics followed by efi_call_rts() to understand efi_runtime_work:
180 * 1. If argument was a pointer, recast it from void pointer to original
181 * pointer type.
182 * 2. If argument was a value, recast it from void pointer to original
183 * pointer type and dereference it.
184 */
185static void efi_call_rts(struct work_struct *work)
186{
187 struct efi_runtime_work *efi_rts_work;
188 void *arg1, *arg2, *arg3, *arg4, *arg5;
189 efi_status_t status = EFI_NOT_FOUND;
190
191 efi_rts_work = container_of(work, struct efi_runtime_work, work);
192 arg1 = efi_rts_work->arg1;
193 arg2 = efi_rts_work->arg2;
194 arg3 = efi_rts_work->arg3;
195 arg4 = efi_rts_work->arg4;
196 arg5 = efi_rts_work->arg5;
197
198 switch (efi_rts_work->efi_rts_id) {
199 case GET_TIME:
200 status = efi_call_virt(get_time, (efi_time_t *)arg1,
201 (efi_time_cap_t *)arg2);
202 break;
203 case SET_TIME:
204 status = efi_call_virt(set_time, (efi_time_t *)arg1);
205 break;
206 case GET_WAKEUP_TIME:
207 status = efi_call_virt(get_wakeup_time, (efi_bool_t *)arg1,
208 (efi_bool_t *)arg2, (efi_time_t *)arg3);
209 break;
210 case SET_WAKEUP_TIME:
211 status = efi_call_virt(set_wakeup_time, *(efi_bool_t *)arg1,
212 (efi_time_t *)arg2);
213 break;
214 case GET_VARIABLE:
215 status = efi_call_virt(get_variable, (efi_char16_t *)arg1,
216 (efi_guid_t *)arg2, (u32 *)arg3,
217 (unsigned long *)arg4, (void *)arg5);
218 break;
219 case GET_NEXT_VARIABLE:
220 status = efi_call_virt(get_next_variable, (unsigned long *)arg1,
221 (efi_char16_t *)arg2,
222 (efi_guid_t *)arg3);
223 break;
224 case SET_VARIABLE:
225 status = efi_call_virt(set_variable, (efi_char16_t *)arg1,
226 (efi_guid_t *)arg2, *(u32 *)arg3,
227 *(unsigned long *)arg4, (void *)arg5);
228 break;
229 case QUERY_VARIABLE_INFO:
230 status = efi_call_virt(query_variable_info, *(u32 *)arg1,
231 (u64 *)arg2, (u64 *)arg3, (u64 *)arg4);
232 break;
233 case GET_NEXT_HIGH_MONO_COUNT:
234 status = efi_call_virt(get_next_high_mono_count, (u32 *)arg1);
235 break;
236 case UPDATE_CAPSULE:
237 status = efi_call_virt(update_capsule,
238 (efi_capsule_header_t **)arg1,
239 *(unsigned long *)arg2,
240 *(unsigned long *)arg3);
241 break;
242 case QUERY_CAPSULE_CAPS:
243 status = efi_call_virt(query_capsule_caps,
244 (efi_capsule_header_t **)arg1,
245 *(unsigned long *)arg2, (u64 *)arg3,
246 (int *)arg4);
247 break;
248 default:
249 /*
250 * Ideally, we should never reach here because a caller of this
251 * function should have put the right efi_runtime_service()
252 * function identifier into efi_rts_work->efi_rts_id
253 */
254 pr_err("Requested executing invalid EFI Runtime Service.\n");
255 }
256 efi_rts_work->status = status;
257 complete(&efi_rts_work->efi_rts_comp);
258}
259
93static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) 260static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc)
94{ 261{
95 efi_status_t status; 262 efi_status_t status;
96 263
97 if (down_interruptible(&efi_runtime_lock)) 264 if (down_interruptible(&efi_runtime_lock))
98 return EFI_ABORTED; 265 return EFI_ABORTED;
99 status = efi_call_virt(get_time, tm, tc); 266 status = efi_queue_work(GET_TIME, tm, tc, NULL, NULL, NULL);
100 up(&efi_runtime_lock); 267 up(&efi_runtime_lock);
101 return status; 268 return status;
102} 269}
@@ -107,7 +274,7 @@ static efi_status_t virt_efi_set_time(efi_time_t *tm)
107 274
108 if (down_interruptible(&efi_runtime_lock)) 275 if (down_interruptible(&efi_runtime_lock))
109 return EFI_ABORTED; 276 return EFI_ABORTED;
110 status = efi_call_virt(set_time, tm); 277 status = efi_queue_work(SET_TIME, tm, NULL, NULL, NULL, NULL);
111 up(&efi_runtime_lock); 278 up(&efi_runtime_lock);
112 return status; 279 return status;
113} 280}
@@ -120,7 +287,8 @@ static efi_status_t virt_efi_get_wakeup_time(efi_bool_t *enabled,
120 287
121 if (down_interruptible(&efi_runtime_lock)) 288 if (down_interruptible(&efi_runtime_lock))
122 return EFI_ABORTED; 289 return EFI_ABORTED;
123 status = efi_call_virt(get_wakeup_time, enabled, pending, tm); 290 status = efi_queue_work(GET_WAKEUP_TIME, enabled, pending, tm, NULL,
291 NULL);
124 up(&efi_runtime_lock); 292 up(&efi_runtime_lock);
125 return status; 293 return status;
126} 294}
@@ -131,7 +299,8 @@ static efi_status_t virt_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm)
131 299
132 if (down_interruptible(&efi_runtime_lock)) 300 if (down_interruptible(&efi_runtime_lock))
133 return EFI_ABORTED; 301 return EFI_ABORTED;
134 status = efi_call_virt(set_wakeup_time, enabled, tm); 302 status = efi_queue_work(SET_WAKEUP_TIME, &enabled, tm, NULL, NULL,
303 NULL);
135 up(&efi_runtime_lock); 304 up(&efi_runtime_lock);
136 return status; 305 return status;
137} 306}
@@ -146,8 +315,8 @@ static efi_status_t virt_efi_get_variable(efi_char16_t *name,
146 315
147 if (down_interruptible(&efi_runtime_lock)) 316 if (down_interruptible(&efi_runtime_lock))
148 return EFI_ABORTED; 317 return EFI_ABORTED;
149 status = efi_call_virt(get_variable, name, vendor, attr, data_size, 318 status = efi_queue_work(GET_VARIABLE, name, vendor, attr, data_size,
150 data); 319 data);
151 up(&efi_runtime_lock); 320 up(&efi_runtime_lock);
152 return status; 321 return status;
153} 322}
@@ -160,7 +329,8 @@ static efi_status_t virt_efi_get_next_variable(unsigned long *name_size,
160 329
161 if (down_interruptible(&efi_runtime_lock)) 330 if (down_interruptible(&efi_runtime_lock))
162 return EFI_ABORTED; 331 return EFI_ABORTED;
163 status = efi_call_virt(get_next_variable, name_size, name, vendor); 332 status = efi_queue_work(GET_NEXT_VARIABLE, name_size, name, vendor,
333 NULL, NULL);
164 up(&efi_runtime_lock); 334 up(&efi_runtime_lock);
165 return status; 335 return status;
166} 336}
@@ -175,8 +345,8 @@ static efi_status_t virt_efi_set_variable(efi_char16_t *name,
175 345
176 if (down_interruptible(&efi_runtime_lock)) 346 if (down_interruptible(&efi_runtime_lock))
177 return EFI_ABORTED; 347 return EFI_ABORTED;
178 status = efi_call_virt(set_variable, name, vendor, attr, data_size, 348 status = efi_queue_work(SET_VARIABLE, name, vendor, &attr, &data_size,
179 data); 349 data);
180 up(&efi_runtime_lock); 350 up(&efi_runtime_lock);
181 return status; 351 return status;
182} 352}
@@ -210,8 +380,8 @@ static efi_status_t virt_efi_query_variable_info(u32 attr,
210 380
211 if (down_interruptible(&efi_runtime_lock)) 381 if (down_interruptible(&efi_runtime_lock))
212 return EFI_ABORTED; 382 return EFI_ABORTED;
213 status = efi_call_virt(query_variable_info, attr, storage_space, 383 status = efi_queue_work(QUERY_VARIABLE_INFO, &attr, storage_space,
214 remaining_space, max_variable_size); 384 remaining_space, max_variable_size, NULL);
215 up(&efi_runtime_lock); 385 up(&efi_runtime_lock);
216 return status; 386 return status;
217} 387}
@@ -242,7 +412,8 @@ static efi_status_t virt_efi_get_next_high_mono_count(u32 *count)
242 412
243 if (down_interruptible(&efi_runtime_lock)) 413 if (down_interruptible(&efi_runtime_lock))
244 return EFI_ABORTED; 414 return EFI_ABORTED;
245 status = efi_call_virt(get_next_high_mono_count, count); 415 status = efi_queue_work(GET_NEXT_HIGH_MONO_COUNT, count, NULL, NULL,
416 NULL, NULL);
246 up(&efi_runtime_lock); 417 up(&efi_runtime_lock);
247 return status; 418 return status;
248} 419}
@@ -272,7 +443,8 @@ static efi_status_t virt_efi_update_capsule(efi_capsule_header_t **capsules,
272 443
273 if (down_interruptible(&efi_runtime_lock)) 444 if (down_interruptible(&efi_runtime_lock))
274 return EFI_ABORTED; 445 return EFI_ABORTED;
275 status = efi_call_virt(update_capsule, capsules, count, sg_list); 446 status = efi_queue_work(UPDATE_CAPSULE, capsules, &count, &sg_list,
447 NULL, NULL);
276 up(&efi_runtime_lock); 448 up(&efi_runtime_lock);
277 return status; 449 return status;
278} 450}
@@ -289,8 +461,8 @@ static efi_status_t virt_efi_query_capsule_caps(efi_capsule_header_t **capsules,
289 461
290 if (down_interruptible(&efi_runtime_lock)) 462 if (down_interruptible(&efi_runtime_lock))
291 return EFI_ABORTED; 463 return EFI_ABORTED;
292 status = efi_call_virt(query_capsule_caps, capsules, count, max_size, 464 status = efi_queue_work(QUERY_CAPSULE_CAPS, capsules, &count,
293 reset_type); 465 max_size, reset_type, NULL);
294 up(&efi_runtime_lock); 466 up(&efi_runtime_lock);
295 return status; 467 return status;
296} 468}
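The runtime-wrappers rework above is an instance of the queue-and-wait idiom: package the arguments in an on-stack work item, queue it on the ordered efi_rts_wq, and sleep until the worker completes it. A reduced sketch of that idiom with illustrative names (my_work, my_handler and run_on_wq are not from the patch):

	#include <linux/kernel.h>
	#include <linux/workqueue.h>
	#include <linux/completion.h>

	struct my_work {
		struct work_struct work;
		struct completion done;
		long status;
	};

	static void my_handler(struct work_struct *work)
	{
		struct my_work *w = container_of(work, struct my_work, work);

		w->status = 0;			/* perform the real call here */
		complete(&w->done);		/* wake the waiting caller    */
	}

	static long run_on_wq(struct workqueue_struct *wq)
	{
		struct my_work w = { .status = -1 };

		init_completion(&w.done);
		INIT_WORK_ONSTACK(&w.work, my_handler);

		/* queue_work() returns false if the item was already queued */
		if (queue_work(wq, &w.work))
			wait_for_completion(&w.done);

		destroy_work_on_stack(&w.work);
		return w.status;
	}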
diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c
index e2232cbcec8b..addd9fecc198 100644
--- a/drivers/gpio/gpiolib-acpi.c
+++ b/drivers/gpio/gpiolib-acpi.c
@@ -25,6 +25,7 @@
25 25
26struct acpi_gpio_event { 26struct acpi_gpio_event {
27 struct list_head node; 27 struct list_head node;
28 struct list_head initial_sync_list;
28 acpi_handle handle; 29 acpi_handle handle;
29 unsigned int pin; 30 unsigned int pin;
30 unsigned int irq; 31 unsigned int irq;
@@ -50,6 +51,9 @@ struct acpi_gpio_chip {
50 struct list_head events; 51 struct list_head events;
51}; 52};
52 53
54static LIST_HEAD(acpi_gpio_initial_sync_list);
55static DEFINE_MUTEX(acpi_gpio_initial_sync_list_lock);
56
53static int acpi_gpiochip_find(struct gpio_chip *gc, void *data) 57static int acpi_gpiochip_find(struct gpio_chip *gc, void *data)
54{ 58{
55 if (!gc->parent) 59 if (!gc->parent)
@@ -85,6 +89,21 @@ static struct gpio_desc *acpi_get_gpiod(char *path, int pin)
85 return gpiochip_get_desc(chip, pin); 89 return gpiochip_get_desc(chip, pin);
86} 90}
87 91
92static void acpi_gpio_add_to_initial_sync_list(struct acpi_gpio_event *event)
93{
94 mutex_lock(&acpi_gpio_initial_sync_list_lock);
95 list_add(&event->initial_sync_list, &acpi_gpio_initial_sync_list);
96 mutex_unlock(&acpi_gpio_initial_sync_list_lock);
97}
98
99static void acpi_gpio_del_from_initial_sync_list(struct acpi_gpio_event *event)
100{
101 mutex_lock(&acpi_gpio_initial_sync_list_lock);
102 if (!list_empty(&event->initial_sync_list))
103 list_del_init(&event->initial_sync_list);
104 mutex_unlock(&acpi_gpio_initial_sync_list_lock);
105}
106
88static irqreturn_t acpi_gpio_irq_handler(int irq, void *data) 107static irqreturn_t acpi_gpio_irq_handler(int irq, void *data)
89{ 108{
90 struct acpi_gpio_event *event = data; 109 struct acpi_gpio_event *event = data;
@@ -136,7 +155,7 @@ static acpi_status acpi_gpiochip_request_interrupt(struct acpi_resource *ares,
136 irq_handler_t handler = NULL; 155 irq_handler_t handler = NULL;
137 struct gpio_desc *desc; 156 struct gpio_desc *desc;
138 unsigned long irqflags; 157 unsigned long irqflags;
139 int ret, pin, irq; 158 int ret, pin, irq, value;
140 159
141 if (!acpi_gpio_get_irq_resource(ares, &agpio)) 160 if (!acpi_gpio_get_irq_resource(ares, &agpio))
142 return AE_OK; 161 return AE_OK;
@@ -167,6 +186,8 @@ static acpi_status acpi_gpiochip_request_interrupt(struct acpi_resource *ares,
167 186
168 gpiod_direction_input(desc); 187 gpiod_direction_input(desc);
169 188
189 value = gpiod_get_value(desc);
190
170 ret = gpiochip_lock_as_irq(chip, pin); 191 ret = gpiochip_lock_as_irq(chip, pin);
171 if (ret) { 192 if (ret) {
172 dev_err(chip->parent, "Failed to lock GPIO as interrupt\n"); 193 dev_err(chip->parent, "Failed to lock GPIO as interrupt\n");
@@ -208,6 +229,7 @@ static acpi_status acpi_gpiochip_request_interrupt(struct acpi_resource *ares,
208 event->irq = irq; 229 event->irq = irq;
209 event->pin = pin; 230 event->pin = pin;
210 event->desc = desc; 231 event->desc = desc;
232 INIT_LIST_HEAD(&event->initial_sync_list);
211 233
212 ret = request_threaded_irq(event->irq, NULL, handler, irqflags, 234 ret = request_threaded_irq(event->irq, NULL, handler, irqflags,
213 "ACPI:Event", event); 235 "ACPI:Event", event);
@@ -222,6 +244,18 @@ static acpi_status acpi_gpiochip_request_interrupt(struct acpi_resource *ares,
222 enable_irq_wake(irq); 244 enable_irq_wake(irq);
223 245
224 list_add_tail(&event->node, &acpi_gpio->events); 246 list_add_tail(&event->node, &acpi_gpio->events);
247
248 /*
249 * Make sure we trigger the initial state of the IRQ when using RISING
249 * or FALLING. Note that we run the handlers at late_init; the AML code
251 * may refer to OperationRegions from other (builtin) drivers which
252 * may be probed after us.
253 */
254 if (handler == acpi_gpio_irq_handler &&
255 (((irqflags & IRQF_TRIGGER_RISING) && value == 1) ||
256 ((irqflags & IRQF_TRIGGER_FALLING) && value == 0)))
257 acpi_gpio_add_to_initial_sync_list(event);
258
225 return AE_OK; 259 return AE_OK;
226 260
227fail_free_event: 261fail_free_event:
@@ -294,6 +328,8 @@ void acpi_gpiochip_free_interrupts(struct gpio_chip *chip)
294 list_for_each_entry_safe_reverse(event, ep, &acpi_gpio->events, node) { 328 list_for_each_entry_safe_reverse(event, ep, &acpi_gpio->events, node) {
295 struct gpio_desc *desc; 329 struct gpio_desc *desc;
296 330
331 acpi_gpio_del_from_initial_sync_list(event);
332
297 if (irqd_is_wakeup_set(irq_get_irq_data(event->irq))) 333 if (irqd_is_wakeup_set(irq_get_irq_data(event->irq)))
298 disable_irq_wake(event->irq); 334 disable_irq_wake(event->irq);
299 335
@@ -1158,3 +1194,21 @@ bool acpi_can_fallback_to_crs(struct acpi_device *adev, const char *con_id)
1158 1194
1159 return con_id == NULL; 1195 return con_id == NULL;
1160} 1196}
1197
1198/* Sync the initial state of handlers after all builtin drivers have probed */
1199static int acpi_gpio_initial_sync(void)
1200{
1201 struct acpi_gpio_event *event, *ep;
1202
1203 mutex_lock(&acpi_gpio_initial_sync_list_lock);
1204 list_for_each_entry_safe(event, ep, &acpi_gpio_initial_sync_list,
1205 initial_sync_list) {
1206 acpi_evaluate_object(event->handle, NULL, NULL, NULL);
1207 list_del_init(&event->initial_sync_list);
1208 }
1209 mutex_unlock(&acpi_gpio_initial_sync_list_lock);
1210
1211 return 0;
1212}
1213/* We must use _sync so that this runs after the first deferred_probe run */
1214late_initcall_sync(acpi_gpio_initial_sync);
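
The new initial-sync list above works around edge-triggered ACPI GPIO events whose line is already at its active level when the IRQ is requested: the edge has already happened, so the handler would otherwise never run. A stripped-down sketch of the same idea; struct my_event, sync_node and pending_sync_list are hypothetical names, not the driver's:

#include <linux/list.h>
#include <linux/interrupt.h>

struct my_event {                       /* hypothetical */
	struct list_head sync_node;
	unsigned int irq;
};

static LIST_HEAD(pending_sync_list);

static void maybe_queue_initial_sync(struct my_event *ev,
				     unsigned long irqflags, int value)
{
	/* An edge-triggered IRQ requested while the line already sits at
	 * its active level will never fire, so remember the event and run
	 * its handler once by hand at late_initcall time. */
	if (((irqflags & IRQF_TRIGGER_RISING) && value == 1) ||
	    ((irqflags & IRQF_TRIGGER_FALLING) && value == 0))
		list_add(&ev->sync_node, &pending_sync_list);
}
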
diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c
index 73021b388e12..dd3ff2f2cdce 100644
--- a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c
+++ b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c
@@ -429,6 +429,18 @@ static void adv7511_hpd_work(struct work_struct *work)
429 else 429 else
430 status = connector_status_disconnected; 430 status = connector_status_disconnected;
431 431
432 /*
433 * The bridge resets its registers on unplug. So when we get a plug
434 * event and we're already supposed to be powered, cycle the bridge to
435 * restore its state.
436 */
437 if (status == connector_status_connected &&
438 adv7511->connector.status == connector_status_disconnected &&
439 adv7511->powered) {
440 regcache_mark_dirty(adv7511->regmap);
441 adv7511_power_on(adv7511);
442 }
443
432 if (adv7511->connector.status != status) { 444 if (adv7511->connector.status != status) {
433 adv7511->connector.status = status; 445 adv7511->connector.status = status;
434 if (status == connector_status_disconnected) 446 if (status == connector_status_disconnected)
diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
index 130da5195f3b..81e32199d3ef 100644
--- a/drivers/gpu/drm/drm_atomic_helper.c
+++ b/drivers/gpu/drm/drm_atomic_helper.c
@@ -1510,8 +1510,9 @@ int drm_atomic_helper_async_check(struct drm_device *dev,
1510{ 1510{
1511 struct drm_crtc *crtc; 1511 struct drm_crtc *crtc;
1512 struct drm_crtc_state *crtc_state; 1512 struct drm_crtc_state *crtc_state;
1513 struct drm_plane *plane; 1513 struct drm_plane *plane = NULL;
1514 struct drm_plane_state *old_plane_state, *new_plane_state; 1514 struct drm_plane_state *old_plane_state = NULL;
1515 struct drm_plane_state *new_plane_state = NULL;
1515 const struct drm_plane_helper_funcs *funcs; 1516 const struct drm_plane_helper_funcs *funcs;
1516 int i, n_planes = 0; 1517 int i, n_planes = 0;
1517 1518
@@ -1527,7 +1528,8 @@ int drm_atomic_helper_async_check(struct drm_device *dev,
1527 if (n_planes != 1) 1528 if (n_planes != 1)
1528 return -EINVAL; 1529 return -EINVAL;
1529 1530
1530 if (!new_plane_state->crtc) 1531 if (!new_plane_state->crtc ||
1532 old_plane_state->crtc != new_plane_state->crtc)
1531 return -EINVAL; 1533 return -EINVAL;
1532 1534
1533 funcs = plane->helper_private; 1535 funcs = plane->helper_private;
diff --git a/drivers/gpu/drm/drm_context.c b/drivers/gpu/drm/drm_context.c
index 3c4000facb36..f973d287696a 100644
--- a/drivers/gpu/drm/drm_context.c
+++ b/drivers/gpu/drm/drm_context.c
@@ -372,7 +372,7 @@ int drm_legacy_addctx(struct drm_device *dev, void *data,
372 ctx->handle = drm_legacy_ctxbitmap_next(dev); 372 ctx->handle = drm_legacy_ctxbitmap_next(dev);
373 } 373 }
374 DRM_DEBUG("%d\n", ctx->handle); 374 DRM_DEBUG("%d\n", ctx->handle);
375 if (ctx->handle == -1) { 375 if (ctx->handle < 0) {
376 DRM_DEBUG("Not enough free contexts.\n"); 376 DRM_DEBUG("Not enough free contexts.\n");
377 /* Should this return -EBUSY instead? */ 377 /* Should this return -EBUSY instead? */
378 return -ENOMEM; 378 return -ENOMEM;
diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c
index 1d34619eb3fe..a951ec75d01f 100644
--- a/drivers/gpu/drm/vc4/vc4_plane.c
+++ b/drivers/gpu/drm/vc4/vc4_plane.c
@@ -320,6 +320,9 @@ static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state)
320 vc4_state->x_scaling[0] = VC4_SCALING_TPZ; 320 vc4_state->x_scaling[0] = VC4_SCALING_TPZ;
321 if (vc4_state->y_scaling[0] == VC4_SCALING_NONE) 321 if (vc4_state->y_scaling[0] == VC4_SCALING_NONE)
322 vc4_state->y_scaling[0] = VC4_SCALING_TPZ; 322 vc4_state->y_scaling[0] = VC4_SCALING_TPZ;
323 } else {
324 vc4_state->x_scaling[1] = VC4_SCALING_NONE;
325 vc4_state->y_scaling[1] = VC4_SCALING_NONE;
323 } 326 }
324 327
325 vc4_state->is_unity = (vc4_state->x_scaling[0] == VC4_SCALING_NONE && 328 vc4_state->is_unity = (vc4_state->x_scaling[0] == VC4_SCALING_NONE &&
diff --git a/drivers/i2c/busses/i2c-xlp9xx.c b/drivers/i2c/busses/i2c-xlp9xx.c
index 1f41a4f89c08..8a873975cf12 100644
--- a/drivers/i2c/busses/i2c-xlp9xx.c
+++ b/drivers/i2c/busses/i2c-xlp9xx.c
@@ -191,28 +191,43 @@ static void xlp9xx_i2c_drain_rx_fifo(struct xlp9xx_i2c_dev *priv)
191 if (priv->len_recv) { 191 if (priv->len_recv) {
192 /* read length byte */ 192 /* read length byte */
193 rlen = xlp9xx_read_i2c_reg(priv, XLP9XX_I2C_MRXFIFO); 193 rlen = xlp9xx_read_i2c_reg(priv, XLP9XX_I2C_MRXFIFO);
194
195 /*
196 * We expect at least 2 interrupts for I2C_M_RECV_LEN
197 * transactions. The length is updated during the first
198 * interrupt, and the buffer contents are only copied
199 * during subsequent interrupts. If the interrupts
200 * get merged, we would complete the transaction without
201 * copying out the bytes from the RX fifo. To avoid this, we
202 * now drain the fifo as and when data is available.
203 * We have already drained the rlen byte, so decrement the
204 * total length by one.
205 */
206
207 len--;
194 if (rlen > I2C_SMBUS_BLOCK_MAX || rlen == 0) { 208 if (rlen > I2C_SMBUS_BLOCK_MAX || rlen == 0) {
195 rlen = 0; /*abort transfer */ 209 rlen = 0; /*abort transfer */
196 priv->msg_buf_remaining = 0; 210 priv->msg_buf_remaining = 0;
197 priv->msg_len = 0; 211 priv->msg_len = 0;
198 } else { 212 xlp9xx_i2c_update_rlen(priv);
199 *buf++ = rlen; 213 return;
200 if (priv->client_pec)
201 ++rlen; /* account for error check byte */
202 /* update remaining bytes and message length */
203 priv->msg_buf_remaining = rlen;
204 priv->msg_len = rlen + 1;
205 } 214 }
215
216 *buf++ = rlen;
217 if (priv->client_pec)
218 ++rlen; /* account for error check byte */
219 /* update remaining bytes and message length */
220 priv->msg_buf_remaining = rlen;
221 priv->msg_len = rlen + 1;
206 xlp9xx_i2c_update_rlen(priv); 222 xlp9xx_i2c_update_rlen(priv);
207 priv->len_recv = false; 223 priv->len_recv = false;
208 } else {
209 len = min(priv->msg_buf_remaining, len);
210 for (i = 0; i < len; i++, buf++)
211 *buf = xlp9xx_read_i2c_reg(priv, XLP9XX_I2C_MRXFIFO);
212
213 priv->msg_buf_remaining -= len;
214 } 224 }
215 225
226 len = min(priv->msg_buf_remaining, len);
227 for (i = 0; i < len; i++, buf++)
228 *buf = xlp9xx_read_i2c_reg(priv, XLP9XX_I2C_MRXFIFO);
229
230 priv->msg_buf_remaining -= len;
216 priv->msg_buf = buf; 231 priv->msg_buf = buf;
217 232
218 if (priv->msg_buf_remaining) 233 if (priv->msg_buf_remaining)
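
The comment in this hunk describes the SMBus block-read case: the first byte drained from the RX FIFO is the block length, and the payload may already be waiting in the same interrupt, so it has to be drained immediately rather than on a later IRQ. A runnable user-space sketch of that bookkeeping with a simulated FIFO (all names here are illustrative):

#include <stdio.h>

/* Simulated RX FIFO: length byte (3) followed by the payload. */
static unsigned char fifo[] = { 3, 0xaa, 0xbb, 0xcc };
static unsigned int fifo_pos;

static unsigned char read_fifo_byte(void)
{
	return fifo[fifo_pos++];
}

int main(void)
{
	unsigned char msg[8], *buf = msg;
	unsigned int fifo_bytes = sizeof(fifo);  /* all IRQs merged into one */
	unsigned int remaining;

	/* The first drained byte is the SMBus block length... */
	remaining = read_fifo_byte();
	fifo_bytes--;

	/* ...and the payload may already be in the FIFO, so drain it now. */
	while (fifo_bytes-- && remaining--)
		*buf++ = read_fifo_byte();

	printf("received %td bytes\n", buf - msg);  /* prints 3 */
	return 0;
}
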
diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index a6e904973ba8..475910ffbcb6 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -121,7 +121,7 @@ static int uverbs_try_lock_object(struct ib_uobject *uobj, bool exclusive)
121 * this lock. 121 * this lock.
122 */ 122 */
123 if (!exclusive) 123 if (!exclusive)
124 return __atomic_add_unless(&uobj->usecnt, 1, -1) == -1 ? 124 return atomic_fetch_add_unless(&uobj->usecnt, 1, -1) == -1 ?
125 -EBUSY : 0; 125 -EBUSY : 0;
126 126
127 /* lock is either WRITE or DESTROY - should be exclusive */ 127 /* lock is either WRITE or DESTROY - should be exclusive */
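
The one-line change above swaps the removed __atomic_add_unless() helper for atomic_fetch_add_unless(), which adds to the counter unless it currently equals the given value and returns the old value either way. A small sketch of the shared/exclusive scheme this supports, assuming 0 means unlocked, a positive count means that many shared readers, and -1 marks an exclusive owner (simplified, not the exact rdma_core code):

#include <linux/atomic.h>
#include <linux/errno.h>

static int try_lock_shared(atomic_t *usecnt)
{
	/* Add one reader unless an exclusive owner (-1) holds the object.
	 * Seeing -1 as the old value means the add did not happen. */
	return atomic_fetch_add_unless(usecnt, 1, -1) == -1 ? -EBUSY : 0;
}

static int try_lock_exclusive(atomic_t *usecnt)
{
	/* Exclusive locking only succeeds when nobody holds the object. */
	return atomic_cmpxchg(usecnt, 0, -1) == 0 ? 0 : -EBUSY;
}
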
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index cc06e8404e9b..583d3a10b940 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1984,15 +1984,64 @@ static int modify_qp(struct ib_uverbs_file *file,
1984 goto release_qp; 1984 goto release_qp;
1985 } 1985 }
1986 1986
1987 if ((cmd->base.attr_mask & IB_QP_AV) && 1987 if ((cmd->base.attr_mask & IB_QP_AV)) {
1988 !rdma_is_port_valid(qp->device, cmd->base.dest.port_num)) { 1988 if (!rdma_is_port_valid(qp->device, cmd->base.dest.port_num)) {
1989 ret = -EINVAL; 1989 ret = -EINVAL;
1990 goto release_qp; 1990 goto release_qp;
1991 }
1992
1993 if (cmd->base.attr_mask & IB_QP_STATE &&
1994 cmd->base.qp_state == IB_QPS_RTR) {
1995 /* We are in INIT->RTR TRANSITION (if we are not,
1996 * this transition will be rejected in subsequent checks).
1997 * In the INIT->RTR transition, we cannot have IB_QP_PORT set,
1998 * but the IB_QP_STATE flag is required.
1999 *
2000 * Since kernel 3.14 (commit dbf727de7440), the uverbs driver,
2001 * when IB_QP_AV is set, has required inclusion of a valid
2002 * port number in the primary AV. (AVs are created and handled
2003 * differently for infiniband and ethernet (RoCE) ports).
2004 *
2005 * Check the port number included in the primary AV against
2006 * the port number in the qp struct, which was set (and saved)
2007 * in the RST->INIT transition.
2008 */
2009 if (cmd->base.dest.port_num != qp->real_qp->port) {
2010 ret = -EINVAL;
2011 goto release_qp;
2012 }
2013 } else {
2014 /* We are in SQD->SQD. (If we are not, this transition will
2015 * be rejected later in the verbs layer checks).
2016 * Check for both IB_QP_PORT and IB_QP_AV, these can be set
2017 * together in the SQD->SQD transition.
2018 *
2019 * If only IB_QP_AV was set, add in IB_QP_PORT as well (the
2020 * verbs layer driver does not track primary port changes
2021 * resulting from path migration. Thus, in SQD, if the primary
2022 * AV is modified, the primary port should also be modified).
2023 *
2024 * Note that in this transition, the IB_QP_STATE flag
2025 * is not allowed.
2026 */
2027 if (((cmd->base.attr_mask & (IB_QP_AV | IB_QP_PORT))
2028 == (IB_QP_AV | IB_QP_PORT)) &&
2029 cmd->base.port_num != cmd->base.dest.port_num) {
2030 ret = -EINVAL;
2031 goto release_qp;
2032 }
2033 if ((cmd->base.attr_mask & (IB_QP_AV | IB_QP_PORT))
2034 == IB_QP_AV) {
2035 cmd->base.attr_mask |= IB_QP_PORT;
2036 cmd->base.port_num = cmd->base.dest.port_num;
2037 }
2038 }
1991 } 2039 }
1992 2040
1993 if ((cmd->base.attr_mask & IB_QP_ALT_PATH) && 2041 if ((cmd->base.attr_mask & IB_QP_ALT_PATH) &&
1994 (!rdma_is_port_valid(qp->device, cmd->base.alt_port_num) || 2042 (!rdma_is_port_valid(qp->device, cmd->base.alt_port_num) ||
1995 !rdma_is_port_valid(qp->device, cmd->base.alt_dest.port_num))) { 2043 !rdma_is_port_valid(qp->device, cmd->base.alt_dest.port_num) ||
2044 cmd->base.alt_port_num != cmd->base.alt_dest.port_num)) {
1996 ret = -EINVAL; 2045 ret = -EINVAL;
1997 goto release_qp; 2046 goto release_qp;
1998 } 2047 }
diff --git a/drivers/input/keyboard/hilkbd.c b/drivers/input/keyboard/hilkbd.c
index a4e404aaf64b..5c7afdec192c 100644
--- a/drivers/input/keyboard/hilkbd.c
+++ b/drivers/input/keyboard/hilkbd.c
@@ -57,8 +57,8 @@ MODULE_LICENSE("GPL v2");
57 #define HIL_DATA 0x1 57 #define HIL_DATA 0x1
58 #define HIL_CMD 0x3 58 #define HIL_CMD 0x3
59 #define HIL_IRQ 2 59 #define HIL_IRQ 2
60 #define hil_readb(p) readb(p) 60 #define hil_readb(p) readb((const volatile void __iomem *)(p))
61 #define hil_writeb(v,p) writeb((v),(p)) 61 #define hil_writeb(v, p) writeb((v), (volatile void __iomem *)(p))
62 62
63#else 63#else
64#error "HIL is not supported on this platform" 64#error "HIL is not supported on this platform"
diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index e9233db16e03..d564d21245c5 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -8,7 +8,7 @@ config ARM_GIC
8 bool 8 bool
9 select IRQ_DOMAIN 9 select IRQ_DOMAIN
10 select IRQ_DOMAIN_HIERARCHY 10 select IRQ_DOMAIN_HIERARCHY
11 select MULTI_IRQ_HANDLER 11 select GENERIC_IRQ_MULTI_HANDLER
12 select GENERIC_IRQ_EFFECTIVE_AFF_MASK 12 select GENERIC_IRQ_EFFECTIVE_AFF_MASK
13 13
14config ARM_GIC_PM 14config ARM_GIC_PM
@@ -34,7 +34,7 @@ config GIC_NON_BANKED
34config ARM_GIC_V3 34config ARM_GIC_V3
35 bool 35 bool
36 select IRQ_DOMAIN 36 select IRQ_DOMAIN
37 select MULTI_IRQ_HANDLER 37 select GENERIC_IRQ_MULTI_HANDLER
38 select IRQ_DOMAIN_HIERARCHY 38 select IRQ_DOMAIN_HIERARCHY
39 select PARTITION_PERCPU 39 select PARTITION_PERCPU
40 select GENERIC_IRQ_EFFECTIVE_AFF_MASK 40 select GENERIC_IRQ_EFFECTIVE_AFF_MASK
@@ -66,7 +66,7 @@ config ARM_NVIC
66config ARM_VIC 66config ARM_VIC
67 bool 67 bool
68 select IRQ_DOMAIN 68 select IRQ_DOMAIN
69 select MULTI_IRQ_HANDLER 69 select GENERIC_IRQ_MULTI_HANDLER
70 70
71config ARM_VIC_NR 71config ARM_VIC_NR
72 int 72 int
@@ -93,14 +93,14 @@ config ATMEL_AIC_IRQ
93 bool 93 bool
94 select GENERIC_IRQ_CHIP 94 select GENERIC_IRQ_CHIP
95 select IRQ_DOMAIN 95 select IRQ_DOMAIN
96 select MULTI_IRQ_HANDLER 96 select GENERIC_IRQ_MULTI_HANDLER
97 select SPARSE_IRQ 97 select SPARSE_IRQ
98 98
99config ATMEL_AIC5_IRQ 99config ATMEL_AIC5_IRQ
100 bool 100 bool
101 select GENERIC_IRQ_CHIP 101 select GENERIC_IRQ_CHIP
102 select IRQ_DOMAIN 102 select IRQ_DOMAIN
103 select MULTI_IRQ_HANDLER 103 select GENERIC_IRQ_MULTI_HANDLER
104 select SPARSE_IRQ 104 select SPARSE_IRQ
105 105
106config I8259 106config I8259
@@ -137,7 +137,7 @@ config DW_APB_ICTL
137config FARADAY_FTINTC010 137config FARADAY_FTINTC010
138 bool 138 bool
139 select IRQ_DOMAIN 139 select IRQ_DOMAIN
140 select MULTI_IRQ_HANDLER 140 select GENERIC_IRQ_MULTI_HANDLER
141 select SPARSE_IRQ 141 select SPARSE_IRQ
142 142
143config HISILICON_IRQ_MBIGEN 143config HISILICON_IRQ_MBIGEN
@@ -162,7 +162,7 @@ config CLPS711X_IRQCHIP
162 bool 162 bool
163 depends on ARCH_CLPS711X 163 depends on ARCH_CLPS711X
164 select IRQ_DOMAIN 164 select IRQ_DOMAIN
165 select MULTI_IRQ_HANDLER 165 select GENERIC_IRQ_MULTI_HANDLER
166 select SPARSE_IRQ 166 select SPARSE_IRQ
167 default y 167 default y
168 168
@@ -181,7 +181,7 @@ config OMAP_IRQCHIP
181config ORION_IRQCHIP 181config ORION_IRQCHIP
182 bool 182 bool
183 select IRQ_DOMAIN 183 select IRQ_DOMAIN
184 select MULTI_IRQ_HANDLER 184 select GENERIC_IRQ_MULTI_HANDLER
185 185
186config PIC32_EVIC 186config PIC32_EVIC
187 bool 187 bool
diff --git a/drivers/irqchip/irq-gic-v3-its-fsl-mc-msi.c b/drivers/irqchip/irq-gic-v3-its-fsl-mc-msi.c
index 4eca5c763766..606efa64adff 100644
--- a/drivers/irqchip/irq-gic-v3-its-fsl-mc-msi.c
+++ b/drivers/irqchip/irq-gic-v3-its-fsl-mc-msi.c
@@ -45,6 +45,9 @@ static int its_fsl_mc_msi_prepare(struct irq_domain *msi_domain,
45 */ 45 */
46 info->scratchpad[0].ul = mc_bus_dev->icid; 46 info->scratchpad[0].ul = mc_bus_dev->icid;
47 msi_info = msi_get_domain_info(msi_domain->parent); 47 msi_info = msi_get_domain_info(msi_domain->parent);
48
49 /* Allocate at least 32 MSIs, and always as a power of 2 */
50 nvec = max_t(int, 32, roundup_pow_of_two(nvec));
48 return msi_info->ops->msi_prepare(msi_domain->parent, dev, nvec, info); 51 return msi_info->ops->msi_prepare(msi_domain->parent, dev, nvec, info);
49} 52}
50 53
diff --git a/drivers/irqchip/irq-gic-v3-its-pci-msi.c b/drivers/irqchip/irq-gic-v3-its-pci-msi.c
index 25a98de5cfb2..8d6d009d1d58 100644
--- a/drivers/irqchip/irq-gic-v3-its-pci-msi.c
+++ b/drivers/irqchip/irq-gic-v3-its-pci-msi.c
@@ -66,7 +66,7 @@ static int its_pci_msi_prepare(struct irq_domain *domain, struct device *dev,
66{ 66{
67 struct pci_dev *pdev, *alias_dev; 67 struct pci_dev *pdev, *alias_dev;
68 struct msi_domain_info *msi_info; 68 struct msi_domain_info *msi_info;
69 int alias_count = 0; 69 int alias_count = 0, minnvec = 1;
70 70
71 if (!dev_is_pci(dev)) 71 if (!dev_is_pci(dev))
72 return -EINVAL; 72 return -EINVAL;
@@ -86,8 +86,18 @@ static int its_pci_msi_prepare(struct irq_domain *domain, struct device *dev,
86 /* ITS specific DeviceID, as the core ITS ignores dev. */ 86 /* ITS specific DeviceID, as the core ITS ignores dev. */
87 info->scratchpad[0].ul = pci_msi_domain_get_msi_rid(domain, pdev); 87 info->scratchpad[0].ul = pci_msi_domain_get_msi_rid(domain, pdev);
88 88
89 return msi_info->ops->msi_prepare(domain->parent, 89 /*
90 dev, max(nvec, alias_count), info); 90 * Always allocate a power of 2, and special case device 0 for
91 * broken systems where the DevID is not wired (and all devices
92 * appear as DevID 0). For that reason, we generously allocate a
93 * minimum of 32 MSIs for DevID 0. If you want more because all
94 * your devices are aliasing to DevID 0, consider fixing your HW.
95 */
96 nvec = max(nvec, alias_count);
97 if (!info->scratchpad[0].ul)
98 minnvec = 32;
99 nvec = max_t(int, minnvec, roundup_pow_of_two(nvec));
100 return msi_info->ops->msi_prepare(domain->parent, dev, nvec, info);
91} 101}
92 102
93static struct msi_domain_ops its_pci_msi_ops = { 103static struct msi_domain_ops its_pci_msi_ops = {
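
To see what the rounding above does to a request, a tiny user-space check; roundup_pow_of_two() is re-implemented by hand here, and the 32-vector floor is the DevID 0 special case from the comment:

#include <stdio.h>

static unsigned int round_up_pow2(unsigned int x)
{
	unsigned int r = 1;

	while (r < x)
		r <<= 1;
	return r;
}

int main(void)
{
	unsigned int nvec[] = { 1, 3, 12, 33 };

	for (int i = 0; i < 4; i++) {
		unsigned int n = round_up_pow2(nvec[i]);

		if (n < 32)             /* minimum applied when DevID is 0 */
			n = 32;
		printf("requested %u -> allocated %u\n", nvec[i], n);
	}
	return 0;
}
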
diff --git a/drivers/irqchip/irq-gic-v3-its-platform-msi.c b/drivers/irqchip/irq-gic-v3-its-platform-msi.c
index 8881a053c173..7b8e87b493fe 100644
--- a/drivers/irqchip/irq-gic-v3-its-platform-msi.c
+++ b/drivers/irqchip/irq-gic-v3-its-platform-msi.c
@@ -73,6 +73,8 @@ static int its_pmsi_prepare(struct irq_domain *domain, struct device *dev,
73 /* ITS specific DeviceID, as the core ITS ignores dev. */ 73 /* ITS specific DeviceID, as the core ITS ignores dev. */
74 info->scratchpad[0].ul = dev_id; 74 info->scratchpad[0].ul = dev_id;
75 75
76 /* Allocate at least 32 MSIs, and always as a power of 2 */
77 nvec = max_t(int, 32, roundup_pow_of_two(nvec));
76 return msi_info->ops->msi_prepare(domain->parent, 78 return msi_info->ops->msi_prepare(domain->parent,
77 dev, nvec, info); 79 dev, nvec, info);
78} 80}
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index d7842d312d3e..316a57530f6d 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -23,6 +23,8 @@
23#include <linux/dma-iommu.h> 23#include <linux/dma-iommu.h>
24#include <linux/interrupt.h> 24#include <linux/interrupt.h>
25#include <linux/irqdomain.h> 25#include <linux/irqdomain.h>
26#include <linux/list.h>
27#include <linux/list_sort.h>
26#include <linux/log2.h> 28#include <linux/log2.h>
27#include <linux/mm.h> 29#include <linux/mm.h>
28#include <linux/msi.h> 30#include <linux/msi.h>
@@ -160,7 +162,7 @@ static struct {
160} vpe_proxy; 162} vpe_proxy;
161 163
162static LIST_HEAD(its_nodes); 164static LIST_HEAD(its_nodes);
163static DEFINE_SPINLOCK(its_lock); 165static DEFINE_RAW_SPINLOCK(its_lock);
164static struct rdists *gic_rdists; 166static struct rdists *gic_rdists;
165static struct irq_domain *its_parent; 167static struct irq_domain *its_parent;
166 168
@@ -1421,112 +1423,176 @@ static struct irq_chip its_irq_chip = {
1421 .irq_set_vcpu_affinity = its_irq_set_vcpu_affinity, 1423 .irq_set_vcpu_affinity = its_irq_set_vcpu_affinity,
1422}; 1424};
1423 1425
1426
1424/* 1427/*
1425 * How we allocate LPIs: 1428 * How we allocate LPIs:
1426 * 1429 *
1427 * The GIC has id_bits bits for interrupt identifiers. From there, we 1430 * lpi_range_list contains ranges of LPIs that are available to
1428 * must subtract 8192 which are reserved for SGIs/PPIs/SPIs. Then, as 1431 * allocate from. To allocate LPIs, just pick the first range that
1429 * we allocate LPIs by chunks of 32, we can shift the whole thing by 5 1432 * fits the required allocation, and reduce it by the required
1430 * bits to the right. 1433 * amount. Once empty, remove the range from the list.
1434 *
1435 * To free a range of LPIs, add a free range to the list, sort it and
1436 * merge the result if the new range happens to be adjacent to an
1437 * already free block.
1431 * 1438 *
1432 * This gives us (((1UL << id_bits) - 8192) >> 5) possible allocations. 1439 * The consequence of the above is that allocation cost is low, but
1440 * freeing is expensive. We assume that freeing rarely occurs.
1433 */ 1441 */
1434#define IRQS_PER_CHUNK_SHIFT 5
1435#define IRQS_PER_CHUNK (1UL << IRQS_PER_CHUNK_SHIFT)
1436#define ITS_MAX_LPI_NRBITS 16 /* 64K LPIs */
1437 1442
1438static unsigned long *lpi_bitmap; 1443static DEFINE_MUTEX(lpi_range_lock);
1439static u32 lpi_chunks; 1444static LIST_HEAD(lpi_range_list);
1440static DEFINE_SPINLOCK(lpi_lock); 1445
1446struct lpi_range {
1447 struct list_head entry;
1448 u32 base_id;
1449 u32 span;
1450};
1441 1451
1442static int its_lpi_to_chunk(int lpi) 1452static struct lpi_range *mk_lpi_range(u32 base, u32 span)
1443{ 1453{
1444 return (lpi - 8192) >> IRQS_PER_CHUNK_SHIFT; 1454 struct lpi_range *range;
1455
1456 range = kzalloc(sizeof(*range), GFP_KERNEL);
1457 if (range) {
1458 INIT_LIST_HEAD(&range->entry);
1459 range->base_id = base;
1460 range->span = span;
1461 }
1462
1463 return range;
1445} 1464}
1446 1465
1447static int its_chunk_to_lpi(int chunk) 1466static int lpi_range_cmp(void *priv, struct list_head *a, struct list_head *b)
1448{ 1467{
1449 return (chunk << IRQS_PER_CHUNK_SHIFT) + 8192; 1468 struct lpi_range *ra, *rb;
1469
1470 ra = container_of(a, struct lpi_range, entry);
1471 rb = container_of(b, struct lpi_range, entry);
1472
1473 return rb->base_id - ra->base_id;
1450} 1474}
1451 1475
1452static int __init its_lpi_init(u32 id_bits) 1476static void merge_lpi_ranges(void)
1453{ 1477{
1454 lpi_chunks = its_lpi_to_chunk(1UL << id_bits); 1478 struct lpi_range *range, *tmp;
1455 1479
1456 lpi_bitmap = kcalloc(BITS_TO_LONGS(lpi_chunks), sizeof(long), 1480 list_for_each_entry_safe(range, tmp, &lpi_range_list, entry) {
1457 GFP_KERNEL); 1481 if (!list_is_last(&range->entry, &lpi_range_list) &&
1458 if (!lpi_bitmap) { 1482 (tmp->base_id == (range->base_id + range->span))) {
1459 lpi_chunks = 0; 1483 tmp->base_id = range->base_id;
1460 return -ENOMEM; 1484 tmp->span += range->span;
1485 list_del(&range->entry);
1486 kfree(range);
1487 }
1461 } 1488 }
1489}
1462 1490
1463 pr_info("ITS: Allocated %d chunks for LPIs\n", (int)lpi_chunks); 1491static int alloc_lpi_range(u32 nr_lpis, u32 *base)
1464 return 0; 1492{
1493 struct lpi_range *range, *tmp;
1494 int err = -ENOSPC;
1495
1496 mutex_lock(&lpi_range_lock);
1497
1498 list_for_each_entry_safe(range, tmp, &lpi_range_list, entry) {
1499 if (range->span >= nr_lpis) {
1500 *base = range->base_id;
1501 range->base_id += nr_lpis;
1502 range->span -= nr_lpis;
1503
1504 if (range->span == 0) {
1505 list_del(&range->entry);
1506 kfree(range);
1507 }
1508
1509 err = 0;
1510 break;
1511 }
1512 }
1513
1514 mutex_unlock(&lpi_range_lock);
1515
1516 pr_debug("ITS: alloc %u:%u\n", *base, nr_lpis);
1517 return err;
1465} 1518}
1466 1519
1467static unsigned long *its_lpi_alloc_chunks(int nr_irqs, int *base, int *nr_ids) 1520static int free_lpi_range(u32 base, u32 nr_lpis)
1468{ 1521{
1469 unsigned long *bitmap = NULL; 1522 struct lpi_range *new;
1470 int chunk_id; 1523 int err = 0;
1471 int nr_chunks; 1524
1472 int i; 1525 mutex_lock(&lpi_range_lock);
1526
1527 new = mk_lpi_range(base, nr_lpis);
1528 if (!new) {
1529 err = -ENOMEM;
1530 goto out;
1531 }
1532
1533 list_add(&new->entry, &lpi_range_list);
1534 list_sort(NULL, &lpi_range_list, lpi_range_cmp);
1535 merge_lpi_ranges();
1536out:
1537 mutex_unlock(&lpi_range_lock);
1538 return err;
1539}
1540
1541static int __init its_lpi_init(u32 id_bits)
1542{
1543 u32 lpis = (1UL << id_bits) - 8192;
1544 u32 numlpis;
1545 int err;
1546
1547 numlpis = 1UL << GICD_TYPER_NUM_LPIS(gic_rdists->gicd_typer);
1548
1549 if (numlpis > 2 && !WARN_ON(numlpis > lpis)) {
1550 lpis = numlpis;
1551 pr_info("ITS: Using hypervisor restricted LPI range [%u]\n",
1552 lpis);
1553 }
1473 1554
1474 nr_chunks = DIV_ROUND_UP(nr_irqs, IRQS_PER_CHUNK); 1555 /*
1556 * Initializing the allocator is just the same as freeing the
1557 * full range of LPIs.
1558 */
1559 err = free_lpi_range(8192, lpis);
1560 pr_debug("ITS: Allocator initialized for %u LPIs\n", lpis);
1561 return err;
1562}
1475 1563
1476 spin_lock(&lpi_lock); 1564static unsigned long *its_lpi_alloc(int nr_irqs, u32 *base, int *nr_ids)
1565{
1566 unsigned long *bitmap = NULL;
1567 int err = 0;
1477 1568
1478 do { 1569 do {
1479 chunk_id = bitmap_find_next_zero_area(lpi_bitmap, lpi_chunks, 1570 err = alloc_lpi_range(nr_irqs, base);
1480 0, nr_chunks, 0); 1571 if (!err)
1481 if (chunk_id < lpi_chunks)
1482 break; 1572 break;
1483 1573
1484 nr_chunks--; 1574 nr_irqs /= 2;
1485 } while (nr_chunks > 0); 1575 } while (nr_irqs > 0);
1486 1576
1487 if (!nr_chunks) 1577 if (err)
1488 goto out; 1578 goto out;
1489 1579
1490 bitmap = kcalloc(BITS_TO_LONGS(nr_chunks * IRQS_PER_CHUNK), 1580 bitmap = kcalloc(BITS_TO_LONGS(nr_irqs), sizeof (long), GFP_ATOMIC);
1491 sizeof(long),
1492 GFP_ATOMIC);
1493 if (!bitmap) 1581 if (!bitmap)
1494 goto out; 1582 goto out;
1495 1583
1496 for (i = 0; i < nr_chunks; i++) 1584 *nr_ids = nr_irqs;
1497 set_bit(chunk_id + i, lpi_bitmap);
1498
1499 *base = its_chunk_to_lpi(chunk_id);
1500 *nr_ids = nr_chunks * IRQS_PER_CHUNK;
1501 1585
1502out: 1586out:
1503 spin_unlock(&lpi_lock);
1504
1505 if (!bitmap) 1587 if (!bitmap)
1506 *base = *nr_ids = 0; 1588 *base = *nr_ids = 0;
1507 1589
1508 return bitmap; 1590 return bitmap;
1509} 1591}
1510 1592
1511static void its_lpi_free_chunks(unsigned long *bitmap, int base, int nr_ids) 1593static void its_lpi_free(unsigned long *bitmap, u32 base, u32 nr_ids)
1512{ 1594{
1513 int lpi; 1595 WARN_ON(free_lpi_range(base, nr_ids));
1514
1515 spin_lock(&lpi_lock);
1516
1517 for (lpi = base; lpi < (base + nr_ids); lpi += IRQS_PER_CHUNK) {
1518 int chunk = its_lpi_to_chunk(lpi);
1519
1520 BUG_ON(chunk > lpi_chunks);
1521 if (test_bit(chunk, lpi_bitmap)) {
1522 clear_bit(chunk, lpi_bitmap);
1523 } else {
1524 pr_err("Bad LPI chunk %d\n", chunk);
1525 }
1526 }
1527
1528 spin_unlock(&lpi_lock);
1529
1530 kfree(bitmap); 1596 kfree(bitmap);
1531} 1597}
1532 1598
@@ -1559,7 +1625,7 @@ static int __init its_alloc_lpi_tables(void)
1559{ 1625{
1560 phys_addr_t paddr; 1626 phys_addr_t paddr;
1561 1627
1562 lpi_id_bits = min_t(u32, gic_rdists->id_bits, ITS_MAX_LPI_NRBITS); 1628 lpi_id_bits = GICD_TYPER_ID_BITS(gic_rdists->gicd_typer);
1563 gic_rdists->prop_page = its_allocate_prop_table(GFP_NOWAIT); 1629 gic_rdists->prop_page = its_allocate_prop_table(GFP_NOWAIT);
1564 if (!gic_rdists->prop_page) { 1630 if (!gic_rdists->prop_page) {
1565 pr_err("Failed to allocate PROPBASE\n"); 1631 pr_err("Failed to allocate PROPBASE\n");
@@ -1997,12 +2063,12 @@ static void its_cpu_init_collections(void)
1997{ 2063{
1998 struct its_node *its; 2064 struct its_node *its;
1999 2065
2000 spin_lock(&its_lock); 2066 raw_spin_lock(&its_lock);
2001 2067
2002 list_for_each_entry(its, &its_nodes, entry) 2068 list_for_each_entry(its, &its_nodes, entry)
2003 its_cpu_init_collection(its); 2069 its_cpu_init_collection(its);
2004 2070
2005 spin_unlock(&its_lock); 2071 raw_spin_unlock(&its_lock);
2006} 2072}
2007 2073
2008static struct its_device *its_find_device(struct its_node *its, u32 dev_id) 2074static struct its_device *its_find_device(struct its_node *its, u32 dev_id)
@@ -2134,17 +2200,20 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id,
2134 if (!its_alloc_device_table(its, dev_id)) 2200 if (!its_alloc_device_table(its, dev_id))
2135 return NULL; 2201 return NULL;
2136 2202
2203 if (WARN_ON(!is_power_of_2(nvecs)))
2204 nvecs = roundup_pow_of_two(nvecs);
2205
2137 dev = kzalloc(sizeof(*dev), GFP_KERNEL); 2206 dev = kzalloc(sizeof(*dev), GFP_KERNEL);
2138 /* 2207 /*
2139 * We allocate at least one chunk worth of LPIs bet device, 2208 * Even if the device wants a single LPI, the ITT must be
2140 * and thus that many ITEs. The device may require less though. 2209 * sized as a power of two (and you need at least one bit...).
2141 */ 2210 */
2142 nr_ites = max(IRQS_PER_CHUNK, roundup_pow_of_two(nvecs)); 2211 nr_ites = max(2, nvecs);
2143 sz = nr_ites * its->ite_size; 2212 sz = nr_ites * its->ite_size;
2144 sz = max(sz, ITS_ITT_ALIGN) + ITS_ITT_ALIGN - 1; 2213 sz = max(sz, ITS_ITT_ALIGN) + ITS_ITT_ALIGN - 1;
2145 itt = kzalloc(sz, GFP_KERNEL); 2214 itt = kzalloc(sz, GFP_KERNEL);
2146 if (alloc_lpis) { 2215 if (alloc_lpis) {
2147 lpi_map = its_lpi_alloc_chunks(nvecs, &lpi_base, &nr_lpis); 2216 lpi_map = its_lpi_alloc(nvecs, &lpi_base, &nr_lpis);
2148 if (lpi_map) 2217 if (lpi_map)
2149 col_map = kcalloc(nr_lpis, sizeof(*col_map), 2218 col_map = kcalloc(nr_lpis, sizeof(*col_map),
2150 GFP_KERNEL); 2219 GFP_KERNEL);
@@ -2379,9 +2448,9 @@ static void its_irq_domain_free(struct irq_domain *domain, unsigned int virq,
2379 /* If all interrupts have been freed, start mopping the floor */ 2448 /* If all interrupts have been freed, start mopping the floor */
2380 if (bitmap_empty(its_dev->event_map.lpi_map, 2449 if (bitmap_empty(its_dev->event_map.lpi_map,
2381 its_dev->event_map.nr_lpis)) { 2450 its_dev->event_map.nr_lpis)) {
2382 its_lpi_free_chunks(its_dev->event_map.lpi_map, 2451 its_lpi_free(its_dev->event_map.lpi_map,
2383 its_dev->event_map.lpi_base, 2452 its_dev->event_map.lpi_base,
2384 its_dev->event_map.nr_lpis); 2453 its_dev->event_map.nr_lpis);
2385 kfree(its_dev->event_map.col_map); 2454 kfree(its_dev->event_map.col_map);
2386 2455
2387 /* Unmap device/itt */ 2456 /* Unmap device/itt */
@@ -2780,7 +2849,7 @@ static void its_vpe_irq_domain_free(struct irq_domain *domain,
2780 } 2849 }
2781 2850
2782 if (bitmap_empty(vm->db_bitmap, vm->nr_db_lpis)) { 2851 if (bitmap_empty(vm->db_bitmap, vm->nr_db_lpis)) {
2783 its_lpi_free_chunks(vm->db_bitmap, vm->db_lpi_base, vm->nr_db_lpis); 2852 its_lpi_free(vm->db_bitmap, vm->db_lpi_base, vm->nr_db_lpis);
2784 its_free_prop_table(vm->vprop_page); 2853 its_free_prop_table(vm->vprop_page);
2785 } 2854 }
2786} 2855}
@@ -2795,18 +2864,18 @@ static int its_vpe_irq_domain_alloc(struct irq_domain *domain, unsigned int virq
2795 2864
2796 BUG_ON(!vm); 2865 BUG_ON(!vm);
2797 2866
2798 bitmap = its_lpi_alloc_chunks(nr_irqs, &base, &nr_ids); 2867 bitmap = its_lpi_alloc(roundup_pow_of_two(nr_irqs), &base, &nr_ids);
2799 if (!bitmap) 2868 if (!bitmap)
2800 return -ENOMEM; 2869 return -ENOMEM;
2801 2870
2802 if (nr_ids < nr_irqs) { 2871 if (nr_ids < nr_irqs) {
2803 its_lpi_free_chunks(bitmap, base, nr_ids); 2872 its_lpi_free(bitmap, base, nr_ids);
2804 return -ENOMEM; 2873 return -ENOMEM;
2805 } 2874 }
2806 2875
2807 vprop_page = its_allocate_prop_table(GFP_KERNEL); 2876 vprop_page = its_allocate_prop_table(GFP_KERNEL);
2808 if (!vprop_page) { 2877 if (!vprop_page) {
2809 its_lpi_free_chunks(bitmap, base, nr_ids); 2878 its_lpi_free(bitmap, base, nr_ids);
2810 return -ENOMEM; 2879 return -ENOMEM;
2811 } 2880 }
2812 2881
@@ -2833,7 +2902,7 @@ static int its_vpe_irq_domain_alloc(struct irq_domain *domain, unsigned int virq
2833 if (i > 0) 2902 if (i > 0)
2834 its_vpe_irq_domain_free(domain, virq, i - 1); 2903 its_vpe_irq_domain_free(domain, virq, i - 1);
2835 2904
2836 its_lpi_free_chunks(bitmap, base, nr_ids); 2905 its_lpi_free(bitmap, base, nr_ids);
2837 its_free_prop_table(vprop_page); 2906 its_free_prop_table(vprop_page);
2838 } 2907 }
2839 2908
@@ -3070,7 +3139,7 @@ static int its_save_disable(void)
3070 struct its_node *its; 3139 struct its_node *its;
3071 int err = 0; 3140 int err = 0;
3072 3141
3073 spin_lock(&its_lock); 3142 raw_spin_lock(&its_lock);
3074 list_for_each_entry(its, &its_nodes, entry) { 3143 list_for_each_entry(its, &its_nodes, entry) {
3075 void __iomem *base; 3144 void __iomem *base;
3076 3145
@@ -3102,7 +3171,7 @@ err:
3102 writel_relaxed(its->ctlr_save, base + GITS_CTLR); 3171 writel_relaxed(its->ctlr_save, base + GITS_CTLR);
3103 } 3172 }
3104 } 3173 }
3105 spin_unlock(&its_lock); 3174 raw_spin_unlock(&its_lock);
3106 3175
3107 return err; 3176 return err;
3108} 3177}
@@ -3112,7 +3181,7 @@ static void its_restore_enable(void)
3112 struct its_node *its; 3181 struct its_node *its;
3113 int ret; 3182 int ret;
3114 3183
3115 spin_lock(&its_lock); 3184 raw_spin_lock(&its_lock);
3116 list_for_each_entry(its, &its_nodes, entry) { 3185 list_for_each_entry(its, &its_nodes, entry) {
3117 void __iomem *base; 3186 void __iomem *base;
3118 int i; 3187 int i;
@@ -3164,7 +3233,7 @@ static void its_restore_enable(void)
3164 GITS_TYPER_HCC(gic_read_typer(base + GITS_TYPER))) 3233 GITS_TYPER_HCC(gic_read_typer(base + GITS_TYPER)))
3165 its_cpu_init_collection(its); 3234 its_cpu_init_collection(its);
3166 } 3235 }
3167 spin_unlock(&its_lock); 3236 raw_spin_unlock(&its_lock);
3168} 3237}
3169 3238
3170static struct syscore_ops its_syscore_ops = { 3239static struct syscore_ops its_syscore_ops = {
@@ -3398,9 +3467,9 @@ static int __init its_probe_one(struct resource *res,
3398 if (err) 3467 if (err)
3399 goto out_free_tables; 3468 goto out_free_tables;
3400 3469
3401 spin_lock(&its_lock); 3470 raw_spin_lock(&its_lock);
3402 list_add(&its->entry, &its_nodes); 3471 list_add(&its->entry, &its_nodes);
3403 spin_unlock(&its_lock); 3472 raw_spin_unlock(&its_lock);
3404 3473
3405 return 0; 3474 return 0;
3406 3475
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index 76ea56d779a1..e214181b77b7 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -877,7 +877,7 @@ static struct irq_chip gic_eoimode1_chip = {
877 .flags = IRQCHIP_SET_TYPE_MASKED, 877 .flags = IRQCHIP_SET_TYPE_MASKED,
878}; 878};
879 879
880#define GIC_ID_NR (1U << gic_data.rdists.id_bits) 880#define GIC_ID_NR (1U << GICD_TYPER_ID_BITS(gic_data.rdists.gicd_typer))
881 881
882static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq, 882static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
883 irq_hw_number_t hw) 883 irq_hw_number_t hw)
@@ -1091,7 +1091,7 @@ static int __init gic_init_bases(void __iomem *dist_base,
1091 * The GIC only supports up to 1020 interrupt sources (SGI+PPI+SPI) 1091 * The GIC only supports up to 1020 interrupt sources (SGI+PPI+SPI)
1092 */ 1092 */
1093 typer = readl_relaxed(gic_data.dist_base + GICD_TYPER); 1093 typer = readl_relaxed(gic_data.dist_base + GICD_TYPER);
1094 gic_data.rdists.id_bits = GICD_TYPER_ID_BITS(typer); 1094 gic_data.rdists.gicd_typer = typer;
1095 gic_irqs = GICD_TYPER_IRQS(typer); 1095 gic_irqs = GICD_TYPER_IRQS(typer);
1096 if (gic_irqs > 1020) 1096 if (gic_irqs > 1020)
1097 gic_irqs = 1020; 1097 gic_irqs = 1020;
diff --git a/drivers/irqchip/irq-ingenic.c b/drivers/irqchip/irq-ingenic.c
index fc5953dea509..2ff08986b536 100644
--- a/drivers/irqchip/irq-ingenic.c
+++ b/drivers/irqchip/irq-ingenic.c
@@ -165,6 +165,7 @@ static int __init intc_1chip_of_init(struct device_node *node,
165 return ingenic_intc_of_init(node, 1); 165 return ingenic_intc_of_init(node, 1);
166} 166}
167IRQCHIP_DECLARE(jz4740_intc, "ingenic,jz4740-intc", intc_1chip_of_init); 167IRQCHIP_DECLARE(jz4740_intc, "ingenic,jz4740-intc", intc_1chip_of_init);
168IRQCHIP_DECLARE(jz4725b_intc, "ingenic,jz4725b-intc", intc_1chip_of_init);
168 169
169static int __init intc_2chip_of_init(struct device_node *node, 170static int __init intc_2chip_of_init(struct device_node *node,
170 struct device_node *parent) 171 struct device_node *parent)
diff --git a/drivers/irqchip/irq-stm32-exti.c b/drivers/irqchip/irq-stm32-exti.c
index 3a7e8905a97e..3df527fcf4e1 100644
--- a/drivers/irqchip/irq-stm32-exti.c
+++ b/drivers/irqchip/irq-stm32-exti.c
@@ -159,6 +159,7 @@ static const struct stm32_exti_bank *stm32mp1_exti_banks[] = {
159}; 159};
160 160
161static const struct stm32_desc_irq stm32mp1_desc_irq[] = { 161static const struct stm32_desc_irq stm32mp1_desc_irq[] = {
162 { .exti = 0, .irq_parent = 6 },
162 { .exti = 1, .irq_parent = 7 }, 163 { .exti = 1, .irq_parent = 7 },
163 { .exti = 2, .irq_parent = 8 }, 164 { .exti = 2, .irq_parent = 8 },
164 { .exti = 3, .irq_parent = 9 }, 165 { .exti = 3, .irq_parent = 9 },
diff --git a/drivers/media/platform/vsp1/vsp1_drm.c b/drivers/media/platform/vsp1/vsp1_drm.c
index edb35a5c57ea..a99fc0ced7a7 100644
--- a/drivers/media/platform/vsp1/vsp1_drm.c
+++ b/drivers/media/platform/vsp1/vsp1_drm.c
@@ -728,9 +728,6 @@ EXPORT_SYMBOL_GPL(vsp1_du_setup_lif);
728 */ 728 */
729void vsp1_du_atomic_begin(struct device *dev, unsigned int pipe_index) 729void vsp1_du_atomic_begin(struct device *dev, unsigned int pipe_index)
730{ 730{
731 struct vsp1_device *vsp1 = dev_get_drvdata(dev);
732
733 mutex_lock(&vsp1->drm->lock);
734} 731}
735EXPORT_SYMBOL_GPL(vsp1_du_atomic_begin); 732EXPORT_SYMBOL_GPL(vsp1_du_atomic_begin);
736 733
@@ -846,6 +843,7 @@ void vsp1_du_atomic_flush(struct device *dev, unsigned int pipe_index,
846 843
847 drm_pipe->crc = cfg->crc; 844 drm_pipe->crc = cfg->crc;
848 845
846 mutex_lock(&vsp1->drm->lock);
849 vsp1_du_pipeline_setup_inputs(vsp1, pipe); 847 vsp1_du_pipeline_setup_inputs(vsp1, pipe);
850 vsp1_du_pipeline_configure(pipe); 848 vsp1_du_pipeline_configure(pipe);
851 mutex_unlock(&vsp1->drm->lock); 849 mutex_unlock(&vsp1->drm->lock);
diff --git a/drivers/media/rc/bpf-lirc.c b/drivers/media/rc/bpf-lirc.c
index fcfab6635f9c..81b150e5dfdb 100644
--- a/drivers/media/rc/bpf-lirc.c
+++ b/drivers/media/rc/bpf-lirc.c
@@ -174,6 +174,7 @@ static int lirc_bpf_detach(struct rc_dev *rcdev, struct bpf_prog *prog)
174 174
175 rcu_assign_pointer(raw->progs, new_array); 175 rcu_assign_pointer(raw->progs, new_array);
176 bpf_prog_array_free(old_array); 176 bpf_prog_array_free(old_array);
177 bpf_prog_put(prog);
177unlock: 178unlock:
178 mutex_unlock(&ir_raw_handler_lock); 179 mutex_unlock(&ir_raw_handler_lock);
179 return ret; 180 return ret;
diff --git a/drivers/media/rc/rc-ir-raw.c b/drivers/media/rc/rc-ir-raw.c
index 2e0066b1a31c..e7948908e78c 100644
--- a/drivers/media/rc/rc-ir-raw.c
+++ b/drivers/media/rc/rc-ir-raw.c
@@ -30,13 +30,13 @@ static int ir_raw_event_thread(void *data)
30 while (kfifo_out(&raw->kfifo, &ev, 1)) { 30 while (kfifo_out(&raw->kfifo, &ev, 1)) {
31 if (is_timing_event(ev)) { 31 if (is_timing_event(ev)) {
32 if (ev.duration == 0) 32 if (ev.duration == 0)
33 dev_err(&dev->dev, "nonsensical timing event of duration 0"); 33 dev_warn_once(&dev->dev, "nonsensical timing event of duration 0");
34 if (is_timing_event(raw->prev_ev) && 34 if (is_timing_event(raw->prev_ev) &&
35 !is_transition(&ev, &raw->prev_ev)) 35 !is_transition(&ev, &raw->prev_ev))
36 dev_err(&dev->dev, "two consecutive events of type %s", 36 dev_warn_once(&dev->dev, "two consecutive events of type %s",
37 TO_STR(ev.pulse)); 37 TO_STR(ev.pulse));
38 if (raw->prev_ev.reset && ev.pulse == 0) 38 if (raw->prev_ev.reset && ev.pulse == 0)
39 dev_err(&dev->dev, "timing event after reset should be pulse"); 39 dev_warn_once(&dev->dev, "timing event after reset should be pulse");
40 } 40 }
41 list_for_each_entry(handler, &ir_raw_handler_list, list) 41 list_for_each_entry(handler, &ir_raw_handler_list, list)
42 if (dev->enabled_protocols & 42 if (dev->enabled_protocols &
diff --git a/drivers/media/rc/rc-main.c b/drivers/media/rc/rc-main.c
index 2e222d9ee01f..ca68e1d2b2f9 100644
--- a/drivers/media/rc/rc-main.c
+++ b/drivers/media/rc/rc-main.c
@@ -679,6 +679,14 @@ static void ir_timer_repeat(struct timer_list *t)
679 spin_unlock_irqrestore(&dev->keylock, flags); 679 spin_unlock_irqrestore(&dev->keylock, flags);
680} 680}
681 681
682static unsigned int repeat_period(int protocol)
683{
684 if (protocol >= ARRAY_SIZE(protocols))
685 return 100;
686
687 return protocols[protocol].repeat_period;
688}
689
682/** 690/**
683 * rc_repeat() - signals that a key is still pressed 691 * rc_repeat() - signals that a key is still pressed
684 * @dev: the struct rc_dev descriptor of the device 692 * @dev: the struct rc_dev descriptor of the device
@@ -691,7 +699,7 @@ void rc_repeat(struct rc_dev *dev)
691{ 699{
692 unsigned long flags; 700 unsigned long flags;
693 unsigned int timeout = nsecs_to_jiffies(dev->timeout) + 701 unsigned int timeout = nsecs_to_jiffies(dev->timeout) +
694 msecs_to_jiffies(protocols[dev->last_protocol].repeat_period); 702 msecs_to_jiffies(repeat_period(dev->last_protocol));
695 struct lirc_scancode sc = { 703 struct lirc_scancode sc = {
696 .scancode = dev->last_scancode, .rc_proto = dev->last_protocol, 704 .scancode = dev->last_scancode, .rc_proto = dev->last_protocol,
697 .keycode = dev->keypressed ? dev->last_keycode : KEY_RESERVED, 705 .keycode = dev->keypressed ? dev->last_keycode : KEY_RESERVED,
@@ -803,7 +811,7 @@ void rc_keydown(struct rc_dev *dev, enum rc_proto protocol, u32 scancode,
803 811
804 if (dev->keypressed) { 812 if (dev->keypressed) {
805 dev->keyup_jiffies = jiffies + nsecs_to_jiffies(dev->timeout) + 813 dev->keyup_jiffies = jiffies + nsecs_to_jiffies(dev->timeout) +
806 msecs_to_jiffies(protocols[protocol].repeat_period); 814 msecs_to_jiffies(repeat_period(protocol));
807 mod_timer(&dev->timer_keyup, dev->keyup_jiffies); 815 mod_timer(&dev->timer_keyup, dev->keyup_jiffies);
808 } 816 }
809 spin_unlock_irqrestore(&dev->keylock, flags); 817 spin_unlock_irqrestore(&dev->keylock, flags);
diff --git a/drivers/mmc/host/mxcmmc.c b/drivers/mmc/host/mxcmmc.c
index 75f781c11e89..de4e6e5bf304 100644
--- a/drivers/mmc/host/mxcmmc.c
+++ b/drivers/mmc/host/mxcmmc.c
@@ -293,9 +293,10 @@ static void mxcmci_swap_buffers(struct mmc_data *data)
293 int i; 293 int i;
294 294
295 for_each_sg(data->sg, sg, data->sg_len, i) { 295 for_each_sg(data->sg, sg, data->sg_len, i) {
296 void *buf = kmap_atomic(sg_page(sg) + sg->offset; 296 void *buf = kmap_atomic(sg_page(sg) + sg->offset);
297 buffer_swap32(buf, sg->length); 297 buffer_swap32(buf, sg->length);
298 kunmap_atomic(buf); 298 kunmap_atomic(buf);
299 }
299} 300}
300#else 301#else
301static inline void mxcmci_swap_buffers(struct mmc_data *data) {} 302static inline void mxcmci_swap_buffers(struct mmc_data *data) {}
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 63e3844c5bec..217b790d22ed 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1717,6 +1717,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
1717 goto err_upper_unlink; 1717 goto err_upper_unlink;
1718 } 1718 }
1719 1719
1720 bond->nest_level = dev_get_nest_level(bond_dev) + 1;
1721
1720 /* If the mode uses primary, then the following is handled by 1722 /* If the mode uses primary, then the following is handled by
1721 * bond_change_active_slave(). 1723 * bond_change_active_slave().
1722 */ 1724 */
@@ -1764,7 +1766,6 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
1764 if (bond_mode_can_use_xmit_hash(bond)) 1766 if (bond_mode_can_use_xmit_hash(bond))
1765 bond_update_slave_arr(bond, NULL); 1767 bond_update_slave_arr(bond, NULL);
1766 1768
1767 bond->nest_level = dev_get_nest_level(bond_dev);
1768 1769
1769 netdev_info(bond_dev, "Enslaving %s as %s interface with %s link\n", 1770 netdev_info(bond_dev, "Enslaving %s as %s interface with %s link\n",
1770 slave_dev->name, 1771 slave_dev->name,
@@ -3415,6 +3416,13 @@ static void bond_fold_stats(struct rtnl_link_stats64 *_res,
3415 } 3416 }
3416} 3417}
3417 3418
3419static int bond_get_nest_level(struct net_device *bond_dev)
3420{
3421 struct bonding *bond = netdev_priv(bond_dev);
3422
3423 return bond->nest_level;
3424}
3425
3418static void bond_get_stats(struct net_device *bond_dev, 3426static void bond_get_stats(struct net_device *bond_dev,
3419 struct rtnl_link_stats64 *stats) 3427 struct rtnl_link_stats64 *stats)
3420{ 3428{
@@ -3423,7 +3431,7 @@ static void bond_get_stats(struct net_device *bond_dev,
3423 struct list_head *iter; 3431 struct list_head *iter;
3424 struct slave *slave; 3432 struct slave *slave;
3425 3433
3426 spin_lock(&bond->stats_lock); 3434 spin_lock_nested(&bond->stats_lock, bond_get_nest_level(bond_dev));
3427 memcpy(stats, &bond->bond_stats, sizeof(*stats)); 3435 memcpy(stats, &bond->bond_stats, sizeof(*stats));
3428 3436
3429 rcu_read_lock(); 3437 rcu_read_lock();
@@ -4227,6 +4235,7 @@ static const struct net_device_ops bond_netdev_ops = {
4227 .ndo_neigh_setup = bond_neigh_setup, 4235 .ndo_neigh_setup = bond_neigh_setup,
4228 .ndo_vlan_rx_add_vid = bond_vlan_rx_add_vid, 4236 .ndo_vlan_rx_add_vid = bond_vlan_rx_add_vid,
4229 .ndo_vlan_rx_kill_vid = bond_vlan_rx_kill_vid, 4237 .ndo_vlan_rx_kill_vid = bond_vlan_rx_kill_vid,
4238 .ndo_get_lock_subclass = bond_get_nest_level,
4230#ifdef CONFIG_NET_POLL_CONTROLLER 4239#ifdef CONFIG_NET_POLL_CONTROLLER
4231 .ndo_netpoll_setup = bond_netpoll_setup, 4240 .ndo_netpoll_setup = bond_netpoll_setup,
4232 .ndo_netpoll_cleanup = bond_netpoll_cleanup, 4241 .ndo_netpoll_cleanup = bond_netpoll_cleanup,
@@ -4725,6 +4734,7 @@ static int bond_init(struct net_device *bond_dev)
4725 if (!bond->wq) 4734 if (!bond->wq)
4726 return -ENOMEM; 4735 return -ENOMEM;
4727 4736
4737 bond->nest_level = SINGLE_DEPTH_NESTING;
4728 netdev_lockdep_set_classes(bond_dev); 4738 netdev_lockdep_set_classes(bond_dev);
4729 4739
4730 list_add_tail(&bond->bond_list, &bn->dev_list); 4740 list_add_tail(&bond->bond_list, &bn->dev_list);
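
The bonding hunks above record a nest_level per bond device and take stats_lock with spin_lock_nested(), so lockdep can tell stacked bond devices apart instead of reporting recursion on the same lock class as a deadlock. A reduced sketch of that pattern; my_bond is a hypothetical stand-in for the real structure:

#include <linux/spinlock.h>

struct my_bond {
	spinlock_t stats_lock;
	int nest_level;         /* incremented for each level of stacking */
};

static void my_bond_get_stats(struct my_bond *bond)
{
	/* Use the device's own nesting depth as the lockdep subclass. */
	spin_lock_nested(&bond->stats_lock, bond->nest_level);
	/* ... fold per-slave counters into the bond's stats ... */
	spin_unlock(&bond->stats_lock);
}
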
diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c
index 12ff0020ecd6..b7dfd4109d24 100644
--- a/drivers/net/can/usb/ems_usb.c
+++ b/drivers/net/can/usb/ems_usb.c
@@ -1072,6 +1072,7 @@ static void ems_usb_disconnect(struct usb_interface *intf)
1072 usb_free_urb(dev->intr_urb); 1072 usb_free_urb(dev->intr_urb);
1073 1073
1074 kfree(dev->intr_in_buffer); 1074 kfree(dev->intr_in_buffer);
1075 kfree(dev->tx_msg_buffer);
1075 } 1076 }
1076} 1077}
1077 1078
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 9ef07a06aceb..bb28c701381a 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -2617,7 +2617,6 @@ static const struct mv88e6xxx_ops mv88e6085_ops = {
2617 .rmu_disable = mv88e6085_g1_rmu_disable, 2617 .rmu_disable = mv88e6085_g1_rmu_disable,
2618 .vtu_getnext = mv88e6352_g1_vtu_getnext, 2618 .vtu_getnext = mv88e6352_g1_vtu_getnext,
2619 .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, 2619 .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
2620 .serdes_power = mv88e6341_serdes_power,
2621}; 2620};
2622 2621
2623static const struct mv88e6xxx_ops mv88e6095_ops = { 2622static const struct mv88e6xxx_ops mv88e6095_ops = {
@@ -2783,6 +2782,7 @@ static const struct mv88e6xxx_ops mv88e6141_ops = {
2783 .reset = mv88e6352_g1_reset, 2782 .reset = mv88e6352_g1_reset,
2784 .vtu_getnext = mv88e6352_g1_vtu_getnext, 2783 .vtu_getnext = mv88e6352_g1_vtu_getnext,
2785 .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, 2784 .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
2785 .serdes_power = mv88e6341_serdes_power,
2786 .gpio_ops = &mv88e6352_gpio_ops, 2786 .gpio_ops = &mv88e6352_gpio_ops,
2787}; 2787};
2788 2788
@@ -2960,7 +2960,6 @@ static const struct mv88e6xxx_ops mv88e6175_ops = {
2960 .reset = mv88e6352_g1_reset, 2960 .reset = mv88e6352_g1_reset,
2961 .vtu_getnext = mv88e6352_g1_vtu_getnext, 2961 .vtu_getnext = mv88e6352_g1_vtu_getnext,
2962 .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, 2962 .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
2963 .serdes_power = mv88e6341_serdes_power,
2964}; 2963};
2965 2964
2966static const struct mv88e6xxx_ops mv88e6176_ops = { 2965static const struct mv88e6xxx_ops mv88e6176_ops = {
@@ -3336,6 +3335,7 @@ static const struct mv88e6xxx_ops mv88e6341_ops = {
3336 .reset = mv88e6352_g1_reset, 3335 .reset = mv88e6352_g1_reset,
3337 .vtu_getnext = mv88e6352_g1_vtu_getnext, 3336 .vtu_getnext = mv88e6352_g1_vtu_getnext,
3338 .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, 3337 .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
3338 .serdes_power = mv88e6341_serdes_power,
3339 .gpio_ops = &mv88e6352_gpio_ops, 3339 .gpio_ops = &mv88e6352_gpio_ops,
3340 .avb_ops = &mv88e6390_avb_ops, 3340 .avb_ops = &mv88e6390_avb_ops,
3341}; 3341};
diff --git a/drivers/net/ethernet/8390/mac8390.c b/drivers/net/ethernet/8390/mac8390.c
index b6d735bf8011..342ae08ec3c2 100644
--- a/drivers/net/ethernet/8390/mac8390.c
+++ b/drivers/net/ethernet/8390/mac8390.c
@@ -153,9 +153,6 @@ static void dayna_block_input(struct net_device *dev, int count,
153static void dayna_block_output(struct net_device *dev, int count, 153static void dayna_block_output(struct net_device *dev, int count,
154 const unsigned char *buf, int start_page); 154 const unsigned char *buf, int start_page);
155 155
156#define memcpy_fromio(a, b, c) memcpy((a), (void *)(b), (c))
157#define memcpy_toio(a, b, c) memcpy((void *)(a), (b), (c))
158
159#define memcmp_withio(a, b, c) memcmp((a), (void *)(b), (c)) 156#define memcmp_withio(a, b, c) memcmp((a), (void *)(b), (c))
160 157
161/* Slow Sane (16-bit chunk memory read/write) Cabletron uses this */ 158/* Slow Sane (16-bit chunk memory read/write) Cabletron uses this */
@@ -239,7 +236,7 @@ static enum mac8390_access mac8390_testio(unsigned long membase)
239 unsigned long outdata = 0xA5A0B5B0; 236 unsigned long outdata = 0xA5A0B5B0;
240 unsigned long indata = 0x00000000; 237 unsigned long indata = 0x00000000;
241 /* Try writing 32 bits */ 238 /* Try writing 32 bits */
242 memcpy_toio(membase, &outdata, 4); 239 memcpy_toio((void __iomem *)membase, &outdata, 4);
243 /* Now compare them */ 240 /* Now compare them */
244 if (memcmp_withio(&outdata, membase, 4) == 0) 241 if (memcmp_withio(&outdata, membase, 4) == 0)
245 return ACCESS_32; 242 return ACCESS_32;
@@ -711,7 +708,7 @@ static void sane_get_8390_hdr(struct net_device *dev,
711 struct e8390_pkt_hdr *hdr, int ring_page) 708 struct e8390_pkt_hdr *hdr, int ring_page)
712{ 709{
713 unsigned long hdr_start = (ring_page - WD_START_PG)<<8; 710 unsigned long hdr_start = (ring_page - WD_START_PG)<<8;
714 memcpy_fromio(hdr, dev->mem_start + hdr_start, 4); 711 memcpy_fromio(hdr, (void __iomem *)dev->mem_start + hdr_start, 4);
715 /* Fix endianness */ 712 /* Fix endianness */
716 hdr->count = swab16(hdr->count); 713 hdr->count = swab16(hdr->count);
717} 714}
@@ -725,13 +722,16 @@ static void sane_block_input(struct net_device *dev, int count,
725 if (xfer_start + count > ei_status.rmem_end) { 722 if (xfer_start + count > ei_status.rmem_end) {
726 /* We must wrap the input move. */ 723 /* We must wrap the input move. */
727 int semi_count = ei_status.rmem_end - xfer_start; 724 int semi_count = ei_status.rmem_end - xfer_start;
728 memcpy_fromio(skb->data, dev->mem_start + xfer_base, 725 memcpy_fromio(skb->data,
726 (void __iomem *)dev->mem_start + xfer_base,
729 semi_count); 727 semi_count);
730 count -= semi_count; 728 count -= semi_count;
731 memcpy_fromio(skb->data + semi_count, ei_status.rmem_start, 729 memcpy_fromio(skb->data + semi_count,
732 count); 730 (void __iomem *)ei_status.rmem_start, count);
733 } else { 731 } else {
734 memcpy_fromio(skb->data, dev->mem_start + xfer_base, count); 732 memcpy_fromio(skb->data,
733 (void __iomem *)dev->mem_start + xfer_base,
734 count);
735 } 735 }
736} 736}
737 737
@@ -740,7 +740,7 @@ static void sane_block_output(struct net_device *dev, int count,
740{ 740{
741 long shmem = (start_page - WD_START_PG)<<8; 741 long shmem = (start_page - WD_START_PG)<<8;
742 742
743 memcpy_toio(dev->mem_start + shmem, buf, count); 743 memcpy_toio((void __iomem *)dev->mem_start + shmem, buf, count);
744} 744}
745 745
746/* dayna block input/output */ 746/* dayna block input/output */
diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c
index 1b9d3130af4d..17f12c18d225 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.c
+++ b/drivers/net/ethernet/amazon/ena/ena_com.c
@@ -333,6 +333,7 @@ static int ena_com_init_io_sq(struct ena_com_dev *ena_dev,
333 333
334 memset(&io_sq->desc_addr, 0x0, sizeof(io_sq->desc_addr)); 334 memset(&io_sq->desc_addr, 0x0, sizeof(io_sq->desc_addr));
335 335
336 io_sq->dma_addr_bits = ena_dev->dma_addr_bits;
336 io_sq->desc_entry_size = 337 io_sq->desc_entry_size =
337 (io_sq->direction == ENA_COM_IO_QUEUE_DIRECTION_TX) ? 338 (io_sq->direction == ENA_COM_IO_QUEUE_DIRECTION_TX) ?
338 sizeof(struct ena_eth_io_tx_desc) : 339 sizeof(struct ena_eth_io_tx_desc) :
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
index 4b5d625de8f0..8a3a60bb2688 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
@@ -1111,14 +1111,14 @@ static void xgbe_phy_adjust_link(struct xgbe_prv_data *pdata)
1111 1111
1112 if (pdata->tx_pause != pdata->phy.tx_pause) { 1112 if (pdata->tx_pause != pdata->phy.tx_pause) {
1113 new_state = 1; 1113 new_state = 1;
1114 pdata->hw_if.config_tx_flow_control(pdata);
1115 pdata->tx_pause = pdata->phy.tx_pause; 1114 pdata->tx_pause = pdata->phy.tx_pause;
1115 pdata->hw_if.config_tx_flow_control(pdata);
1116 } 1116 }
1117 1117
1118 if (pdata->rx_pause != pdata->phy.rx_pause) { 1118 if (pdata->rx_pause != pdata->phy.rx_pause) {
1119 new_state = 1; 1119 new_state = 1;
1120 pdata->hw_if.config_rx_flow_control(pdata);
1121 pdata->rx_pause = pdata->phy.rx_pause; 1120 pdata->rx_pause = pdata->phy.rx_pause;
1121 pdata->hw_if.config_rx_flow_control(pdata);
1122 } 1122 }
1123 1123
1124 /* Speed support */ 1124 /* Speed support */
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
index 956860a69797..3bdab972420b 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
@@ -762,7 +762,7 @@ static int hw_atl_b0_hw_packet_filter_set(struct aq_hw_s *self,
762 762
763 hw_atl_rpfl2promiscuous_mode_en_set(self, IS_FILTER_ENABLED(IFF_PROMISC)); 763 hw_atl_rpfl2promiscuous_mode_en_set(self, IS_FILTER_ENABLED(IFF_PROMISC));
764 hw_atl_rpfl2multicast_flr_en_set(self, 764 hw_atl_rpfl2multicast_flr_en_set(self,
765 IS_FILTER_ENABLED(IFF_MULTICAST), 0); 765 IS_FILTER_ENABLED(IFF_ALLMULTI), 0);
766 766
767 hw_atl_rpfl2_accept_all_mc_packets_set(self, 767 hw_atl_rpfl2_accept_all_mc_packets_set(self,
768 IS_FILTER_ENABLED(IFF_ALLMULTI)); 768 IS_FILTER_ENABLED(IFF_ALLMULTI));
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
index 5d08d2aeb172..e337da6ba2a4 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
@@ -1083,6 +1083,8 @@ static int bgx_lmac_enable(struct bgx *bgx, u8 lmacid)
1083 lmac->dmacs_count = (RX_DMAC_COUNT / bgx->lmac_count); 1083 lmac->dmacs_count = (RX_DMAC_COUNT / bgx->lmac_count);
1084 lmac->dmacs = kcalloc(lmac->dmacs_count, sizeof(*lmac->dmacs), 1084 lmac->dmacs = kcalloc(lmac->dmacs_count, sizeof(*lmac->dmacs),
1085 GFP_KERNEL); 1085 GFP_KERNEL);
1086 if (!lmac->dmacs)
1087 return -ENOMEM;
1086 1088
1087 /* Enable lmac */ 1089 /* Enable lmac */
1088 bgx_reg_modify(bgx, lmacid, BGX_CMRX_CFG, CMR_EN); 1090 bgx_reg_modify(bgx, lmacid, BGX_CMRX_CFG, CMR_EN);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
index 00fc5f1afb1d..7dddb9e748b8 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
@@ -1038,10 +1038,8 @@ static void mk_act_open_req(struct filter_entry *f, struct sk_buff *skb,
1038 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, qid_filterid)); 1038 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, qid_filterid));
1039 req->local_port = cpu_to_be16(f->fs.val.lport); 1039 req->local_port = cpu_to_be16(f->fs.val.lport);
1040 req->peer_port = cpu_to_be16(f->fs.val.fport); 1040 req->peer_port = cpu_to_be16(f->fs.val.fport);
1041 req->local_ip = f->fs.val.lip[0] | f->fs.val.lip[1] << 8 | 1041 memcpy(&req->local_ip, f->fs.val.lip, 4);
1042 f->fs.val.lip[2] << 16 | f->fs.val.lip[3] << 24; 1042 memcpy(&req->peer_ip, f->fs.val.fip, 4);
1043 req->peer_ip = f->fs.val.fip[0] | f->fs.val.fip[1] << 8 |
1044 f->fs.val.fip[2] << 16 | f->fs.val.fip[3] << 24;
1045 req->opt0 = cpu_to_be64(NAGLE_V(f->fs.newvlan == VLAN_REMOVE || 1043 req->opt0 = cpu_to_be64(NAGLE_V(f->fs.newvlan == VLAN_REMOVE ||
1046 f->fs.newvlan == VLAN_REWRITE) | 1044 f->fs.newvlan == VLAN_REWRITE) |
1047 DELACK_V(f->fs.hitcnts) | 1045 DELACK_V(f->fs.hitcnts) |
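
The cxgb4 filter change above replaces byte-shift assembly of the IPv4 addresses with memcpy() into the request fields. A minimal user-space sketch (not the driver code; the address value and the printing are invented for illustration) of why copying the raw bytes is endianness-independent while the shift form assumes a little-endian store into the network-order field:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	/* 192.0.2.1 already laid out in network (big-endian) byte order. */
	uint8_t ip[4] = { 192, 0, 2, 1 };
	uint32_t by_shift, by_memcpy;

	/* Shift-based assembly: the in-memory layout of the result
	 * depends on the host's endianness.                           */
	by_shift = ip[0] | ip[1] << 8 | ip[2] << 16 | ip[3] << 24;

	/* memcpy: the bytes land in memory exactly as they were in
	 * ip[], independent of host endianness.                       */
	memcpy(&by_memcpy, ip, sizeof(by_memcpy));

	printf("shift : %02x %02x %02x %02x\n",
	       ((uint8_t *)&by_shift)[0], ((uint8_t *)&by_shift)[1],
	       ((uint8_t *)&by_shift)[2], ((uint8_t *)&by_shift)[3]);
	printf("memcpy: %02x %02x %02x %02x\n",
	       ((uint8_t *)&by_memcpy)[0], ((uint8_t *)&by_memcpy)[1],
	       ((uint8_t *)&by_memcpy)[2], ((uint8_t *)&by_memcpy)[3]);
	return 0;
}

On a little-endian host both lines print the same bytes; on a big-endian host the shift version comes out byte-swapped, which is the class of bug the memcpy() avoids.
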
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index bc03c175a3cd..a8926e97935e 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -3072,6 +3072,7 @@ static void cxgb_del_udp_tunnel(struct net_device *netdev,
3072 3072
3073 adapter->geneve_port = 0; 3073 adapter->geneve_port = 0;
3074 t4_write_reg(adapter, MPS_RX_GENEVE_TYPE_A, 0); 3074 t4_write_reg(adapter, MPS_RX_GENEVE_TYPE_A, 0);
3075 break;
3075 default: 3076 default:
3076 return; 3077 return;
3077 } 3078 }
@@ -3157,6 +3158,7 @@ static void cxgb_add_udp_tunnel(struct net_device *netdev,
3157 3158
3158 t4_write_reg(adapter, MPS_RX_GENEVE_TYPE_A, 3159 t4_write_reg(adapter, MPS_RX_GENEVE_TYPE_A,
3159 GENEVE_V(be16_to_cpu(ti->port)) | GENEVE_EN_F); 3160 GENEVE_V(be16_to_cpu(ti->port)) | GENEVE_EN_F);
3161 break;
3160 default: 3162 default:
3161 return; 3163 return;
3162 } 3164 }
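
Both cxgb4_main hunks above add a missing break; so the GENEVE case no longer falls through into default: return;. A small stand-alone sketch of that failure mode, assuming the point of the fix is that falling into the default return skips work that follows the switch; the post-switch step below is invented for illustration and does not mirror the driver's exact code:

#include <stdio.h>

enum tunnel_type { TUNNEL_VXLAN, TUNNEL_GENEVE, TUNNEL_OTHER };

/* 'fixed' selects whether the GENEVE case breaks out of the switch
 * or (as in the pre-fix code) falls through into default's return.  */
static void del_tunnel(enum tunnel_type type, int fixed)
{
	switch (type) {
	case TUNNEL_VXLAN:
		printf("vxlan port cleared\n");
		break;
	case TUNNEL_GENEVE:
		printf("geneve port cleared\n");
		if (fixed)
			break;
		/* fall through */
	default:
		return;		/* unsupported type: nothing more to do */
	}

	/* Work that should run for every supported tunnel type; it is
	 * skipped for GENEVE when the break is missing.                */
	printf("post-switch follow-up done\n");
}

int main(void)
{
	del_tunnel(TUNNEL_GENEVE, 0);	/* pre-fix behaviour */
	del_tunnel(TUNNEL_GENEVE, 1);	/* post-fix behaviour */
	return 0;
}
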
diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c
index 90c645b8538e..60641e202534 100644
--- a/drivers/net/ethernet/cisco/enic/enic_main.c
+++ b/drivers/net/ethernet/cisco/enic/enic_main.c
@@ -2047,28 +2047,42 @@ static int enic_stop(struct net_device *netdev)
2047 return 0; 2047 return 0;
2048} 2048}
2049 2049
2050static int _enic_change_mtu(struct net_device *netdev, int new_mtu)
2051{
2052 bool running = netif_running(netdev);
2053 int err = 0;
2054
2055 ASSERT_RTNL();
2056 if (running) {
2057 err = enic_stop(netdev);
2058 if (err)
2059 return err;
2060 }
2061
2062 netdev->mtu = new_mtu;
2063
2064 if (running) {
2065 err = enic_open(netdev);
2066 if (err)
2067 return err;
2068 }
2069
2070 return 0;
2071}
2072
2050static int enic_change_mtu(struct net_device *netdev, int new_mtu) 2073static int enic_change_mtu(struct net_device *netdev, int new_mtu)
2051{ 2074{
2052 struct enic *enic = netdev_priv(netdev); 2075 struct enic *enic = netdev_priv(netdev);
2053 int running = netif_running(netdev);
2054 2076
2055 if (enic_is_dynamic(enic) || enic_is_sriov_vf(enic)) 2077 if (enic_is_dynamic(enic) || enic_is_sriov_vf(enic))
2056 return -EOPNOTSUPP; 2078 return -EOPNOTSUPP;
2057 2079
2058 if (running)
2059 enic_stop(netdev);
2060
2061 netdev->mtu = new_mtu;
2062
2063 if (netdev->mtu > enic->port_mtu) 2080 if (netdev->mtu > enic->port_mtu)
2064 netdev_warn(netdev, 2081 netdev_warn(netdev,
2065 "interface MTU (%d) set higher than port MTU (%d)\n", 2082 "interface MTU (%d) set higher than port MTU (%d)\n",
2066 netdev->mtu, enic->port_mtu); 2083 netdev->mtu, enic->port_mtu);
2067 2084
2068 if (running) 2085 return _enic_change_mtu(netdev, new_mtu);
2069 enic_open(netdev);
2070
2071 return 0;
2072} 2086}
2073 2087
2074static void enic_change_mtu_work(struct work_struct *work) 2088static void enic_change_mtu_work(struct work_struct *work)
@@ -2076,47 +2090,9 @@ static void enic_change_mtu_work(struct work_struct *work)
2076 struct enic *enic = container_of(work, struct enic, change_mtu_work); 2090 struct enic *enic = container_of(work, struct enic, change_mtu_work);
2077 struct net_device *netdev = enic->netdev; 2091 struct net_device *netdev = enic->netdev;
2078 int new_mtu = vnic_dev_mtu(enic->vdev); 2092 int new_mtu = vnic_dev_mtu(enic->vdev);
2079 int err;
2080 unsigned int i;
2081
2082 new_mtu = max_t(int, ENIC_MIN_MTU, min_t(int, ENIC_MAX_MTU, new_mtu));
2083 2093
2084 rtnl_lock(); 2094 rtnl_lock();
2085 2095 (void)_enic_change_mtu(netdev, new_mtu);
2086 /* Stop RQ */
2087 del_timer_sync(&enic->notify_timer);
2088
2089 for (i = 0; i < enic->rq_count; i++)
2090 napi_disable(&enic->napi[i]);
2091
2092 vnic_intr_mask(&enic->intr[0]);
2093 enic_synchronize_irqs(enic);
2094 err = vnic_rq_disable(&enic->rq[0]);
2095 if (err) {
2096 rtnl_unlock();
2097 netdev_err(netdev, "Unable to disable RQ.\n");
2098 return;
2099 }
2100 vnic_rq_clean(&enic->rq[0], enic_free_rq_buf);
2101 vnic_cq_clean(&enic->cq[0]);
2102 vnic_intr_clean(&enic->intr[0]);
2103
2104 /* Fill RQ with new_mtu-sized buffers */
2105 netdev->mtu = new_mtu;
2106 vnic_rq_fill(&enic->rq[0], enic_rq_alloc_buf);
2107 /* Need at least one buffer on ring to get going */
2108 if (vnic_rq_desc_used(&enic->rq[0]) == 0) {
2109 rtnl_unlock();
2110 netdev_err(netdev, "Unable to alloc receive buffers.\n");
2111 return;
2112 }
2113
2114 /* Start RQ */
2115 vnic_rq_enable(&enic->rq[0]);
2116 napi_enable(&enic->napi[0]);
2117 vnic_intr_unmask(&enic->intr[0]);
2118 enic_notify_timer_start(enic);
2119
2120 rtnl_unlock(); 2096 rtnl_unlock();
2121 2097
2122 netdev_info(netdev, "interface MTU set as %d\n", netdev->mtu); 2098 netdev_info(netdev, "interface MTU set as %d\n", netdev->mtu);
@@ -2916,7 +2892,6 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
2916 */ 2892 */
2917 2893
2918 enic->port_mtu = enic->config.mtu; 2894 enic->port_mtu = enic->config.mtu;
2919 (void)enic_change_mtu(netdev, enic->port_mtu);
2920 2895
2921 err = enic_set_mac_addr(netdev, enic->mac_addr); 2896 err = enic_set_mac_addr(netdev, enic->mac_addr);
2922 if (err) { 2897 if (err) {
@@ -3006,6 +2981,7 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
3006 /* MTU range: 68 - 9000 */ 2981 /* MTU range: 68 - 9000 */
3007 netdev->min_mtu = ENIC_MIN_MTU; 2982 netdev->min_mtu = ENIC_MIN_MTU;
3008 netdev->max_mtu = ENIC_MAX_MTU; 2983 netdev->max_mtu = ENIC_MAX_MTU;
2984 netdev->mtu = enic->port_mtu;
3009 2985
3010 err = register_netdev(netdev); 2986 err = register_netdev(netdev);
3011 if (err) { 2987 if (err) {
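
The enic change above factors the stop/set-MTU/reopen sequence into _enic_change_mtu(), which asserts that RTNL is held, so both the ndo_change_mtu path (where the core already holds RTNL) and the deferred-work path (which takes rtnl_lock() itself) share one implementation. A rough user-space sketch of that "locked core helper" shape, with a pthread mutex standing in for RTNL and trivial stubs standing in for netif_running()/enic_stop()/enic_open():

#include <pthread.h>
#include <stdio.h>

/* Stand-ins for RTNL, netif_running() and the stop/open paths. */
static pthread_mutex_t cfg_lock = PTHREAD_MUTEX_INITIALIZER;
static int running = 1;
static int mtu = 1500;

static void dev_stop(void) { running = 0; printf("stopped\n"); }
static void dev_open(void) { running = 1; printf("reopened\n"); }

/* Core helper; the caller must hold cfg_lock (mirrors ASSERT_RTNL()):
 * stop if running, update the MTU, reopen if it was running.          */
static int change_mtu_locked(int new_mtu)
{
	int was_running = running;

	if (was_running)
		dev_stop();
	mtu = new_mtu;
	if (was_running)
		dev_open();
	return 0;
}

int main(void)
{
	/* The ndo_change_mtu path would be entered with the lock already
	 * held by the core; the deferred-work path takes it itself, as
	 * enic_change_mtu_work() does with rtnl_lock().                  */
	pthread_mutex_lock(&cfg_lock);
	change_mtu_locked(9000);
	pthread_mutex_unlock(&cfg_lock);

	printf("mtu is now %d\n", mtu);
	return 0;
}
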
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c
index 5b122728dcb4..09e9da10b786 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_main.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c
@@ -983,6 +983,7 @@ static int nic_dev_init(struct pci_dev *pdev)
983 hinic_hwdev_cb_register(nic_dev->hwdev, HINIC_MGMT_MSG_CMD_LINK_STATUS, 983 hinic_hwdev_cb_register(nic_dev->hwdev, HINIC_MGMT_MSG_CMD_LINK_STATUS,
984 nic_dev, link_status_event_handler); 984 nic_dev, link_status_event_handler);
985 985
986 SET_NETDEV_DEV(netdev, &pdev->dev);
986 err = register_netdev(netdev); 987 err = register_netdev(netdev);
987 if (err) { 988 if (err) {
988 dev_err(&pdev->dev, "Failed to register netdev\n"); 989 dev_err(&pdev->dev, "Failed to register netdev\n");
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index eb9eb7aa953a..405236cf0b04 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -858,8 +858,6 @@ struct mlx5e_profile {
858 mlx5e_fp_handle_rx_cqe handle_rx_cqe; 858 mlx5e_fp_handle_rx_cqe handle_rx_cqe;
859 mlx5e_fp_handle_rx_cqe handle_rx_cqe_mpwqe; 859 mlx5e_fp_handle_rx_cqe handle_rx_cqe_mpwqe;
860 } rx_handlers; 860 } rx_handlers;
861 void (*netdev_registered_init)(struct mlx5e_priv *priv);
862 void (*netdev_registered_remove)(struct mlx5e_priv *priv);
863 int max_tc; 861 int max_tc;
864}; 862};
865 863
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
index 86bc9ac99586..722998d68564 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
@@ -443,16 +443,12 @@ static int mlx5e_dcbnl_ieee_setapp(struct net_device *dev, struct dcb_app *app)
443 bool is_new; 443 bool is_new;
444 int err; 444 int err;
445 445
446 if (app->selector != IEEE_8021QAZ_APP_SEL_DSCP) 446 if (!MLX5_CAP_GEN(priv->mdev, vport_group_manager) ||
447 return -EINVAL; 447 !MLX5_DSCP_SUPPORTED(priv->mdev))
448 448 return -EOPNOTSUPP;
449 if (!MLX5_CAP_GEN(priv->mdev, vport_group_manager))
450 return -EINVAL;
451
452 if (!MLX5_DSCP_SUPPORTED(priv->mdev))
453 return -EINVAL;
454 449
455 if (app->protocol >= MLX5E_MAX_DSCP) 450 if ((app->selector != IEEE_8021QAZ_APP_SEL_DSCP) ||
451 (app->protocol >= MLX5E_MAX_DSCP))
456 return -EINVAL; 452 return -EINVAL;
457 453
458 /* Save the old entry info */ 454 /* Save the old entry info */
@@ -500,16 +496,12 @@ static int mlx5e_dcbnl_ieee_delapp(struct net_device *dev, struct dcb_app *app)
500 struct mlx5e_priv *priv = netdev_priv(dev); 496 struct mlx5e_priv *priv = netdev_priv(dev);
501 int err; 497 int err;
502 498
503 if (app->selector != IEEE_8021QAZ_APP_SEL_DSCP) 499 if (!MLX5_CAP_GEN(priv->mdev, vport_group_manager) ||
504 return -EINVAL; 500 !MLX5_DSCP_SUPPORTED(priv->mdev))
505 501 return -EOPNOTSUPP;
506 if (!MLX5_CAP_GEN(priv->mdev, vport_group_manager))
507 return -EINVAL;
508
509 if (!MLX5_DSCP_SUPPORTED(priv->mdev))
510 return -EINVAL;
511 502
512 if (app->protocol >= MLX5E_MAX_DSCP) 503 if ((app->selector != IEEE_8021QAZ_APP_SEL_DSCP) ||
504 (app->protocol >= MLX5E_MAX_DSCP))
513 return -EINVAL; 505 return -EINVAL;
514 506
515 /* Skip if no dscp app entry */ 507 /* Skip if no dscp app entry */
@@ -1146,7 +1138,7 @@ static int mlx5e_set_trust_state(struct mlx5e_priv *priv, u8 trust_state)
1146{ 1138{
1147 int err; 1139 int err;
1148 1140
1149 err = mlx5_set_trust_state(priv->mdev, trust_state); 1141 err = mlx5_set_trust_state(priv->mdev, trust_state);
1150 if (err) 1142 if (err)
1151 return err; 1143 return err;
1152 priv->dcbx_dp.trust_state = trust_state; 1144 priv->dcbx_dp.trust_state = trust_state;
@@ -1172,6 +1164,8 @@ static int mlx5e_trust_initialize(struct mlx5e_priv *priv)
1172 struct mlx5_core_dev *mdev = priv->mdev; 1164 struct mlx5_core_dev *mdev = priv->mdev;
1173 int err; 1165 int err;
1174 1166
1167 priv->dcbx_dp.trust_state = MLX5_QPTS_TRUST_PCP;
1168
1175 if (!MLX5_DSCP_SUPPORTED(mdev)) 1169 if (!MLX5_DSCP_SUPPORTED(mdev))
1176 return 0; 1170 return 0;
1177 1171
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index dae4156a710d..c592678ab5f1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3712,7 +3712,8 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu,
3712 3712
3713 if (!reset) { 3713 if (!reset) {
3714 params->sw_mtu = new_mtu; 3714 params->sw_mtu = new_mtu;
3715 set_mtu_cb(priv); 3715 if (set_mtu_cb)
3716 set_mtu_cb(priv);
3716 netdev->mtu = params->sw_mtu; 3717 netdev->mtu = params->sw_mtu;
3717 goto out; 3718 goto out;
3718 } 3719 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 3a2c4e548226..dfbcda0d0e08 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -1970,15 +1970,15 @@ static bool actions_match_supported(struct mlx5e_priv *priv,
1970static bool same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv) 1970static bool same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv)
1971{ 1971{
1972 struct mlx5_core_dev *fmdev, *pmdev; 1972 struct mlx5_core_dev *fmdev, *pmdev;
1973 u16 func_id, peer_id; 1973 u64 fsystem_guid, psystem_guid;
1974 1974
1975 fmdev = priv->mdev; 1975 fmdev = priv->mdev;
1976 pmdev = peer_priv->mdev; 1976 pmdev = peer_priv->mdev;
1977 1977
1978 func_id = (u16)((fmdev->pdev->bus->number << 8) | PCI_SLOT(fmdev->pdev->devfn)); 1978 mlx5_query_nic_vport_system_image_guid(fmdev, &fsystem_guid);
1979 peer_id = (u16)((pmdev->pdev->bus->number << 8) | PCI_SLOT(pmdev->pdev->devfn)); 1979 mlx5_query_nic_vport_system_image_guid(pmdev, &psystem_guid);
1980 1980
1981 return (func_id == peer_id); 1981 return (fsystem_guid == psystem_guid);
1982} 1982}
1983 1983
1984static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, 1984static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index dd01ad4c0b54..40dba9e8af92 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1696,7 +1696,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
1696 int vport_num; 1696 int vport_num;
1697 int err; 1697 int err;
1698 1698
1699 if (!MLX5_VPORT_MANAGER(dev)) 1699 if (!MLX5_ESWITCH_MANAGER(dev))
1700 return 0; 1700 return 0;
1701 1701
1702 esw_info(dev, 1702 esw_info(dev,
@@ -1765,7 +1765,7 @@ abort:
1765 1765
1766void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) 1766void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
1767{ 1767{
1768 if (!esw || !MLX5_VPORT_MANAGER(esw->dev)) 1768 if (!esw || !MLX5_ESWITCH_MANAGER(esw->dev))
1769 return; 1769 return;
1770 1770
1771 esw_info(esw->dev, "cleanup\n"); 1771 esw_info(esw->dev, "cleanup\n");
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
index af3bb2f7a504..b7c21eb21a21 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -76,6 +76,7 @@ void mlx5i_init(struct mlx5_core_dev *mdev,
76 void *ppriv) 76 void *ppriv)
77{ 77{
78 struct mlx5e_priv *priv = mlx5i_epriv(netdev); 78 struct mlx5e_priv *priv = mlx5i_epriv(netdev);
79 u16 max_mtu;
79 80
80 /* priv init */ 81 /* priv init */
81 priv->mdev = mdev; 82 priv->mdev = mdev;
@@ -84,6 +85,9 @@ void mlx5i_init(struct mlx5_core_dev *mdev,
84 priv->ppriv = ppriv; 85 priv->ppriv = ppriv;
85 mutex_init(&priv->state_lock); 86 mutex_init(&priv->state_lock);
86 87
88 mlx5_query_port_max_mtu(mdev, &max_mtu, 1);
89 netdev->mtu = max_mtu;
90
87 mlx5e_build_nic_params(mdev, &priv->channels.params, 91 mlx5e_build_nic_params(mdev, &priv->channels.params,
88 profile->max_nch(mdev), netdev->mtu); 92 profile->max_nch(mdev), netdev->mtu);
89 mlx5i_build_nic_params(mdev, &priv->channels.params); 93 mlx5i_build_nic_params(mdev, &priv->channels.params);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
index 3c0d882ba183..f6f6a568d66a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
@@ -327,12 +327,16 @@ static void mlxsw_afa_resource_add(struct mlxsw_afa_block *block,
327 list_add(&resource->list, &block->resource_list); 327 list_add(&resource->list, &block->resource_list);
328} 328}
329 329
330static void mlxsw_afa_resource_del(struct mlxsw_afa_resource *resource)
331{
332 list_del(&resource->list);
333}
334
330static void mlxsw_afa_resources_destroy(struct mlxsw_afa_block *block) 335static void mlxsw_afa_resources_destroy(struct mlxsw_afa_block *block)
331{ 336{
332 struct mlxsw_afa_resource *resource, *tmp; 337 struct mlxsw_afa_resource *resource, *tmp;
333 338
334 list_for_each_entry_safe(resource, tmp, &block->resource_list, list) { 339 list_for_each_entry_safe(resource, tmp, &block->resource_list, list) {
335 list_del(&resource->list);
336 resource->destructor(block, resource); 340 resource->destructor(block, resource);
337 } 341 }
338} 342}
@@ -530,6 +534,7 @@ static void
530mlxsw_afa_fwd_entry_ref_destroy(struct mlxsw_afa_block *block, 534mlxsw_afa_fwd_entry_ref_destroy(struct mlxsw_afa_block *block,
531 struct mlxsw_afa_fwd_entry_ref *fwd_entry_ref) 535 struct mlxsw_afa_fwd_entry_ref *fwd_entry_ref)
532{ 536{
537 mlxsw_afa_resource_del(&fwd_entry_ref->resource);
533 mlxsw_afa_fwd_entry_put(block->afa, fwd_entry_ref->fwd_entry); 538 mlxsw_afa_fwd_entry_put(block->afa, fwd_entry_ref->fwd_entry);
534 kfree(fwd_entry_ref); 539 kfree(fwd_entry_ref);
535} 540}
@@ -579,6 +584,7 @@ static void
579mlxsw_afa_counter_destroy(struct mlxsw_afa_block *block, 584mlxsw_afa_counter_destroy(struct mlxsw_afa_block *block,
580 struct mlxsw_afa_counter *counter) 585 struct mlxsw_afa_counter *counter)
581{ 586{
587 mlxsw_afa_resource_del(&counter->resource);
582 block->afa->ops->counter_index_put(block->afa->ops_priv, 588 block->afa->ops->counter_index_put(block->afa->ops_priv,
583 counter->counter_index); 589 counter->counter_index);
584 kfree(counter); 590 kfree(counter);
@@ -626,8 +632,8 @@ static char *mlxsw_afa_block_append_action(struct mlxsw_afa_block *block,
626 char *oneact; 632 char *oneact;
627 char *actions; 633 char *actions;
628 634
629 if (WARN_ON(block->finished)) 635 if (block->finished)
630 return NULL; 636 return ERR_PTR(-EINVAL);
631 if (block->cur_act_index + action_size > 637 if (block->cur_act_index + action_size >
632 block->afa->max_acts_per_set) { 638 block->afa->max_acts_per_set) {
633 struct mlxsw_afa_set *set; 639 struct mlxsw_afa_set *set;
@@ -637,7 +643,7 @@ static char *mlxsw_afa_block_append_action(struct mlxsw_afa_block *block,
637 */ 643 */
638 set = mlxsw_afa_set_create(false); 644 set = mlxsw_afa_set_create(false);
639 if (!set) 645 if (!set)
640 return NULL; 646 return ERR_PTR(-ENOBUFS);
641 set->prev = block->cur_set; 647 set->prev = block->cur_set;
642 block->cur_act_index = 0; 648 block->cur_act_index = 0;
643 block->cur_set->next = set; 649 block->cur_set->next = set;
@@ -724,8 +730,8 @@ int mlxsw_afa_block_append_vlan_modify(struct mlxsw_afa_block *block,
724 MLXSW_AFA_VLAN_CODE, 730 MLXSW_AFA_VLAN_CODE,
725 MLXSW_AFA_VLAN_SIZE); 731 MLXSW_AFA_VLAN_SIZE);
726 732
727 if (!act) 733 if (IS_ERR(act))
728 return -ENOBUFS; 734 return PTR_ERR(act);
729 mlxsw_afa_vlan_pack(act, MLXSW_AFA_VLAN_VLAN_TAG_CMD_NOP, 735 mlxsw_afa_vlan_pack(act, MLXSW_AFA_VLAN_VLAN_TAG_CMD_NOP,
730 MLXSW_AFA_VLAN_CMD_SET_OUTER, vid, 736 MLXSW_AFA_VLAN_CMD_SET_OUTER, vid,
731 MLXSW_AFA_VLAN_CMD_SET_OUTER, pcp, 737 MLXSW_AFA_VLAN_CMD_SET_OUTER, pcp,
@@ -806,8 +812,8 @@ int mlxsw_afa_block_append_drop(struct mlxsw_afa_block *block)
806 MLXSW_AFA_TRAPDISC_CODE, 812 MLXSW_AFA_TRAPDISC_CODE,
807 MLXSW_AFA_TRAPDISC_SIZE); 813 MLXSW_AFA_TRAPDISC_SIZE);
808 814
809 if (!act) 815 if (IS_ERR(act))
810 return -ENOBUFS; 816 return PTR_ERR(act);
811 mlxsw_afa_trapdisc_pack(act, MLXSW_AFA_TRAPDISC_TRAP_ACTION_NOP, 817 mlxsw_afa_trapdisc_pack(act, MLXSW_AFA_TRAPDISC_TRAP_ACTION_NOP,
812 MLXSW_AFA_TRAPDISC_FORWARD_ACTION_DISCARD, 0); 818 MLXSW_AFA_TRAPDISC_FORWARD_ACTION_DISCARD, 0);
813 return 0; 819 return 0;
@@ -820,8 +826,8 @@ int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block, u16 trap_id)
820 MLXSW_AFA_TRAPDISC_CODE, 826 MLXSW_AFA_TRAPDISC_CODE,
821 MLXSW_AFA_TRAPDISC_SIZE); 827 MLXSW_AFA_TRAPDISC_SIZE);
822 828
823 if (!act) 829 if (IS_ERR(act))
824 return -ENOBUFS; 830 return PTR_ERR(act);
825 mlxsw_afa_trapdisc_pack(act, MLXSW_AFA_TRAPDISC_TRAP_ACTION_TRAP, 831 mlxsw_afa_trapdisc_pack(act, MLXSW_AFA_TRAPDISC_TRAP_ACTION_TRAP,
826 MLXSW_AFA_TRAPDISC_FORWARD_ACTION_DISCARD, 832 MLXSW_AFA_TRAPDISC_FORWARD_ACTION_DISCARD,
827 trap_id); 833 trap_id);
@@ -836,8 +842,8 @@ int mlxsw_afa_block_append_trap_and_forward(struct mlxsw_afa_block *block,
836 MLXSW_AFA_TRAPDISC_CODE, 842 MLXSW_AFA_TRAPDISC_CODE,
837 MLXSW_AFA_TRAPDISC_SIZE); 843 MLXSW_AFA_TRAPDISC_SIZE);
838 844
839 if (!act) 845 if (IS_ERR(act))
840 return -ENOBUFS; 846 return PTR_ERR(act);
841 mlxsw_afa_trapdisc_pack(act, MLXSW_AFA_TRAPDISC_TRAP_ACTION_TRAP, 847 mlxsw_afa_trapdisc_pack(act, MLXSW_AFA_TRAPDISC_TRAP_ACTION_TRAP,
842 MLXSW_AFA_TRAPDISC_FORWARD_ACTION_FORWARD, 848 MLXSW_AFA_TRAPDISC_FORWARD_ACTION_FORWARD,
843 trap_id); 849 trap_id);
@@ -856,6 +862,7 @@ static void
856mlxsw_afa_mirror_destroy(struct mlxsw_afa_block *block, 862mlxsw_afa_mirror_destroy(struct mlxsw_afa_block *block,
857 struct mlxsw_afa_mirror *mirror) 863 struct mlxsw_afa_mirror *mirror)
858{ 864{
865 mlxsw_afa_resource_del(&mirror->resource);
859 block->afa->ops->mirror_del(block->afa->ops_priv, 866 block->afa->ops->mirror_del(block->afa->ops_priv,
860 mirror->local_in_port, 867 mirror->local_in_port,
861 mirror->span_id, 868 mirror->span_id,
@@ -908,8 +915,8 @@ mlxsw_afa_block_append_allocated_mirror(struct mlxsw_afa_block *block,
908 char *act = mlxsw_afa_block_append_action(block, 915 char *act = mlxsw_afa_block_append_action(block,
909 MLXSW_AFA_TRAPDISC_CODE, 916 MLXSW_AFA_TRAPDISC_CODE,
910 MLXSW_AFA_TRAPDISC_SIZE); 917 MLXSW_AFA_TRAPDISC_SIZE);
911 if (!act) 918 if (IS_ERR(act))
912 return -ENOBUFS; 919 return PTR_ERR(act);
913 mlxsw_afa_trapdisc_pack(act, MLXSW_AFA_TRAPDISC_TRAP_ACTION_NOP, 920 mlxsw_afa_trapdisc_pack(act, MLXSW_AFA_TRAPDISC_TRAP_ACTION_NOP,
914 MLXSW_AFA_TRAPDISC_FORWARD_ACTION_FORWARD, 0); 921 MLXSW_AFA_TRAPDISC_FORWARD_ACTION_FORWARD, 0);
915 mlxsw_afa_trapdisc_mirror_pack(act, true, mirror_agent); 922 mlxsw_afa_trapdisc_mirror_pack(act, true, mirror_agent);
@@ -996,8 +1003,8 @@ int mlxsw_afa_block_append_fwd(struct mlxsw_afa_block *block,
996 1003
997 act = mlxsw_afa_block_append_action(block, MLXSW_AFA_FORWARD_CODE, 1004 act = mlxsw_afa_block_append_action(block, MLXSW_AFA_FORWARD_CODE,
998 MLXSW_AFA_FORWARD_SIZE); 1005 MLXSW_AFA_FORWARD_SIZE);
999 if (!act) { 1006 if (IS_ERR(act)) {
1000 err = -ENOBUFS; 1007 err = PTR_ERR(act);
1001 goto err_append_action; 1008 goto err_append_action;
1002 } 1009 }
1003 mlxsw_afa_forward_pack(act, MLXSW_AFA_FORWARD_TYPE_PBS, 1010 mlxsw_afa_forward_pack(act, MLXSW_AFA_FORWARD_TYPE_PBS,
@@ -1052,8 +1059,8 @@ int mlxsw_afa_block_append_allocated_counter(struct mlxsw_afa_block *block,
1052{ 1059{
1053 char *act = mlxsw_afa_block_append_action(block, MLXSW_AFA_POLCNT_CODE, 1060 char *act = mlxsw_afa_block_append_action(block, MLXSW_AFA_POLCNT_CODE,
1054 MLXSW_AFA_POLCNT_SIZE); 1061 MLXSW_AFA_POLCNT_SIZE);
1055 if (!act) 1062 if (IS_ERR(act))
1056 return -ENOBUFS; 1063 return PTR_ERR(act);
1057 mlxsw_afa_polcnt_pack(act, MLXSW_AFA_POLCNT_COUNTER_SET_TYPE_PACKETS_BYTES, 1064 mlxsw_afa_polcnt_pack(act, MLXSW_AFA_POLCNT_COUNTER_SET_TYPE_PACKETS_BYTES,
1058 counter_index); 1065 counter_index);
1059 return 0; 1066 return 0;
@@ -1123,8 +1130,8 @@ int mlxsw_afa_block_append_fid_set(struct mlxsw_afa_block *block, u16 fid)
1123 char *act = mlxsw_afa_block_append_action(block, 1130 char *act = mlxsw_afa_block_append_action(block,
1124 MLXSW_AFA_VIRFWD_CODE, 1131 MLXSW_AFA_VIRFWD_CODE,
1125 MLXSW_AFA_VIRFWD_SIZE); 1132 MLXSW_AFA_VIRFWD_SIZE);
1126 if (!act) 1133 if (IS_ERR(act))
1127 return -ENOBUFS; 1134 return PTR_ERR(act);
1128 mlxsw_afa_virfwd_pack(act, MLXSW_AFA_VIRFWD_FID_CMD_SET, fid); 1135 mlxsw_afa_virfwd_pack(act, MLXSW_AFA_VIRFWD_FID_CMD_SET, fid);
1129 return 0; 1136 return 0;
1130} 1137}
@@ -1193,8 +1200,8 @@ int mlxsw_afa_block_append_mcrouter(struct mlxsw_afa_block *block,
1193 char *act = mlxsw_afa_block_append_action(block, 1200 char *act = mlxsw_afa_block_append_action(block,
1194 MLXSW_AFA_MCROUTER_CODE, 1201 MLXSW_AFA_MCROUTER_CODE,
1195 MLXSW_AFA_MCROUTER_SIZE); 1202 MLXSW_AFA_MCROUTER_SIZE);
1196 if (!act) 1203 if (IS_ERR(act))
1197 return -ENOBUFS; 1204 return PTR_ERR(act);
1198 mlxsw_afa_mcrouter_pack(act, MLXSW_AFA_MCROUTER_RPF_ACTION_TRAP, 1205 mlxsw_afa_mcrouter_pack(act, MLXSW_AFA_MCROUTER_RPF_ACTION_TRAP,
1199 expected_irif, min_mtu, rmid_valid, kvdl_index); 1206 expected_irif, min_mtu, rmid_valid, kvdl_index);
1200 return 0; 1207 return 0;
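
The mlxsw hunks above switch mlxsw_afa_block_append_action() from returning NULL to returning ERR_PTR() values, so callers can propagate a specific errno via PTR_ERR() instead of a fixed -ENOBUFS. A minimal user-space re-implementation of that pattern; the ERR_PTR/IS_ERR/PTR_ERR helpers below mimic the kernel's, and append_action() is a simplified stand-in for the driver function, not its real logic:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

/* Error codes are encoded in the top 4095 values of the pointer range,
 * so one return value can carry either a valid pointer or an errno.    */
#define MAX_ERRNO 4095

static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

/* Stand-in for mlxsw_afa_block_append_action(): distinguishes a
 * "block already finished" error (-EINVAL) from "no room for another
 * action set" (-ENOBUFS), which a bare NULL return could not.          */
static char *append_action(int finished, int full)
{
	if (finished)
		return ERR_PTR(-EINVAL);
	if (full)
		return ERR_PTR(-ENOBUFS);
	return calloc(1, 16);	/* the real code hands back action storage */
}

int main(void)
{
	char *act = append_action(1, 0);

	if (IS_ERR(act)) {
		printf("append failed: %ld\n", PTR_ERR(act));
		return 1;
	}
	free(act);
	return 0;
}
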
diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.c b/drivers/net/ethernet/netronome/nfp/flower/main.c
index 1decf3a1cad3..e57d23746585 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.c
@@ -80,7 +80,7 @@ nfp_flower_repr_get_type_and_port(struct nfp_app *app, u32 port_id, u8 *port)
80 return NFP_REPR_TYPE_VF; 80 return NFP_REPR_TYPE_VF;
81 } 81 }
82 82
83 return NFP_FLOWER_CMSG_PORT_TYPE_UNSPEC; 83 return __NFP_REPR_TYPE_MAX;
84} 84}
85 85
86static struct net_device * 86static struct net_device *
@@ -91,6 +91,8 @@ nfp_flower_repr_get(struct nfp_app *app, u32 port_id)
91 u8 port = 0; 91 u8 port = 0;
92 92
93 repr_type = nfp_flower_repr_get_type_and_port(app, port_id, &port); 93 repr_type = nfp_flower_repr_get_type_and_port(app, port_id, &port);
94 if (repr_type > NFP_REPR_TYPE_MAX)
95 return NULL;
94 96
95 reprs = rcu_dereference(app->reprs[repr_type]); 97 reprs = rcu_dereference(app->reprs[repr_type]);
96 if (!reprs) 98 if (!reprs)
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 60f59abab009..ef6a8d39db2f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -53,7 +53,7 @@
53#include "dwmac1000.h" 53#include "dwmac1000.h"
54#include "hwif.h" 54#include "hwif.h"
55 55
56#define STMMAC_ALIGN(x) L1_CACHE_ALIGN(x) 56#define STMMAC_ALIGN(x) __ALIGN_KERNEL(x, SMP_CACHE_BYTES)
57#define TSO_MAX_BUFF_SIZE (SZ_16K - 1) 57#define TSO_MAX_BUFF_SIZE (SZ_16K - 1)
58 58
59/* Module parameters */ 59/* Module parameters */
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
index 8d375e51a526..6a393b16a1fc 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
@@ -257,7 +257,7 @@ static int stmmac_pci_probe(struct pci_dev *pdev,
257 return -ENOMEM; 257 return -ENOMEM;
258 258
259 /* Enable pci device */ 259 /* Enable pci device */
260 ret = pcim_enable_device(pdev); 260 ret = pci_enable_device(pdev);
261 if (ret) { 261 if (ret) {
262 dev_err(&pdev->dev, "%s: ERROR: failed to enable device\n", 262 dev_err(&pdev->dev, "%s: ERROR: failed to enable device\n",
263 __func__); 263 __func__);
@@ -300,9 +300,45 @@ static int stmmac_pci_probe(struct pci_dev *pdev,
300static void stmmac_pci_remove(struct pci_dev *pdev) 300static void stmmac_pci_remove(struct pci_dev *pdev)
301{ 301{
302 stmmac_dvr_remove(&pdev->dev); 302 stmmac_dvr_remove(&pdev->dev);
303 pci_disable_device(pdev);
303} 304}
304 305
305static SIMPLE_DEV_PM_OPS(stmmac_pm_ops, stmmac_suspend, stmmac_resume); 306static int stmmac_pci_suspend(struct device *dev)
307{
308 struct pci_dev *pdev = to_pci_dev(dev);
309 int ret;
310
311 ret = stmmac_suspend(dev);
312 if (ret)
313 return ret;
314
315 ret = pci_save_state(pdev);
316 if (ret)
317 return ret;
318
319 pci_disable_device(pdev);
320 pci_wake_from_d3(pdev, true);
321 return 0;
322}
323
324static int stmmac_pci_resume(struct device *dev)
325{
326 struct pci_dev *pdev = to_pci_dev(dev);
327 int ret;
328
329 pci_restore_state(pdev);
330 pci_set_power_state(pdev, PCI_D0);
331
332 ret = pci_enable_device(pdev);
333 if (ret)
334 return ret;
335
336 pci_set_master(pdev);
337
338 return stmmac_resume(dev);
339}
340
341static SIMPLE_DEV_PM_OPS(stmmac_pm_ops, stmmac_pci_suspend, stmmac_pci_resume);
306 342
307/* synthetic ID, no official vendor */ 343/* synthetic ID, no official vendor */
308#define PCI_VENDOR_ID_STMMAC 0x700 344#define PCI_VENDOR_ID_STMMAC 0x700
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 358edab9e72e..3e34cb8ac1d3 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -2086,14 +2086,16 @@ static int cpsw_ndo_vlan_rx_add_vid(struct net_device *ndev,
2086 int i; 2086 int i;
2087 2087
2088 for (i = 0; i < cpsw->data.slaves; i++) { 2088 for (i = 0; i < cpsw->data.slaves; i++) {
2089 if (vid == cpsw->slaves[i].port_vlan) 2089 if (vid == cpsw->slaves[i].port_vlan) {
2090 return -EINVAL; 2090 ret = -EINVAL;
2091 goto err;
2092 }
2091 } 2093 }
2092 } 2094 }
2093 2095
2094 dev_info(priv->dev, "Adding vlanid %d to vlan filter\n", vid); 2096 dev_info(priv->dev, "Adding vlanid %d to vlan filter\n", vid);
2095 ret = cpsw_add_vlan_ale_entry(priv, vid); 2097 ret = cpsw_add_vlan_ale_entry(priv, vid);
2096 2098err:
2097 pm_runtime_put(cpsw->dev); 2099 pm_runtime_put(cpsw->dev);
2098 return ret; 2100 return ret;
2099} 2101}
@@ -2119,22 +2121,17 @@ static int cpsw_ndo_vlan_rx_kill_vid(struct net_device *ndev,
2119 2121
2120 for (i = 0; i < cpsw->data.slaves; i++) { 2122 for (i = 0; i < cpsw->data.slaves; i++) {
2121 if (vid == cpsw->slaves[i].port_vlan) 2123 if (vid == cpsw->slaves[i].port_vlan)
2122 return -EINVAL; 2124 goto err;
2123 } 2125 }
2124 } 2126 }
2125 2127
2126 dev_info(priv->dev, "removing vlanid %d from vlan filter\n", vid); 2128 dev_info(priv->dev, "removing vlanid %d from vlan filter\n", vid);
2127 ret = cpsw_ale_del_vlan(cpsw->ale, vid, 0); 2129 ret = cpsw_ale_del_vlan(cpsw->ale, vid, 0);
2128 if (ret != 0) 2130 ret |= cpsw_ale_del_ucast(cpsw->ale, priv->mac_addr,
2129 return ret; 2131 HOST_PORT_NUM, ALE_VLAN, vid);
2130 2132 ret |= cpsw_ale_del_mcast(cpsw->ale, priv->ndev->broadcast,
2131 ret = cpsw_ale_del_ucast(cpsw->ale, priv->mac_addr, 2133 0, ALE_VLAN, vid);
2132 HOST_PORT_NUM, ALE_VLAN, vid); 2134err:
2133 if (ret != 0)
2134 return ret;
2135
2136 ret = cpsw_ale_del_mcast(cpsw->ale, priv->ndev->broadcast,
2137 0, ALE_VLAN, vid);
2138 pm_runtime_put(cpsw->dev); 2135 pm_runtime_put(cpsw->dev);
2139 return ret; 2136 return ret;
2140} 2137}
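
The cpsw hunks above reroute the early error returns through a common err: label so pm_runtime_put() runs on every exit path, not just the success path. A small stand-alone sketch of that goto-cleanup shape, with a plain counter standing in for the runtime-PM usage count; the names and values are illustrative, not the driver's:

#include <stdio.h>

static int pm_usage;	/* stand-in for the device's runtime-PM refcount */

static int pm_runtime_get_sync(void) { pm_usage++; return 0; }
static void pm_runtime_put(void)     { pm_usage--; }

/* Stand-in for cpsw_ndo_vlan_rx_add_vid(): every exit path, including
 * the "VID already used by a port" error, must drop the reference it
 * took, so errors jump to a common label instead of returning early.   */
static int add_vid(int vid, int reserved_vid)
{
	int ret;

	ret = pm_runtime_get_sync();
	if (ret < 0)
		return ret;

	if (vid == reserved_vid) {
		ret = -1;	/* -EINVAL in the driver */
		goto err;
	}

	printf("vid %d added\n", vid);
	ret = 0;
err:
	pm_runtime_put();
	return ret;
}

int main(void)
{
	add_vid(100, 100);	/* error path  */
	add_vid(200, 100);	/* success path */
	printf("pm usage count after both calls: %d\n", pm_usage);
	return 0;
}

With the early returns of the pre-fix code, the error path would leave pm_usage at a non-zero value, i.e. a leaked runtime-PM reference.
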
diff --git a/drivers/net/ethernet/ti/cpsw_ale.c b/drivers/net/ethernet/ti/cpsw_ale.c
index 93dc05c194d3..5766225a4ce1 100644
--- a/drivers/net/ethernet/ti/cpsw_ale.c
+++ b/drivers/net/ethernet/ti/cpsw_ale.c
@@ -394,7 +394,7 @@ int cpsw_ale_del_mcast(struct cpsw_ale *ale, u8 *addr, int port_mask,
394 394
395 idx = cpsw_ale_match_addr(ale, addr, (flags & ALE_VLAN) ? vid : 0); 395 idx = cpsw_ale_match_addr(ale, addr, (flags & ALE_VLAN) ? vid : 0);
396 if (idx < 0) 396 if (idx < 0)
397 return -EINVAL; 397 return -ENOENT;
398 398
399 cpsw_ale_read(ale, idx, ale_entry); 399 cpsw_ale_read(ale, idx, ale_entry);
400 400
diff --git a/drivers/net/netdevsim/devlink.c b/drivers/net/netdevsim/devlink.c
index ba663e5af168..5135fc371f01 100644
--- a/drivers/net/netdevsim/devlink.c
+++ b/drivers/net/netdevsim/devlink.c
@@ -207,6 +207,7 @@ void nsim_devlink_teardown(struct netdevsim *ns)
207 struct net *net = nsim_to_net(ns); 207 struct net *net = nsim_to_net(ns);
208 bool *reg_devlink = net_generic(net, nsim_devlink_id); 208 bool *reg_devlink = net_generic(net, nsim_devlink_id);
209 209
210 devlink_resources_unregister(ns->devlink, NULL);
210 devlink_unregister(ns->devlink); 211 devlink_unregister(ns->devlink);
211 devlink_free(ns->devlink); 212 devlink_free(ns->devlink);
212 ns->devlink = NULL; 213 ns->devlink = NULL;
diff --git a/drivers/net/phy/mdio-mux-bcm-iproc.c b/drivers/net/phy/mdio-mux-bcm-iproc.c
index 0831b7142df7..0c5b68e7da51 100644
--- a/drivers/net/phy/mdio-mux-bcm-iproc.c
+++ b/drivers/net/phy/mdio-mux-bcm-iproc.c
@@ -218,7 +218,7 @@ out:
218 218
219static int mdio_mux_iproc_remove(struct platform_device *pdev) 219static int mdio_mux_iproc_remove(struct platform_device *pdev)
220{ 220{
221 struct iproc_mdiomux_desc *md = dev_get_platdata(&pdev->dev); 221 struct iproc_mdiomux_desc *md = platform_get_drvdata(pdev);
222 222
223 mdio_mux_uninit(md->mux_handle); 223 mdio_mux_uninit(md->mux_handle);
224 mdiobus_unregister(md->mii_bus); 224 mdiobus_unregister(md->mii_bus);
diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index ed10d49eb5e0..aeca484a75b8 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -1242,6 +1242,8 @@ static int lan78xx_link_reset(struct lan78xx_net *dev)
1242 mod_timer(&dev->stat_monitor, 1242 mod_timer(&dev->stat_monitor,
1243 jiffies + STAT_UPDATE_TIMER); 1243 jiffies + STAT_UPDATE_TIMER);
1244 } 1244 }
1245
1246 tasklet_schedule(&dev->bh);
1245 } 1247 }
1246 1248
1247 return ret; 1249 return ret;
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 53085c63277b..2b6ec927809e 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -586,7 +586,8 @@ static struct sk_buff *receive_small(struct net_device *dev,
586 struct receive_queue *rq, 586 struct receive_queue *rq,
587 void *buf, void *ctx, 587 void *buf, void *ctx,
588 unsigned int len, 588 unsigned int len,
589 unsigned int *xdp_xmit) 589 unsigned int *xdp_xmit,
590 unsigned int *rbytes)
590{ 591{
591 struct sk_buff *skb; 592 struct sk_buff *skb;
592 struct bpf_prog *xdp_prog; 593 struct bpf_prog *xdp_prog;
@@ -601,6 +602,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
601 int err; 602 int err;
602 603
603 len -= vi->hdr_len; 604 len -= vi->hdr_len;
605 *rbytes += len;
604 606
605 rcu_read_lock(); 607 rcu_read_lock();
606 xdp_prog = rcu_dereference(rq->xdp_prog); 608 xdp_prog = rcu_dereference(rq->xdp_prog);
@@ -705,11 +707,13 @@ static struct sk_buff *receive_big(struct net_device *dev,
705 struct virtnet_info *vi, 707 struct virtnet_info *vi,
706 struct receive_queue *rq, 708 struct receive_queue *rq,
707 void *buf, 709 void *buf,
708 unsigned int len) 710 unsigned int len,
711 unsigned int *rbytes)
709{ 712{
710 struct page *page = buf; 713 struct page *page = buf;
711 struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE); 714 struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE);
712 715
716 *rbytes += len - vi->hdr_len;
713 if (unlikely(!skb)) 717 if (unlikely(!skb))
714 goto err; 718 goto err;
715 719
@@ -727,7 +731,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
727 void *buf, 731 void *buf,
728 void *ctx, 732 void *ctx,
729 unsigned int len, 733 unsigned int len,
730 unsigned int *xdp_xmit) 734 unsigned int *xdp_xmit,
735 unsigned int *rbytes)
731{ 736{
732 struct virtio_net_hdr_mrg_rxbuf *hdr = buf; 737 struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
733 u16 num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers); 738 u16 num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
@@ -740,6 +745,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
740 int err; 745 int err;
741 746
742 head_skb = NULL; 747 head_skb = NULL;
748 *rbytes += len - vi->hdr_len;
743 749
744 rcu_read_lock(); 750 rcu_read_lock();
745 xdp_prog = rcu_dereference(rq->xdp_prog); 751 xdp_prog = rcu_dereference(rq->xdp_prog);
@@ -877,6 +883,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
877 goto err_buf; 883 goto err_buf;
878 } 884 }
879 885
886 *rbytes += len;
880 page = virt_to_head_page(buf); 887 page = virt_to_head_page(buf);
881 888
882 truesize = mergeable_ctx_to_truesize(ctx); 889 truesize = mergeable_ctx_to_truesize(ctx);
@@ -932,6 +939,7 @@ err_skb:
932 dev->stats.rx_length_errors++; 939 dev->stats.rx_length_errors++;
933 break; 940 break;
934 } 941 }
942 *rbytes += len;
935 page = virt_to_head_page(buf); 943 page = virt_to_head_page(buf);
936 put_page(page); 944 put_page(page);
937 } 945 }
@@ -942,14 +950,13 @@ xdp_xmit:
942 return NULL; 950 return NULL;
943} 951}
944 952
945static int receive_buf(struct virtnet_info *vi, struct receive_queue *rq, 953static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
946 void *buf, unsigned int len, void **ctx, 954 void *buf, unsigned int len, void **ctx,
947 unsigned int *xdp_xmit) 955 unsigned int *xdp_xmit, unsigned int *rbytes)
948{ 956{
949 struct net_device *dev = vi->dev; 957 struct net_device *dev = vi->dev;
950 struct sk_buff *skb; 958 struct sk_buff *skb;
951 struct virtio_net_hdr_mrg_rxbuf *hdr; 959 struct virtio_net_hdr_mrg_rxbuf *hdr;
952 int ret;
953 960
954 if (unlikely(len < vi->hdr_len + ETH_HLEN)) { 961 if (unlikely(len < vi->hdr_len + ETH_HLEN)) {
955 pr_debug("%s: short packet %i\n", dev->name, len); 962 pr_debug("%s: short packet %i\n", dev->name, len);
@@ -961,23 +968,22 @@ static int receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
961 } else { 968 } else {
962 put_page(virt_to_head_page(buf)); 969 put_page(virt_to_head_page(buf));
963 } 970 }
964 return 0; 971 return;
965 } 972 }
966 973
967 if (vi->mergeable_rx_bufs) 974 if (vi->mergeable_rx_bufs)
968 skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit); 975 skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit,
976 rbytes);
969 else if (vi->big_packets) 977 else if (vi->big_packets)
970 skb = receive_big(dev, vi, rq, buf, len); 978 skb = receive_big(dev, vi, rq, buf, len, rbytes);
971 else 979 else
972 skb = receive_small(dev, vi, rq, buf, ctx, len, xdp_xmit); 980 skb = receive_small(dev, vi, rq, buf, ctx, len, xdp_xmit, rbytes);
973 981
974 if (unlikely(!skb)) 982 if (unlikely(!skb))
975 return 0; 983 return;
976 984
977 hdr = skb_vnet_hdr(skb); 985 hdr = skb_vnet_hdr(skb);
978 986
979 ret = skb->len;
980
981 if (hdr->hdr.flags & VIRTIO_NET_HDR_F_DATA_VALID) 987 if (hdr->hdr.flags & VIRTIO_NET_HDR_F_DATA_VALID)
982 skb->ip_summed = CHECKSUM_UNNECESSARY; 988 skb->ip_summed = CHECKSUM_UNNECESSARY;
983 989
@@ -994,12 +1000,11 @@ static int receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
994 ntohs(skb->protocol), skb->len, skb->pkt_type); 1000 ntohs(skb->protocol), skb->len, skb->pkt_type);
995 1001
996 napi_gro_receive(&rq->napi, skb); 1002 napi_gro_receive(&rq->napi, skb);
997 return ret; 1003 return;
998 1004
999frame_err: 1005frame_err:
1000 dev->stats.rx_frame_errors++; 1006 dev->stats.rx_frame_errors++;
1001 dev_kfree_skb(skb); 1007 dev_kfree_skb(skb);
1002 return 0;
1003} 1008}
1004 1009
1005/* Unlike mergeable buffers, all buffers are allocated to the 1010/* Unlike mergeable buffers, all buffers are allocated to the
@@ -1249,13 +1254,13 @@ static int virtnet_receive(struct receive_queue *rq, int budget,
1249 1254
1250 while (received < budget && 1255 while (received < budget &&
1251 (buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx))) { 1256 (buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx))) {
1252 bytes += receive_buf(vi, rq, buf, len, ctx, xdp_xmit); 1257 receive_buf(vi, rq, buf, len, ctx, xdp_xmit, &bytes);
1253 received++; 1258 received++;
1254 } 1259 }
1255 } else { 1260 } else {
1256 while (received < budget && 1261 while (received < budget &&
1257 (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) { 1262 (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) {
1258 bytes += receive_buf(vi, rq, buf, len, NULL, xdp_xmit); 1263 receive_buf(vi, rq, buf, len, NULL, xdp_xmit, &bytes);
1259 received++; 1264 received++;
1260 } 1265 }
1261 } 1266 }
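
The virtio_net hunks above thread an rbytes out-parameter through the receive paths instead of returning the byte count, so bytes are accumulated as soon as a buffer is pulled off the ring rather than only when an skb is delivered. A tiny sketch of that accounting shape, assuming the motivation is to count buffers that are consumed without producing an skb; the function below is an invented stand-in, not the driver's receive_buf():

#include <stdio.h>

/* Byte count accumulates through an out-parameter so dropped or
 * otherwise consumed buffers are still accounted, which a
 * return-value-based count would miss.                            */
static void receive_buf(unsigned int len, int dropped, unsigned int *rbytes)
{
	*rbytes += len;
	if (dropped)
		return;		/* no skb, but the bytes still count */
	printf("delivered %u bytes\n", len);
}

int main(void)
{
	unsigned int bytes = 0;

	receive_buf(1500, 0, &bytes);
	receive_buf(600, 1, &bytes);	/* dropped, still accounted */
	printf("rx bytes this poll: %u\n", bytes);
	return 0;
}
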
diff --git a/drivers/net/wan/lmc/lmc_main.c b/drivers/net/wan/lmc/lmc_main.c
index 90a4ad9a2d08..b3a1b6f5c406 100644
--- a/drivers/net/wan/lmc/lmc_main.c
+++ b/drivers/net/wan/lmc/lmc_main.c
@@ -1362,7 +1362,7 @@ static irqreturn_t lmc_interrupt (int irq, void *dev_instance) /*fold00*/
1362 case 0x001: 1362 case 0x001:
1363 printk(KERN_WARNING "%s: Master Abort (naughty)\n", dev->name); 1363 printk(KERN_WARNING "%s: Master Abort (naughty)\n", dev->name);
1364 break; 1364 break;
1365 case 0x010: 1365 case 0x002:
1366 printk(KERN_WARNING "%s: Target Abort (not so naughty)\n", dev->name); 1366 printk(KERN_WARNING "%s: Target Abort (not so naughty)\n", dev->name);
1367 break; 1367 break;
1368 default: 1368 default:
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
index 45928b5b8d97..4fffa6988087 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
@@ -1785,7 +1785,8 @@ brcmf_pcie_prepare_fw_request(struct brcmf_pciedev_info *devinfo)
1785 fwreq->items[BRCMF_PCIE_FW_CODE].type = BRCMF_FW_TYPE_BINARY; 1785 fwreq->items[BRCMF_PCIE_FW_CODE].type = BRCMF_FW_TYPE_BINARY;
1786 fwreq->items[BRCMF_PCIE_FW_NVRAM].type = BRCMF_FW_TYPE_NVRAM; 1786 fwreq->items[BRCMF_PCIE_FW_NVRAM].type = BRCMF_FW_TYPE_NVRAM;
1787 fwreq->items[BRCMF_PCIE_FW_NVRAM].flags = BRCMF_FW_REQF_OPTIONAL; 1787 fwreq->items[BRCMF_PCIE_FW_NVRAM].flags = BRCMF_FW_REQF_OPTIONAL;
1788 fwreq->domain_nr = pci_domain_nr(devinfo->pdev->bus); 1788 /* NVRAM reserves PCI domain 0 for Broadcom's SDK faked bus */
1789 fwreq->domain_nr = pci_domain_nr(devinfo->pdev->bus) + 1;
1789 fwreq->bus_nr = devinfo->pdev->bus->number; 1790 fwreq->bus_nr = devinfo->pdev->bus->number;
1790 1791
1791 return fwreq; 1792 return fwreq;
diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/9000.c b/drivers/net/wireless/intel/iwlwifi/cfg/9000.c
index e20c30b29c03..c8ea63d02619 100644
--- a/drivers/net/wireless/intel/iwlwifi/cfg/9000.c
+++ b/drivers/net/wireless/intel/iwlwifi/cfg/9000.c
@@ -178,6 +178,17 @@ const struct iwl_cfg iwl9260_2ac_cfg = {
178 .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, 178 .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K,
179}; 179};
180 180
181const struct iwl_cfg iwl9260_killer_2ac_cfg = {
182 .name = "Killer (R) Wireless-AC 1550 Wireless Network Adapter (9260NGW)",
183 .fw_name_pre = IWL9260A_FW_PRE,
184 .fw_name_pre_b_or_c_step = IWL9260B_FW_PRE,
185 IWL_DEVICE_9000,
186 .ht_params = &iwl9000_ht_params,
187 .nvm_ver = IWL9000_NVM_VERSION,
188 .nvm_calib_ver = IWL9000_TX_POWER_VERSION,
189 .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K,
190};
191
181const struct iwl_cfg iwl9270_2ac_cfg = { 192const struct iwl_cfg iwl9270_2ac_cfg = {
182 .name = "Intel(R) Dual Band Wireless AC 9270", 193 .name = "Intel(R) Dual Band Wireless AC 9270",
183 .fw_name_pre = IWL9260A_FW_PRE, 194 .fw_name_pre = IWL9260A_FW_PRE,
@@ -267,6 +278,34 @@ const struct iwl_cfg iwl9560_2ac_cfg_soc = {
267 .soc_latency = 5000, 278 .soc_latency = 5000,
268}; 279};
269 280
281const struct iwl_cfg iwl9560_killer_2ac_cfg_soc = {
282 .name = "Killer (R) Wireless-AC 1550i Wireless Network Adapter (9560NGW)",
283 .fw_name_pre = IWL9000A_FW_PRE,
284 .fw_name_pre_b_or_c_step = IWL9000B_FW_PRE,
285 .fw_name_pre_rf_next_step = IWL9000RFB_FW_PRE,
286 IWL_DEVICE_9000,
287 .ht_params = &iwl9000_ht_params,
288 .nvm_ver = IWL9000_NVM_VERSION,
289 .nvm_calib_ver = IWL9000_TX_POWER_VERSION,
290 .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K,
291 .integrated = true,
292 .soc_latency = 5000,
293};
294
295const struct iwl_cfg iwl9560_killer_s_2ac_cfg_soc = {
296 .name = "Killer (R) Wireless-AC 1550s Wireless Network Adapter (9560NGW)",
297 .fw_name_pre = IWL9000A_FW_PRE,
298 .fw_name_pre_b_or_c_step = IWL9000B_FW_PRE,
299 .fw_name_pre_rf_next_step = IWL9000RFB_FW_PRE,
300 IWL_DEVICE_9000,
301 .ht_params = &iwl9000_ht_params,
302 .nvm_ver = IWL9000_NVM_VERSION,
303 .nvm_calib_ver = IWL9000_TX_POWER_VERSION,
304 .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K,
305 .integrated = true,
306 .soc_latency = 5000,
307};
308
270const struct iwl_cfg iwl9460_2ac_cfg_shared_clk = { 309const struct iwl_cfg iwl9460_2ac_cfg_shared_clk = {
271 .name = "Intel(R) Dual Band Wireless AC 9460", 310 .name = "Intel(R) Dual Band Wireless AC 9460",
272 .fw_name_pre = IWL9000A_FW_PRE, 311 .fw_name_pre = IWL9000A_FW_PRE,
@@ -327,6 +366,36 @@ const struct iwl_cfg iwl9560_2ac_cfg_shared_clk = {
327 .extra_phy_cfg_flags = FW_PHY_CFG_SHARED_CLK 366 .extra_phy_cfg_flags = FW_PHY_CFG_SHARED_CLK
328}; 367};
329 368
369const struct iwl_cfg iwl9560_killer_2ac_cfg_shared_clk = {
370 .name = "Killer (R) Wireless-AC 1550i Wireless Network Adapter (9560NGW)",
371 .fw_name_pre = IWL9000A_FW_PRE,
372 .fw_name_pre_b_or_c_step = IWL9000B_FW_PRE,
373 .fw_name_pre_rf_next_step = IWL9000RFB_FW_PRE,
374 IWL_DEVICE_9000,
375 .ht_params = &iwl9000_ht_params,
376 .nvm_ver = IWL9000_NVM_VERSION,
377 .nvm_calib_ver = IWL9000_TX_POWER_VERSION,
378 .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K,
379 .integrated = true,
380 .soc_latency = 5000,
381 .extra_phy_cfg_flags = FW_PHY_CFG_SHARED_CLK
382};
383
384const struct iwl_cfg iwl9560_killer_s_2ac_cfg_shared_clk = {
385 .name = "Killer (R) Wireless-AC 1550s Wireless Network Adapter (9560NGW)",
386 .fw_name_pre = IWL9000A_FW_PRE,
387 .fw_name_pre_b_or_c_step = IWL9000B_FW_PRE,
388 .fw_name_pre_rf_next_step = IWL9000RFB_FW_PRE,
389 IWL_DEVICE_9000,
390 .ht_params = &iwl9000_ht_params,
391 .nvm_ver = IWL9000_NVM_VERSION,
392 .nvm_calib_ver = IWL9000_TX_POWER_VERSION,
393 .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K,
394 .integrated = true,
395 .soc_latency = 5000,
396 .extra_phy_cfg_flags = FW_PHY_CFG_SHARED_CLK
397};
398
330MODULE_FIRMWARE(IWL9000A_MODULE_FIRMWARE(IWL9000_UCODE_API_MAX)); 399MODULE_FIRMWARE(IWL9000A_MODULE_FIRMWARE(IWL9000_UCODE_API_MAX));
331MODULE_FIRMWARE(IWL9000B_MODULE_FIRMWARE(IWL9000_UCODE_API_MAX)); 400MODULE_FIRMWARE(IWL9000B_MODULE_FIRMWARE(IWL9000_UCODE_API_MAX));
332MODULE_FIRMWARE(IWL9000RFB_MODULE_FIRMWARE(IWL9000_UCODE_API_MAX)); 401MODULE_FIRMWARE(IWL9000RFB_MODULE_FIRMWARE(IWL9000_UCODE_API_MAX));
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h
index c503b26793f6..84a816809723 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h
@@ -551,6 +551,7 @@ extern const struct iwl_cfg iwl8275_2ac_cfg;
551extern const struct iwl_cfg iwl4165_2ac_cfg; 551extern const struct iwl_cfg iwl4165_2ac_cfg;
552extern const struct iwl_cfg iwl9160_2ac_cfg; 552extern const struct iwl_cfg iwl9160_2ac_cfg;
553extern const struct iwl_cfg iwl9260_2ac_cfg; 553extern const struct iwl_cfg iwl9260_2ac_cfg;
554extern const struct iwl_cfg iwl9260_killer_2ac_cfg;
554extern const struct iwl_cfg iwl9270_2ac_cfg; 555extern const struct iwl_cfg iwl9270_2ac_cfg;
555extern const struct iwl_cfg iwl9460_2ac_cfg; 556extern const struct iwl_cfg iwl9460_2ac_cfg;
556extern const struct iwl_cfg iwl9560_2ac_cfg; 557extern const struct iwl_cfg iwl9560_2ac_cfg;
@@ -558,10 +559,14 @@ extern const struct iwl_cfg iwl9460_2ac_cfg_soc;
558extern const struct iwl_cfg iwl9461_2ac_cfg_soc; 559extern const struct iwl_cfg iwl9461_2ac_cfg_soc;
559extern const struct iwl_cfg iwl9462_2ac_cfg_soc; 560extern const struct iwl_cfg iwl9462_2ac_cfg_soc;
560extern const struct iwl_cfg iwl9560_2ac_cfg_soc; 561extern const struct iwl_cfg iwl9560_2ac_cfg_soc;
562extern const struct iwl_cfg iwl9560_killer_2ac_cfg_soc;
563extern const struct iwl_cfg iwl9560_killer_s_2ac_cfg_soc;
561extern const struct iwl_cfg iwl9460_2ac_cfg_shared_clk; 564extern const struct iwl_cfg iwl9460_2ac_cfg_shared_clk;
562extern const struct iwl_cfg iwl9461_2ac_cfg_shared_clk; 565extern const struct iwl_cfg iwl9461_2ac_cfg_shared_clk;
563extern const struct iwl_cfg iwl9462_2ac_cfg_shared_clk; 566extern const struct iwl_cfg iwl9462_2ac_cfg_shared_clk;
564extern const struct iwl_cfg iwl9560_2ac_cfg_shared_clk; 567extern const struct iwl_cfg iwl9560_2ac_cfg_shared_clk;
568extern const struct iwl_cfg iwl9560_killer_2ac_cfg_shared_clk;
569extern const struct iwl_cfg iwl9560_killer_s_2ac_cfg_shared_clk;
565extern const struct iwl_cfg iwl22000_2ac_cfg_hr; 570extern const struct iwl_cfg iwl22000_2ac_cfg_hr;
566extern const struct iwl_cfg iwl22000_2ac_cfg_hr_cdb; 571extern const struct iwl_cfg iwl22000_2ac_cfg_hr_cdb;
567extern const struct iwl_cfg iwl22000_2ac_cfg_jf; 572extern const struct iwl_cfg iwl22000_2ac_cfg_jf;
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
index 38234bda9017..8520523b91b4 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
@@ -545,6 +545,9 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
545 {IWL_PCI_DEVICE(0x2526, 0x1210, iwl9260_2ac_cfg)}, 545 {IWL_PCI_DEVICE(0x2526, 0x1210, iwl9260_2ac_cfg)},
546 {IWL_PCI_DEVICE(0x2526, 0x1410, iwl9270_2ac_cfg)}, 546 {IWL_PCI_DEVICE(0x2526, 0x1410, iwl9270_2ac_cfg)},
547 {IWL_PCI_DEVICE(0x2526, 0x1420, iwl9460_2ac_cfg_soc)}, 547 {IWL_PCI_DEVICE(0x2526, 0x1420, iwl9460_2ac_cfg_soc)},
548 {IWL_PCI_DEVICE(0x2526, 0x1550, iwl9260_killer_2ac_cfg)},
549 {IWL_PCI_DEVICE(0x2526, 0x1551, iwl9560_killer_s_2ac_cfg_soc)},
550 {IWL_PCI_DEVICE(0x2526, 0x1552, iwl9560_killer_2ac_cfg_soc)},
548 {IWL_PCI_DEVICE(0x2526, 0x1610, iwl9270_2ac_cfg)}, 551 {IWL_PCI_DEVICE(0x2526, 0x1610, iwl9270_2ac_cfg)},
549 {IWL_PCI_DEVICE(0x2526, 0x2030, iwl9560_2ac_cfg_soc)}, 552 {IWL_PCI_DEVICE(0x2526, 0x2030, iwl9560_2ac_cfg_soc)},
550 {IWL_PCI_DEVICE(0x2526, 0x2034, iwl9560_2ac_cfg_soc)}, 553 {IWL_PCI_DEVICE(0x2526, 0x2034, iwl9560_2ac_cfg_soc)},
@@ -554,6 +557,7 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
554 {IWL_PCI_DEVICE(0x2526, 0x40A4, iwl9460_2ac_cfg)}, 557 {IWL_PCI_DEVICE(0x2526, 0x40A4, iwl9460_2ac_cfg)},
555 {IWL_PCI_DEVICE(0x2526, 0x4234, iwl9560_2ac_cfg_soc)}, 558 {IWL_PCI_DEVICE(0x2526, 0x4234, iwl9560_2ac_cfg_soc)},
556 {IWL_PCI_DEVICE(0x2526, 0x42A4, iwl9462_2ac_cfg_soc)}, 559 {IWL_PCI_DEVICE(0x2526, 0x42A4, iwl9462_2ac_cfg_soc)},
560 {IWL_PCI_DEVICE(0x2526, 0x8014, iwl9260_2ac_cfg)},
557 {IWL_PCI_DEVICE(0x2526, 0xA014, iwl9260_2ac_cfg)}, 561 {IWL_PCI_DEVICE(0x2526, 0xA014, iwl9260_2ac_cfg)},
558 {IWL_PCI_DEVICE(0x271B, 0x0010, iwl9160_2ac_cfg)}, 562 {IWL_PCI_DEVICE(0x271B, 0x0010, iwl9160_2ac_cfg)},
559 {IWL_PCI_DEVICE(0x271B, 0x0014, iwl9160_2ac_cfg)}, 563 {IWL_PCI_DEVICE(0x271B, 0x0014, iwl9160_2ac_cfg)},
@@ -578,6 +582,8 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
578 {IWL_PCI_DEVICE(0x2720, 0x1010, iwl9260_2ac_cfg)}, 582 {IWL_PCI_DEVICE(0x2720, 0x1010, iwl9260_2ac_cfg)},
579 {IWL_PCI_DEVICE(0x2720, 0x1030, iwl9560_2ac_cfg_soc)}, 583 {IWL_PCI_DEVICE(0x2720, 0x1030, iwl9560_2ac_cfg_soc)},
580 {IWL_PCI_DEVICE(0x2720, 0x1210, iwl9260_2ac_cfg)}, 584 {IWL_PCI_DEVICE(0x2720, 0x1210, iwl9260_2ac_cfg)},
585 {IWL_PCI_DEVICE(0x2720, 0x1551, iwl9560_killer_s_2ac_cfg_soc)},
586 {IWL_PCI_DEVICE(0x2720, 0x1552, iwl9560_killer_2ac_cfg_soc)},
581 {IWL_PCI_DEVICE(0x2720, 0x2030, iwl9560_2ac_cfg_soc)}, 587 {IWL_PCI_DEVICE(0x2720, 0x2030, iwl9560_2ac_cfg_soc)},
582 {IWL_PCI_DEVICE(0x2720, 0x2034, iwl9560_2ac_cfg_soc)}, 588 {IWL_PCI_DEVICE(0x2720, 0x2034, iwl9560_2ac_cfg_soc)},
583 {IWL_PCI_DEVICE(0x2720, 0x4030, iwl9560_2ac_cfg)}, 589 {IWL_PCI_DEVICE(0x2720, 0x4030, iwl9560_2ac_cfg)},
@@ -604,6 +610,8 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
604 {IWL_PCI_DEVICE(0x30DC, 0x1010, iwl9260_2ac_cfg)}, 610 {IWL_PCI_DEVICE(0x30DC, 0x1010, iwl9260_2ac_cfg)},
605 {IWL_PCI_DEVICE(0x30DC, 0x1030, iwl9560_2ac_cfg_soc)}, 611 {IWL_PCI_DEVICE(0x30DC, 0x1030, iwl9560_2ac_cfg_soc)},
606 {IWL_PCI_DEVICE(0x30DC, 0x1210, iwl9260_2ac_cfg)}, 612 {IWL_PCI_DEVICE(0x30DC, 0x1210, iwl9260_2ac_cfg)},
613 {IWL_PCI_DEVICE(0x30DC, 0x1551, iwl9560_killer_s_2ac_cfg_soc)},
614 {IWL_PCI_DEVICE(0x30DC, 0x1552, iwl9560_killer_2ac_cfg_soc)},
607 {IWL_PCI_DEVICE(0x30DC, 0x2030, iwl9560_2ac_cfg_soc)}, 615 {IWL_PCI_DEVICE(0x30DC, 0x2030, iwl9560_2ac_cfg_soc)},
608 {IWL_PCI_DEVICE(0x30DC, 0x2034, iwl9560_2ac_cfg_soc)}, 616 {IWL_PCI_DEVICE(0x30DC, 0x2034, iwl9560_2ac_cfg_soc)},
609 {IWL_PCI_DEVICE(0x30DC, 0x4030, iwl9560_2ac_cfg_soc)}, 617 {IWL_PCI_DEVICE(0x30DC, 0x4030, iwl9560_2ac_cfg_soc)},
@@ -630,6 +638,8 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
630 {IWL_PCI_DEVICE(0x31DC, 0x1010, iwl9260_2ac_cfg)}, 638 {IWL_PCI_DEVICE(0x31DC, 0x1010, iwl9260_2ac_cfg)},
631 {IWL_PCI_DEVICE(0x31DC, 0x1030, iwl9560_2ac_cfg_shared_clk)}, 639 {IWL_PCI_DEVICE(0x31DC, 0x1030, iwl9560_2ac_cfg_shared_clk)},
632 {IWL_PCI_DEVICE(0x31DC, 0x1210, iwl9260_2ac_cfg)}, 640 {IWL_PCI_DEVICE(0x31DC, 0x1210, iwl9260_2ac_cfg)},
641 {IWL_PCI_DEVICE(0x31DC, 0x1551, iwl9560_killer_s_2ac_cfg_shared_clk)},
642 {IWL_PCI_DEVICE(0x31DC, 0x1552, iwl9560_killer_2ac_cfg_shared_clk)},
633 {IWL_PCI_DEVICE(0x31DC, 0x2030, iwl9560_2ac_cfg_shared_clk)}, 643 {IWL_PCI_DEVICE(0x31DC, 0x2030, iwl9560_2ac_cfg_shared_clk)},
634 {IWL_PCI_DEVICE(0x31DC, 0x2034, iwl9560_2ac_cfg_shared_clk)}, 644 {IWL_PCI_DEVICE(0x31DC, 0x2034, iwl9560_2ac_cfg_shared_clk)},
635 {IWL_PCI_DEVICE(0x31DC, 0x4030, iwl9560_2ac_cfg_shared_clk)}, 645 {IWL_PCI_DEVICE(0x31DC, 0x4030, iwl9560_2ac_cfg_shared_clk)},
@@ -656,6 +666,8 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
656 {IWL_PCI_DEVICE(0x34F0, 0x1010, iwl9260_2ac_cfg)}, 666 {IWL_PCI_DEVICE(0x34F0, 0x1010, iwl9260_2ac_cfg)},
657 {IWL_PCI_DEVICE(0x34F0, 0x1030, iwl9560_2ac_cfg_soc)}, 667 {IWL_PCI_DEVICE(0x34F0, 0x1030, iwl9560_2ac_cfg_soc)},
658 {IWL_PCI_DEVICE(0x34F0, 0x1210, iwl9260_2ac_cfg)}, 668 {IWL_PCI_DEVICE(0x34F0, 0x1210, iwl9260_2ac_cfg)},
669 {IWL_PCI_DEVICE(0x34F0, 0x1551, iwl9560_killer_s_2ac_cfg_soc)},
670 {IWL_PCI_DEVICE(0x34F0, 0x1552, iwl9560_killer_2ac_cfg_soc)},
659 {IWL_PCI_DEVICE(0x34F0, 0x2030, iwl9560_2ac_cfg_soc)}, 671 {IWL_PCI_DEVICE(0x34F0, 0x2030, iwl9560_2ac_cfg_soc)},
660 {IWL_PCI_DEVICE(0x34F0, 0x2034, iwl9560_2ac_cfg_soc)}, 672 {IWL_PCI_DEVICE(0x34F0, 0x2034, iwl9560_2ac_cfg_soc)},
661 {IWL_PCI_DEVICE(0x34F0, 0x4030, iwl9560_2ac_cfg_soc)}, 673 {IWL_PCI_DEVICE(0x34F0, 0x4030, iwl9560_2ac_cfg_soc)},
@@ -682,6 +694,8 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
682 {IWL_PCI_DEVICE(0x3DF0, 0x1010, iwl9260_2ac_cfg)}, 694 {IWL_PCI_DEVICE(0x3DF0, 0x1010, iwl9260_2ac_cfg)},
683 {IWL_PCI_DEVICE(0x3DF0, 0x1030, iwl9560_2ac_cfg_soc)}, 695 {IWL_PCI_DEVICE(0x3DF0, 0x1030, iwl9560_2ac_cfg_soc)},
684 {IWL_PCI_DEVICE(0x3DF0, 0x1210, iwl9260_2ac_cfg)}, 696 {IWL_PCI_DEVICE(0x3DF0, 0x1210, iwl9260_2ac_cfg)},
697 {IWL_PCI_DEVICE(0x3DF0, 0x1551, iwl9560_killer_s_2ac_cfg_soc)},
698 {IWL_PCI_DEVICE(0x3DF0, 0x1552, iwl9560_killer_2ac_cfg_soc)},
685 {IWL_PCI_DEVICE(0x3DF0, 0x2030, iwl9560_2ac_cfg_soc)}, 699 {IWL_PCI_DEVICE(0x3DF0, 0x2030, iwl9560_2ac_cfg_soc)},
686 {IWL_PCI_DEVICE(0x3DF0, 0x2034, iwl9560_2ac_cfg_soc)}, 700 {IWL_PCI_DEVICE(0x3DF0, 0x2034, iwl9560_2ac_cfg_soc)},
687 {IWL_PCI_DEVICE(0x3DF0, 0x4030, iwl9560_2ac_cfg_soc)}, 701 {IWL_PCI_DEVICE(0x3DF0, 0x4030, iwl9560_2ac_cfg_soc)},
@@ -708,6 +722,8 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
708 {IWL_PCI_DEVICE(0x43F0, 0x1010, iwl9260_2ac_cfg)}, 722 {IWL_PCI_DEVICE(0x43F0, 0x1010, iwl9260_2ac_cfg)},
709 {IWL_PCI_DEVICE(0x43F0, 0x1030, iwl9560_2ac_cfg_soc)}, 723 {IWL_PCI_DEVICE(0x43F0, 0x1030, iwl9560_2ac_cfg_soc)},
710 {IWL_PCI_DEVICE(0x43F0, 0x1210, iwl9260_2ac_cfg)}, 724 {IWL_PCI_DEVICE(0x43F0, 0x1210, iwl9260_2ac_cfg)},
725 {IWL_PCI_DEVICE(0x43F0, 0x1551, iwl9560_killer_s_2ac_cfg_soc)},
726 {IWL_PCI_DEVICE(0x43F0, 0x1552, iwl9560_killer_2ac_cfg_soc)},
711 {IWL_PCI_DEVICE(0x43F0, 0x2030, iwl9560_2ac_cfg_soc)}, 727 {IWL_PCI_DEVICE(0x43F0, 0x2030, iwl9560_2ac_cfg_soc)},
712 {IWL_PCI_DEVICE(0x43F0, 0x2034, iwl9560_2ac_cfg_soc)}, 728 {IWL_PCI_DEVICE(0x43F0, 0x2034, iwl9560_2ac_cfg_soc)},
713 {IWL_PCI_DEVICE(0x43F0, 0x4030, iwl9560_2ac_cfg_soc)}, 729 {IWL_PCI_DEVICE(0x43F0, 0x4030, iwl9560_2ac_cfg_soc)},
@@ -743,6 +759,8 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
743 {IWL_PCI_DEVICE(0x9DF0, 0x1010, iwl9260_2ac_cfg)}, 759 {IWL_PCI_DEVICE(0x9DF0, 0x1010, iwl9260_2ac_cfg)},
744 {IWL_PCI_DEVICE(0x9DF0, 0x1030, iwl9560_2ac_cfg_soc)}, 760 {IWL_PCI_DEVICE(0x9DF0, 0x1030, iwl9560_2ac_cfg_soc)},
745 {IWL_PCI_DEVICE(0x9DF0, 0x1210, iwl9260_2ac_cfg)}, 761 {IWL_PCI_DEVICE(0x9DF0, 0x1210, iwl9260_2ac_cfg)},
762 {IWL_PCI_DEVICE(0x9DF0, 0x1551, iwl9560_killer_s_2ac_cfg_soc)},
763 {IWL_PCI_DEVICE(0x9DF0, 0x1552, iwl9560_killer_2ac_cfg_soc)},
746 {IWL_PCI_DEVICE(0x9DF0, 0x2010, iwl9460_2ac_cfg_soc)}, 764 {IWL_PCI_DEVICE(0x9DF0, 0x2010, iwl9460_2ac_cfg_soc)},
747 {IWL_PCI_DEVICE(0x9DF0, 0x2030, iwl9560_2ac_cfg_soc)}, 765 {IWL_PCI_DEVICE(0x9DF0, 0x2030, iwl9560_2ac_cfg_soc)},
748 {IWL_PCI_DEVICE(0x9DF0, 0x2034, iwl9560_2ac_cfg_soc)}, 766 {IWL_PCI_DEVICE(0x9DF0, 0x2034, iwl9560_2ac_cfg_soc)},
@@ -771,6 +789,8 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
771 {IWL_PCI_DEVICE(0xA0F0, 0x1010, iwl9260_2ac_cfg)}, 789 {IWL_PCI_DEVICE(0xA0F0, 0x1010, iwl9260_2ac_cfg)},
772 {IWL_PCI_DEVICE(0xA0F0, 0x1030, iwl9560_2ac_cfg_soc)}, 790 {IWL_PCI_DEVICE(0xA0F0, 0x1030, iwl9560_2ac_cfg_soc)},
773 {IWL_PCI_DEVICE(0xA0F0, 0x1210, iwl9260_2ac_cfg)}, 791 {IWL_PCI_DEVICE(0xA0F0, 0x1210, iwl9260_2ac_cfg)},
792 {IWL_PCI_DEVICE(0xA0F0, 0x1551, iwl9560_killer_s_2ac_cfg_soc)},
793 {IWL_PCI_DEVICE(0xA0F0, 0x1552, iwl9560_killer_2ac_cfg_soc)},
774 {IWL_PCI_DEVICE(0xA0F0, 0x2030, iwl9560_2ac_cfg_soc)}, 794 {IWL_PCI_DEVICE(0xA0F0, 0x2030, iwl9560_2ac_cfg_soc)},
775 {IWL_PCI_DEVICE(0xA0F0, 0x2034, iwl9560_2ac_cfg_soc)}, 795 {IWL_PCI_DEVICE(0xA0F0, 0x2034, iwl9560_2ac_cfg_soc)},
776 {IWL_PCI_DEVICE(0xA0F0, 0x4030, iwl9560_2ac_cfg_soc)}, 796 {IWL_PCI_DEVICE(0xA0F0, 0x4030, iwl9560_2ac_cfg_soc)},
@@ -797,6 +817,8 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
797 {IWL_PCI_DEVICE(0xA370, 0x1010, iwl9260_2ac_cfg)}, 817 {IWL_PCI_DEVICE(0xA370, 0x1010, iwl9260_2ac_cfg)},
798 {IWL_PCI_DEVICE(0xA370, 0x1030, iwl9560_2ac_cfg_soc)}, 818 {IWL_PCI_DEVICE(0xA370, 0x1030, iwl9560_2ac_cfg_soc)},
799 {IWL_PCI_DEVICE(0xA370, 0x1210, iwl9260_2ac_cfg)}, 819 {IWL_PCI_DEVICE(0xA370, 0x1210, iwl9260_2ac_cfg)},
820 {IWL_PCI_DEVICE(0xA370, 0x1551, iwl9560_killer_s_2ac_cfg_soc)},
821 {IWL_PCI_DEVICE(0xA370, 0x1552, iwl9560_killer_2ac_cfg_soc)},
800 {IWL_PCI_DEVICE(0xA370, 0x2030, iwl9560_2ac_cfg_soc)}, 822 {IWL_PCI_DEVICE(0xA370, 0x2030, iwl9560_2ac_cfg_soc)},
801 {IWL_PCI_DEVICE(0xA370, 0x2034, iwl9560_2ac_cfg_soc)}, 823 {IWL_PCI_DEVICE(0xA370, 0x2034, iwl9560_2ac_cfg_soc)},
802 {IWL_PCI_DEVICE(0xA370, 0x4030, iwl9560_2ac_cfg_soc)}, 824 {IWL_PCI_DEVICE(0xA370, 0x4030, iwl9560_2ac_cfg_soc)},
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index a57daecf1d57..9dd2ca62d84a 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -87,6 +87,7 @@ struct netfront_cb {
87/* IRQ name is queue name with "-tx" or "-rx" appended */ 87/* IRQ name is queue name with "-tx" or "-rx" appended */
88#define IRQ_NAME_SIZE (QUEUE_NAME_SIZE + 3) 88#define IRQ_NAME_SIZE (QUEUE_NAME_SIZE + 3)
89 89
90static DECLARE_WAIT_QUEUE_HEAD(module_load_q);
90static DECLARE_WAIT_QUEUE_HEAD(module_unload_q); 91static DECLARE_WAIT_QUEUE_HEAD(module_unload_q);
91 92
92struct netfront_stats { 93struct netfront_stats {
@@ -893,7 +894,6 @@ static RING_IDX xennet_fill_frags(struct netfront_queue *queue,
893 struct sk_buff *skb, 894 struct sk_buff *skb,
894 struct sk_buff_head *list) 895 struct sk_buff_head *list)
895{ 896{
896 struct skb_shared_info *shinfo = skb_shinfo(skb);
897 RING_IDX cons = queue->rx.rsp_cons; 897 RING_IDX cons = queue->rx.rsp_cons;
898 struct sk_buff *nskb; 898 struct sk_buff *nskb;
899 899
@@ -902,15 +902,16 @@ static RING_IDX xennet_fill_frags(struct netfront_queue *queue,
902 RING_GET_RESPONSE(&queue->rx, ++cons); 902 RING_GET_RESPONSE(&queue->rx, ++cons);
903 skb_frag_t *nfrag = &skb_shinfo(nskb)->frags[0]; 903 skb_frag_t *nfrag = &skb_shinfo(nskb)->frags[0];
904 904
905 if (shinfo->nr_frags == MAX_SKB_FRAGS) { 905 if (skb_shinfo(skb)->nr_frags == MAX_SKB_FRAGS) {
906 unsigned int pull_to = NETFRONT_SKB_CB(skb)->pull_to; 906 unsigned int pull_to = NETFRONT_SKB_CB(skb)->pull_to;
907 907
908 BUG_ON(pull_to <= skb_headlen(skb)); 908 BUG_ON(pull_to <= skb_headlen(skb));
909 __pskb_pull_tail(skb, pull_to - skb_headlen(skb)); 909 __pskb_pull_tail(skb, pull_to - skb_headlen(skb));
910 } 910 }
911 BUG_ON(shinfo->nr_frags >= MAX_SKB_FRAGS); 911 BUG_ON(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS);
912 912
913 skb_add_rx_frag(skb, shinfo->nr_frags, skb_frag_page(nfrag), 913 skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
914 skb_frag_page(nfrag),
914 rx->offset, rx->status, PAGE_SIZE); 915 rx->offset, rx->status, PAGE_SIZE);
915 916
916 skb_shinfo(nskb)->nr_frags = 0; 917 skb_shinfo(nskb)->nr_frags = 0;
@@ -1330,6 +1331,11 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev)
1330 netif_carrier_off(netdev); 1331 netif_carrier_off(netdev);
1331 1332
1332 xenbus_switch_state(dev, XenbusStateInitialising); 1333 xenbus_switch_state(dev, XenbusStateInitialising);
1334 wait_event(module_load_q,
1335 xenbus_read_driver_state(dev->otherend) !=
1336 XenbusStateClosed &&
1337 xenbus_read_driver_state(dev->otherend) !=
1338 XenbusStateUnknown);
1333 return netdev; 1339 return netdev;
1334 1340
1335 exit: 1341 exit:
diff --git a/drivers/nubus/bus.c b/drivers/nubus/bus.c
index a59b6c4bb5b8..ad3d17c42e23 100644
--- a/drivers/nubus/bus.c
+++ b/drivers/nubus/bus.c
@@ -5,6 +5,7 @@
5// Copyright (C) 2017 Finn Thain 5// Copyright (C) 2017 Finn Thain
6 6
7#include <linux/device.h> 7#include <linux/device.h>
8#include <linux/dma-mapping.h>
8#include <linux/list.h> 9#include <linux/list.h>
9#include <linux/nubus.h> 10#include <linux/nubus.h>
10#include <linux/seq_file.h> 11#include <linux/seq_file.h>
@@ -93,6 +94,8 @@ int nubus_device_register(struct nubus_board *board)
93 board->dev.release = nubus_device_release; 94 board->dev.release = nubus_device_release;
94 board->dev.bus = &nubus_bus_type; 95 board->dev.bus = &nubus_bus_type;
95 dev_set_name(&board->dev, "slot.%X", board->slot); 96 dev_set_name(&board->dev, "slot.%X", board->slot);
97 board->dev.dma_mask = &board->dev.coherent_dma_mask;
98 dma_set_mask(&board->dev, DMA_BIT_MASK(32));
96 return device_register(&board->dev); 99 return device_register(&board->dev);
97} 100}
98 101
diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
index 35b7fc87eac5..5cb40b2518f9 100644
--- a/drivers/pci/bus.c
+++ b/drivers/pci/bus.c
@@ -330,7 +330,7 @@ void pci_bus_add_device(struct pci_dev *dev)
330 return; 330 return;
331 } 331 }
332 332
333 dev->is_added = 1; 333 pci_dev_assign_added(dev, true);
334} 334}
335EXPORT_SYMBOL_GPL(pci_bus_add_device); 335EXPORT_SYMBOL_GPL(pci_bus_add_device);
336 336
@@ -347,14 +347,14 @@ void pci_bus_add_devices(const struct pci_bus *bus)
347 347
348 list_for_each_entry(dev, &bus->devices, bus_list) { 348 list_for_each_entry(dev, &bus->devices, bus_list) {
349 /* Skip already-added devices */ 349 /* Skip already-added devices */
350 if (dev->is_added) 350 if (pci_dev_is_added(dev))
351 continue; 351 continue;
352 pci_bus_add_device(dev); 352 pci_bus_add_device(dev);
353 } 353 }
354 354
355 list_for_each_entry(dev, &bus->devices, bus_list) { 355 list_for_each_entry(dev, &bus->devices, bus_list) {
356 /* Skip if device attach failed */ 356 /* Skip if device attach failed */
357 if (!dev->is_added) 357 if (!pci_dev_is_added(dev))
358 continue; 358 continue;
359 child = dev->subordinate; 359 child = dev->subordinate;
360 if (child) 360 if (child)
diff --git a/drivers/pci/controller/pcie-mobiveil.c b/drivers/pci/controller/pcie-mobiveil.c
index 4d6c20e47bed..cf0aa7cee5b0 100644
--- a/drivers/pci/controller/pcie-mobiveil.c
+++ b/drivers/pci/controller/pcie-mobiveil.c
@@ -107,7 +107,7 @@
107#define CFG_WINDOW_TYPE 0 107#define CFG_WINDOW_TYPE 0
108#define IO_WINDOW_TYPE 1 108#define IO_WINDOW_TYPE 1
109#define MEM_WINDOW_TYPE 2 109#define MEM_WINDOW_TYPE 2
110#define IB_WIN_SIZE (256 * 1024 * 1024 * 1024) 110#define IB_WIN_SIZE ((u64)256 * 1024 * 1024 * 1024)
111#define MAX_PIO_WINDOWS 8 111#define MAX_PIO_WINDOWS 8
112 112
113/* Parameters for the waiting for link up routine */ 113/* Parameters for the waiting for link up routine */
diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c
index 3a17b290df5d..ef0b1b6ba86f 100644
--- a/drivers/pci/hotplug/acpiphp_glue.c
+++ b/drivers/pci/hotplug/acpiphp_glue.c
@@ -509,7 +509,7 @@ static void enable_slot(struct acpiphp_slot *slot)
509 509
510 list_for_each_entry(dev, &bus->devices, bus_list) { 510 list_for_each_entry(dev, &bus->devices, bus_list) {
511 /* Assume that newly added devices are powered on already. */ 511 /* Assume that newly added devices are powered on already. */
512 if (!dev->is_added) 512 if (!pci_dev_is_added(dev))
513 dev->current_state = PCI_D0; 513 dev->current_state = PCI_D0;
514 } 514 }
515 515
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 882f1f9596df..08817253c8a2 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -288,6 +288,7 @@ struct pci_sriov {
288 288
289/* pci_dev priv_flags */ 289/* pci_dev priv_flags */
290#define PCI_DEV_DISCONNECTED 0 290#define PCI_DEV_DISCONNECTED 0
291#define PCI_DEV_ADDED 1
291 292
292static inline int pci_dev_set_disconnected(struct pci_dev *dev, void *unused) 293static inline int pci_dev_set_disconnected(struct pci_dev *dev, void *unused)
293{ 294{
@@ -300,6 +301,16 @@ static inline bool pci_dev_is_disconnected(const struct pci_dev *dev)
300 return test_bit(PCI_DEV_DISCONNECTED, &dev->priv_flags); 301 return test_bit(PCI_DEV_DISCONNECTED, &dev->priv_flags);
301} 302}
302 303
304static inline void pci_dev_assign_added(struct pci_dev *dev, bool added)
305{
306 assign_bit(PCI_DEV_ADDED, &dev->priv_flags, added);
307}
308
309static inline bool pci_dev_is_added(const struct pci_dev *dev)
310{
311 return test_bit(PCI_DEV_ADDED, &dev->priv_flags);
312}
313
303#ifdef CONFIG_PCI_ATS 314#ifdef CONFIG_PCI_ATS
304void pci_restore_ats_state(struct pci_dev *dev); 315void pci_restore_ats_state(struct pci_dev *dev);
305#else 316#else
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index ac876e32de4b..611adcd9c169 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -2433,13 +2433,13 @@ int pci_scan_slot(struct pci_bus *bus, int devfn)
2433 dev = pci_scan_single_device(bus, devfn); 2433 dev = pci_scan_single_device(bus, devfn);
2434 if (!dev) 2434 if (!dev)
2435 return 0; 2435 return 0;
2436 if (!dev->is_added) 2436 if (!pci_dev_is_added(dev))
2437 nr++; 2437 nr++;
2438 2438
2439 for (fn = next_fn(bus, dev, 0); fn > 0; fn = next_fn(bus, dev, fn)) { 2439 for (fn = next_fn(bus, dev, 0); fn > 0; fn = next_fn(bus, dev, fn)) {
2440 dev = pci_scan_single_device(bus, devfn + fn); 2440 dev = pci_scan_single_device(bus, devfn + fn);
2441 if (dev) { 2441 if (dev) {
2442 if (!dev->is_added) 2442 if (!pci_dev_is_added(dev))
2443 nr++; 2443 nr++;
2444 dev->multifunction = 1; 2444 dev->multifunction = 1;
2445 } 2445 }
diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
index 6f072eae4f7a..5e3d0dced2b8 100644
--- a/drivers/pci/remove.c
+++ b/drivers/pci/remove.c
@@ -19,11 +19,12 @@ static void pci_stop_dev(struct pci_dev *dev)
19{ 19{
20 pci_pme_active(dev, false); 20 pci_pme_active(dev, false);
21 21
22 if (dev->is_added) { 22 if (pci_dev_is_added(dev)) {
23 device_release_driver(&dev->dev); 23 device_release_driver(&dev->dev);
24 pci_proc_detach_device(dev); 24 pci_proc_detach_device(dev);
25 pci_remove_sysfs_dev_files(dev); 25 pci_remove_sysfs_dev_files(dev);
26 dev->is_added = 0; 26
27 pci_dev_assign_added(dev, false);
27 } 28 }
28 29
29 if (dev->bus->self) 30 if (dev->bus->self)
diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c
index ea23c8dffc25..ffec695e0bfb 100644
--- a/drivers/scsi/fcoe/fcoe_ctlr.c
+++ b/drivers/scsi/fcoe/fcoe_ctlr.c
@@ -754,9 +754,9 @@ int fcoe_ctlr_els_send(struct fcoe_ctlr *fip, struct fc_lport *lport,
754 case ELS_LOGO: 754 case ELS_LOGO:
755 if (fip->mode == FIP_MODE_VN2VN) { 755 if (fip->mode == FIP_MODE_VN2VN) {
756 if (fip->state != FIP_ST_VNMP_UP) 756 if (fip->state != FIP_ST_VNMP_UP)
757 return -EINVAL; 757 goto drop;
758 if (ntoh24(fh->fh_d_id) == FC_FID_FLOGI) 758 if (ntoh24(fh->fh_d_id) == FC_FID_FLOGI)
759 return -EINVAL; 759 goto drop;
760 } else { 760 } else {
761 if (fip->state != FIP_ST_ENABLED) 761 if (fip->state != FIP_ST_ENABLED)
762 return 0; 762 return 0;
@@ -799,9 +799,9 @@ int fcoe_ctlr_els_send(struct fcoe_ctlr *fip, struct fc_lport *lport,
799 fip->send(fip, skb); 799 fip->send(fip, skb);
800 return -EINPROGRESS; 800 return -EINPROGRESS;
801drop: 801drop:
802 kfree_skb(skb);
803 LIBFCOE_FIP_DBG(fip, "drop els_send op %u d_id %x\n", 802 LIBFCOE_FIP_DBG(fip, "drop els_send op %u d_id %x\n",
804 op, ntoh24(fh->fh_d_id)); 803 op, ntoh24(fh->fh_d_id));
804 kfree_skb(skb);
805 return -EINVAL; 805 return -EINVAL;
806} 806}
807EXPORT_SYMBOL(fcoe_ctlr_els_send); 807EXPORT_SYMBOL(fcoe_ctlr_els_send);
diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c
index 31d31aad3de1..89b1f1af2fd4 100644
--- a/drivers/scsi/libfc/fc_rport.c
+++ b/drivers/scsi/libfc/fc_rport.c
@@ -2164,6 +2164,7 @@ static void fc_rport_recv_logo_req(struct fc_lport *lport, struct fc_frame *fp)
2164 FC_RPORT_DBG(rdata, "Received LOGO request while in state %s\n", 2164 FC_RPORT_DBG(rdata, "Received LOGO request while in state %s\n",
2165 fc_rport_state(rdata)); 2165 fc_rport_state(rdata));
2166 2166
2167 rdata->flags &= ~FC_RP_STARTED;
2167 fc_rport_enter_delete(rdata, RPORT_EV_STOP); 2168 fc_rport_enter_delete(rdata, RPORT_EV_STOP);
2168 mutex_unlock(&rdata->rp_mutex); 2169 mutex_unlock(&rdata->rp_mutex);
2169 kref_put(&rdata->kref, fc_rport_destroy); 2170 kref_put(&rdata->kref, fc_rport_destroy);
diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index d6093838f5f2..c972cc2b3d5b 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -284,11 +284,11 @@ static int iscsi_check_tmf_restrictions(struct iscsi_task *task, int opcode)
284 */ 284 */
285 if (opcode != ISCSI_OP_SCSI_DATA_OUT) { 285 if (opcode != ISCSI_OP_SCSI_DATA_OUT) {
286 iscsi_conn_printk(KERN_INFO, conn, 286 iscsi_conn_printk(KERN_INFO, conn,
287 "task [op %x/%x itt " 287 "task [op %x itt "
288 "0x%x/0x%x] " 288 "0x%x/0x%x] "
289 "rejected.\n", 289 "rejected.\n",
290 task->hdr->opcode, opcode, 290 opcode, task->itt,
291 task->itt, task->hdr_itt); 291 task->hdr_itt);
292 return -EACCES; 292 return -EACCES;
293 } 293 }
294 /* 294 /*
@@ -297,10 +297,10 @@ static int iscsi_check_tmf_restrictions(struct iscsi_task *task, int opcode)
297 */ 297 */
298 if (conn->session->fast_abort) { 298 if (conn->session->fast_abort) {
299 iscsi_conn_printk(KERN_INFO, conn, 299 iscsi_conn_printk(KERN_INFO, conn,
300 "task [op %x/%x itt " 300 "task [op %x itt "
301 "0x%x/0x%x] fast abort.\n", 301 "0x%x/0x%x] fast abort.\n",
302 task->hdr->opcode, opcode, 302 opcode, task->itt,
303 task->itt, task->hdr_itt); 303 task->hdr_itt);
304 return -EACCES; 304 return -EACCES;
305 } 305 }
306 break; 306 break;
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c
index 569392d0d4c9..e44c91edf92d 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.c
@@ -3343,11 +3343,10 @@ _base_mpi_ep_writeq(__u64 b, volatile void __iomem *addr,
3343 spinlock_t *writeq_lock) 3343 spinlock_t *writeq_lock)
3344{ 3344{
3345 unsigned long flags; 3345 unsigned long flags;
3346 __u64 data_out = b;
3347 3346
3348 spin_lock_irqsave(writeq_lock, flags); 3347 spin_lock_irqsave(writeq_lock, flags);
3349 writel((u32)(data_out), addr); 3348 __raw_writel((u32)(b), addr);
3350 writel((u32)(data_out >> 32), (addr + 4)); 3349 __raw_writel((u32)(b >> 32), (addr + 4));
3351 mmiowb(); 3350 mmiowb();
3352 spin_unlock_irqrestore(writeq_lock, flags); 3351 spin_unlock_irqrestore(writeq_lock, flags);
3353} 3352}
@@ -3367,7 +3366,8 @@ _base_mpi_ep_writeq(__u64 b, volatile void __iomem *addr,
3367static inline void 3366static inline void
3368_base_writeq(__u64 b, volatile void __iomem *addr, spinlock_t *writeq_lock) 3367_base_writeq(__u64 b, volatile void __iomem *addr, spinlock_t *writeq_lock)
3369{ 3368{
3370 writeq(b, addr); 3369 __raw_writeq(b, addr);
3370 mmiowb();
3371} 3371}
3372#else 3372#else
3373static inline void 3373static inline void
@@ -5268,7 +5268,7 @@ _base_handshake_req_reply_wait(struct MPT3SAS_ADAPTER *ioc, int request_bytes,
5268 5268
5269 /* send message 32-bits at a time */ 5269 /* send message 32-bits at a time */
5270 for (i = 0, failed = 0; i < request_bytes/4 && !failed; i++) { 5270 for (i = 0, failed = 0; i < request_bytes/4 && !failed; i++) {
5271 writel((u32)(request[i]), &ioc->chip->Doorbell); 5271 writel(cpu_to_le32(request[i]), &ioc->chip->Doorbell);
5272 if ((_base_wait_for_doorbell_ack(ioc, 5))) 5272 if ((_base_wait_for_doorbell_ack(ioc, 5)))
5273 failed = 1; 5273 failed = 1;
5274 } 5274 }
@@ -5289,7 +5289,7 @@ _base_handshake_req_reply_wait(struct MPT3SAS_ADAPTER *ioc, int request_bytes,
5289 } 5289 }
5290 5290
5291 /* read the first two 16-bits, it gives the total length of the reply */ 5291 /* read the first two 16-bits, it gives the total length of the reply */
5292 reply[0] = (u16)(readl(&ioc->chip->Doorbell) 5292 reply[0] = le16_to_cpu(readl(&ioc->chip->Doorbell)
5293 & MPI2_DOORBELL_DATA_MASK); 5293 & MPI2_DOORBELL_DATA_MASK);
5294 writel(0, &ioc->chip->HostInterruptStatus); 5294 writel(0, &ioc->chip->HostInterruptStatus);
5295 if ((_base_wait_for_doorbell_int(ioc, 5))) { 5295 if ((_base_wait_for_doorbell_int(ioc, 5))) {
@@ -5298,7 +5298,7 @@ _base_handshake_req_reply_wait(struct MPT3SAS_ADAPTER *ioc, int request_bytes,
5298 ioc->name, __LINE__); 5298 ioc->name, __LINE__);
5299 return -EFAULT; 5299 return -EFAULT;
5300 } 5300 }
5301 reply[1] = (u16)(readl(&ioc->chip->Doorbell) 5301 reply[1] = le16_to_cpu(readl(&ioc->chip->Doorbell)
5302 & MPI2_DOORBELL_DATA_MASK); 5302 & MPI2_DOORBELL_DATA_MASK);
5303 writel(0, &ioc->chip->HostInterruptStatus); 5303 writel(0, &ioc->chip->HostInterruptStatus);
5304 5304
@@ -5312,7 +5312,7 @@ _base_handshake_req_reply_wait(struct MPT3SAS_ADAPTER *ioc, int request_bytes,
5312 if (i >= reply_bytes/2) /* overflow case */ 5312 if (i >= reply_bytes/2) /* overflow case */
5313 readl(&ioc->chip->Doorbell); 5313 readl(&ioc->chip->Doorbell);
5314 else 5314 else
5315 reply[i] = (u16)(readl(&ioc->chip->Doorbell) 5315 reply[i] = le16_to_cpu(readl(&ioc->chip->Doorbell)
5316 & MPI2_DOORBELL_DATA_MASK); 5316 & MPI2_DOORBELL_DATA_MASK);
5317 writel(0, &ioc->chip->HostInterruptStatus); 5317 writel(0, &ioc->chip->HostInterruptStatus);
5318 } 5318 }
diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c
index 091ec1207bea..cff83b9457f7 100644
--- a/drivers/scsi/qedi/qedi_main.c
+++ b/drivers/scsi/qedi/qedi_main.c
@@ -888,7 +888,7 @@ static void qedi_get_boot_tgt_info(struct nvm_iscsi_block *block,
888 ipv6_en = !!(block->generic.ctrl_flags & 888 ipv6_en = !!(block->generic.ctrl_flags &
889 NVM_ISCSI_CFG_GEN_IPV6_ENABLED); 889 NVM_ISCSI_CFG_GEN_IPV6_ENABLED);
890 890
891 snprintf(tgt->iscsi_name, NVM_ISCSI_CFG_ISCSI_NAME_MAX_LEN, "%s\n", 891 snprintf(tgt->iscsi_name, sizeof(tgt->iscsi_name), "%s\n",
892 block->target[index].target_name.byte); 892 block->target[index].target_name.byte);
893 893
894 tgt->ipv6_en = ipv6_en; 894 tgt->ipv6_en = ipv6_en;
diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c
index 89a4999fa631..c8731568f9c4 100644
--- a/drivers/scsi/qla2xxx/qla_attr.c
+++ b/drivers/scsi/qla2xxx/qla_attr.c
@@ -2141,6 +2141,7 @@ qla24xx_vport_delete(struct fc_vport *fc_vport)
2141 msleep(1000); 2141 msleep(1000);
2142 2142
2143 qla24xx_disable_vp(vha); 2143 qla24xx_disable_vp(vha);
2144 qla2x00_wait_for_sess_deletion(vha);
2144 2145
2145 vha->flags.delete_progress = 1; 2146 vha->flags.delete_progress = 1;
2146 2147
diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h
index f68eb6096559..2660a48d918a 100644
--- a/drivers/scsi/qla2xxx/qla_gbl.h
+++ b/drivers/scsi/qla2xxx/qla_gbl.h
@@ -214,6 +214,7 @@ void qla2x00_handle_login_done_event(struct scsi_qla_host *, fc_port_t *,
214int qla24xx_post_gnl_work(struct scsi_qla_host *, fc_port_t *); 214int qla24xx_post_gnl_work(struct scsi_qla_host *, fc_port_t *);
215int qla24xx_async_abort_cmd(srb_t *); 215int qla24xx_async_abort_cmd(srb_t *);
216int qla24xx_post_relogin_work(struct scsi_qla_host *vha); 216int qla24xx_post_relogin_work(struct scsi_qla_host *vha);
217void qla2x00_wait_for_sess_deletion(scsi_qla_host_t *);
217 218
218/* 219/*
219 * Global Functions in qla_mid.c source file. 220 * Global Functions in qla_mid.c source file.
diff --git a/drivers/scsi/qla2xxx/qla_gs.c b/drivers/scsi/qla2xxx/qla_gs.c
index 2c35b0b2baa0..7a3744006419 100644
--- a/drivers/scsi/qla2xxx/qla_gs.c
+++ b/drivers/scsi/qla2xxx/qla_gs.c
@@ -3708,6 +3708,10 @@ int qla24xx_async_gpnid(scsi_qla_host_t *vha, port_id_t *id)
3708 return rval; 3708 return rval;
3709 3709
3710done_free_sp: 3710done_free_sp:
3711 spin_lock_irqsave(&vha->hw->vport_slock, flags);
3712 list_del(&sp->elem);
3713 spin_unlock_irqrestore(&vha->hw->vport_slock, flags);
3714
3711 if (sp->u.iocb_cmd.u.ctarg.req) { 3715 if (sp->u.iocb_cmd.u.ctarg.req) {
3712 dma_free_coherent(&vha->hw->pdev->dev, 3716 dma_free_coherent(&vha->hw->pdev->dev,
3713 sizeof(struct ct_sns_pkt), 3717 sizeof(struct ct_sns_pkt),
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index db0e3279e07a..1b19b954bbae 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -1489,11 +1489,10 @@ qla2x00_async_tm_cmd(fc_port_t *fcport, uint32_t flags, uint32_t lun,
1489 1489
1490 wait_for_completion(&tm_iocb->u.tmf.comp); 1490 wait_for_completion(&tm_iocb->u.tmf.comp);
1491 1491
1492 rval = tm_iocb->u.tmf.comp_status == CS_COMPLETE ? 1492 rval = tm_iocb->u.tmf.data;
1493 QLA_SUCCESS : QLA_FUNCTION_FAILED;
1494 1493
1495 if ((rval != QLA_SUCCESS) || tm_iocb->u.tmf.data) { 1494 if (rval != QLA_SUCCESS) {
1496 ql_dbg(ql_dbg_taskm, vha, 0x8030, 1495 ql_log(ql_log_warn, vha, 0x8030,
1497 "TM IOCB failed (%x).\n", rval); 1496 "TM IOCB failed (%x).\n", rval);
1498 } 1497 }
1499 1498
diff --git a/drivers/scsi/qla2xxx/qla_inline.h b/drivers/scsi/qla2xxx/qla_inline.h
index 37ae0f6d8ae5..59fd5a9dfeb8 100644
--- a/drivers/scsi/qla2xxx/qla_inline.h
+++ b/drivers/scsi/qla2xxx/qla_inline.h
@@ -222,6 +222,8 @@ qla2xxx_get_qpair_sp(struct qla_qpair *qpair, fc_port_t *fcport, gfp_t flag)
222 sp->fcport = fcport; 222 sp->fcport = fcport;
223 sp->iocbs = 1; 223 sp->iocbs = 1;
224 sp->vha = qpair->vha; 224 sp->vha = qpair->vha;
225 INIT_LIST_HEAD(&sp->elem);
226
225done: 227done:
226 if (!sp) 228 if (!sp)
227 QLA_QPAIR_MARK_NOT_BUSY(qpair); 229 QLA_QPAIR_MARK_NOT_BUSY(qpair);
diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c
index a91cca52b5d5..dd93a22fe843 100644
--- a/drivers/scsi/qla2xxx/qla_iocb.c
+++ b/drivers/scsi/qla2xxx/qla_iocb.c
@@ -2130,34 +2130,11 @@ __qla2x00_alloc_iocbs(struct qla_qpair *qpair, srb_t *sp)
2130 req_cnt = 1; 2130 req_cnt = 1;
2131 handle = 0; 2131 handle = 0;
2132 2132
2133 if (!sp) 2133 if (sp && (sp->type != SRB_SCSI_CMD)) {
2134 goto skip_cmd_array; 2134 /* Adjust entry-counts as needed. */
2135
2136 /* Check for room in outstanding command list. */
2137 handle = req->current_outstanding_cmd;
2138 for (index = 1; index < req->num_outstanding_cmds; index++) {
2139 handle++;
2140 if (handle == req->num_outstanding_cmds)
2141 handle = 1;
2142 if (!req->outstanding_cmds[handle])
2143 break;
2144 }
2145 if (index == req->num_outstanding_cmds) {
2146 ql_log(ql_log_warn, vha, 0x700b,
2147 "No room on outstanding cmd array.\n");
2148 goto queuing_error;
2149 }
2150
2151 /* Prep command array. */
2152 req->current_outstanding_cmd = handle;
2153 req->outstanding_cmds[handle] = sp;
2154 sp->handle = handle;
2155
2156 /* Adjust entry-counts as needed. */
2157 if (sp->type != SRB_SCSI_CMD)
2158 req_cnt = sp->iocbs; 2135 req_cnt = sp->iocbs;
2136 }
2159 2137
2160skip_cmd_array:
2161 /* Check for room on request queue. */ 2138 /* Check for room on request queue. */
2162 if (req->cnt < req_cnt + 2) { 2139 if (req->cnt < req_cnt + 2) {
2163 if (qpair->use_shadow_reg) 2140 if (qpair->use_shadow_reg)
@@ -2183,6 +2160,28 @@ skip_cmd_array:
2183 if (req->cnt < req_cnt + 2) 2160 if (req->cnt < req_cnt + 2)
2184 goto queuing_error; 2161 goto queuing_error;
2185 2162
2163 if (sp) {
2164 /* Check for room in outstanding command list. */
2165 handle = req->current_outstanding_cmd;
2166 for (index = 1; index < req->num_outstanding_cmds; index++) {
2167 handle++;
2168 if (handle == req->num_outstanding_cmds)
2169 handle = 1;
2170 if (!req->outstanding_cmds[handle])
2171 break;
2172 }
2173 if (index == req->num_outstanding_cmds) {
2174 ql_log(ql_log_warn, vha, 0x700b,
2175 "No room on outstanding cmd array.\n");
2176 goto queuing_error;
2177 }
2178
2179 /* Prep command array. */
2180 req->current_outstanding_cmd = handle;
2181 req->outstanding_cmds[handle] = sp;
2182 sp->handle = handle;
2183 }
2184
2186 /* Prep packet */ 2185 /* Prep packet */
2187 req->cnt -= req_cnt; 2186 req->cnt -= req_cnt;
2188 pkt = req->ring_ptr; 2187 pkt = req->ring_ptr;
@@ -2195,6 +2194,8 @@ skip_cmd_array:
2195 pkt->handle = handle; 2194 pkt->handle = handle;
2196 } 2195 }
2197 2196
2197 return pkt;
2198
2198queuing_error: 2199queuing_error:
2199 qpair->tgt_counters.num_alloc_iocb_failed++; 2200 qpair->tgt_counters.num_alloc_iocb_failed++;
2200 return pkt; 2201 return pkt;
diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
index 9fa5a2557f2c..7756106d4555 100644
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c
@@ -631,6 +631,9 @@ qla2x00_async_event(scsi_qla_host_t *vha, struct rsp_que *rsp, uint16_t *mb)
631 unsigned long flags; 631 unsigned long flags;
632 fc_port_t *fcport = NULL; 632 fc_port_t *fcport = NULL;
633 633
634 if (!vha->hw->flags.fw_started)
635 return;
636
634 /* Setup to process RIO completion. */ 637 /* Setup to process RIO completion. */
635 handle_cnt = 0; 638 handle_cnt = 0;
636 if (IS_CNA_CAPABLE(ha)) 639 if (IS_CNA_CAPABLE(ha))
diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c
index 7e875f575229..f0ec13d48bf3 100644
--- a/drivers/scsi/qla2xxx/qla_mbx.c
+++ b/drivers/scsi/qla2xxx/qla_mbx.c
@@ -4220,6 +4220,9 @@ qla25xx_init_req_que(struct scsi_qla_host *vha, struct req_que *req)
4220 mbx_cmd_t *mcp = &mc; 4220 mbx_cmd_t *mcp = &mc;
4221 struct qla_hw_data *ha = vha->hw; 4221 struct qla_hw_data *ha = vha->hw;
4222 4222
4223 if (!ha->flags.fw_started)
4224 return QLA_SUCCESS;
4225
4223 ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x10d3, 4226 ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x10d3,
4224 "Entered %s.\n", __func__); 4227 "Entered %s.\n", __func__);
4225 4228
@@ -4289,6 +4292,9 @@ qla25xx_init_rsp_que(struct scsi_qla_host *vha, struct rsp_que *rsp)
4289 mbx_cmd_t *mcp = &mc; 4292 mbx_cmd_t *mcp = &mc;
4290 struct qla_hw_data *ha = vha->hw; 4293 struct qla_hw_data *ha = vha->hw;
4291 4294
4295 if (!ha->flags.fw_started)
4296 return QLA_SUCCESS;
4297
4292 ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x10d6, 4298 ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x10d6,
4293 "Entered %s.\n", __func__); 4299 "Entered %s.\n", __func__);
4294 4300
diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c
index f6f0a759a7c2..aa727d07b702 100644
--- a/drivers/scsi/qla2xxx/qla_mid.c
+++ b/drivers/scsi/qla2xxx/qla_mid.c
@@ -152,11 +152,18 @@ int
152qla24xx_disable_vp(scsi_qla_host_t *vha) 152qla24xx_disable_vp(scsi_qla_host_t *vha)
153{ 153{
154 unsigned long flags; 154 unsigned long flags;
155 int ret; 155 int ret = QLA_SUCCESS;
156 fc_port_t *fcport;
157
158 if (vha->hw->flags.fw_started)
159 ret = qla24xx_control_vp(vha, VCE_COMMAND_DISABLE_VPS_LOGO_ALL);
156 160
157 ret = qla24xx_control_vp(vha, VCE_COMMAND_DISABLE_VPS_LOGO_ALL);
158 atomic_set(&vha->loop_state, LOOP_DOWN); 161 atomic_set(&vha->loop_state, LOOP_DOWN);
159 atomic_set(&vha->loop_down_timer, LOOP_DOWN_TIME); 162 atomic_set(&vha->loop_down_timer, LOOP_DOWN_TIME);
163 list_for_each_entry(fcport, &vha->vp_fcports, list)
164 fcport->logout_on_delete = 0;
165
166 qla2x00_mark_all_devices_lost(vha, 0);
160 167
161 /* Remove port id from vp target map */ 168 /* Remove port id from vp target map */
162 spin_lock_irqsave(&vha->hw->hardware_lock, flags); 169 spin_lock_irqsave(&vha->hw->hardware_lock, flags);
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index 9f309e572be4..1fbd16c8c9a7 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -303,6 +303,7 @@ static void qla2x00_free_device(scsi_qla_host_t *);
303static int qla2xxx_map_queues(struct Scsi_Host *shost); 303static int qla2xxx_map_queues(struct Scsi_Host *shost);
304static void qla2x00_destroy_deferred_work(struct qla_hw_data *); 304static void qla2x00_destroy_deferred_work(struct qla_hw_data *);
305 305
306
306struct scsi_host_template qla2xxx_driver_template = { 307struct scsi_host_template qla2xxx_driver_template = {
307 .module = THIS_MODULE, 308 .module = THIS_MODULE,
308 .name = QLA2XXX_DRIVER_NAME, 309 .name = QLA2XXX_DRIVER_NAME,
@@ -1147,7 +1148,7 @@ static inline int test_fcport_count(scsi_qla_host_t *vha)
1147 * qla2x00_wait_for_sess_deletion can only be called from remove_one. 1148 * qla2x00_wait_for_sess_deletion can only be called from remove_one.
1148 * it has dependency on UNLOADING flag to stop device discovery 1149 * it has dependency on UNLOADING flag to stop device discovery
1149 */ 1150 */
1150static void 1151void
1151qla2x00_wait_for_sess_deletion(scsi_qla_host_t *vha) 1152qla2x00_wait_for_sess_deletion(scsi_qla_host_t *vha)
1152{ 1153{
1153 qla2x00_mark_all_devices_lost(vha, 0); 1154 qla2x00_mark_all_devices_lost(vha, 0);
@@ -3603,6 +3604,8 @@ qla2x00_remove_one(struct pci_dev *pdev)
3603 3604
3604 base_vha = pci_get_drvdata(pdev); 3605 base_vha = pci_get_drvdata(pdev);
3605 ha = base_vha->hw; 3606 ha = base_vha->hw;
3607 ql_log(ql_log_info, base_vha, 0xb079,
3608 "Removing driver\n");
3606 3609
3607 /* Indicate device removal to prevent future board_disable and wait 3610 /* Indicate device removal to prevent future board_disable and wait
3608 * until any pending board_disable has completed. */ 3611 * until any pending board_disable has completed. */
@@ -3625,6 +3628,21 @@ qla2x00_remove_one(struct pci_dev *pdev)
3625 } 3628 }
3626 qla2x00_wait_for_hba_ready(base_vha); 3629 qla2x00_wait_for_hba_ready(base_vha);
3627 3630
3631 if (IS_QLA25XX(ha) || IS_QLA2031(ha) || IS_QLA27XX(ha)) {
3632 if (ha->flags.fw_started)
3633 qla2x00_abort_isp_cleanup(base_vha);
3634 } else if (!IS_QLAFX00(ha)) {
3635 if (IS_QLA8031(ha)) {
3636 ql_dbg(ql_dbg_p3p, base_vha, 0xb07e,
3637 "Clearing fcoe driver presence.\n");
3638 if (qla83xx_clear_drv_presence(base_vha) != QLA_SUCCESS)
3639 ql_dbg(ql_dbg_p3p, base_vha, 0xb079,
3640 "Error while clearing DRV-Presence.\n");
3641 }
3642
3643 qla2x00_try_to_stop_firmware(base_vha);
3644 }
3645
3628 qla2x00_wait_for_sess_deletion(base_vha); 3646 qla2x00_wait_for_sess_deletion(base_vha);
3629 3647
3630 /* 3648 /*
@@ -3648,14 +3666,6 @@ qla2x00_remove_one(struct pci_dev *pdev)
3648 3666
3649 qla2x00_delete_all_vps(ha, base_vha); 3667 qla2x00_delete_all_vps(ha, base_vha);
3650 3668
3651 if (IS_QLA8031(ha)) {
3652 ql_dbg(ql_dbg_p3p, base_vha, 0xb07e,
3653 "Clearing fcoe driver presence.\n");
3654 if (qla83xx_clear_drv_presence(base_vha) != QLA_SUCCESS)
3655 ql_dbg(ql_dbg_p3p, base_vha, 0xb079,
3656 "Error while clearing DRV-Presence.\n");
3657 }
3658
3659 qla2x00_abort_all_cmds(base_vha, DID_NO_CONNECT << 16); 3669 qla2x00_abort_all_cmds(base_vha, DID_NO_CONNECT << 16);
3660 3670
3661 qla2x00_dfs_remove(base_vha); 3671 qla2x00_dfs_remove(base_vha);
@@ -3715,24 +3725,6 @@ qla2x00_free_device(scsi_qla_host_t *vha)
3715 qla2x00_stop_timer(vha); 3725 qla2x00_stop_timer(vha);
3716 3726
3717 qla25xx_delete_queues(vha); 3727 qla25xx_delete_queues(vha);
3718
3719 if (ha->flags.fce_enabled)
3720 qla2x00_disable_fce_trace(vha, NULL, NULL);
3721
3722 if (ha->eft)
3723 qla2x00_disable_eft_trace(vha);
3724
3725 if (IS_QLA25XX(ha) || IS_QLA2031(ha) || IS_QLA27XX(ha)) {
3726 if (ha->flags.fw_started)
3727 qla2x00_abort_isp_cleanup(vha);
3728 } else {
3729 if (ha->flags.fw_started) {
3730 /* Stop currently executing firmware. */
3731 qla2x00_try_to_stop_firmware(vha);
3732 ha->flags.fw_started = 0;
3733 }
3734 }
3735
3736 vha->flags.online = 0; 3728 vha->flags.online = 0;
3737 3729
3738 /* turn-off interrupts on the card */ 3730 /* turn-off interrupts on the card */
@@ -6028,8 +6020,9 @@ qla2x00_do_dpc(void *data)
6028 set_bit(ISP_ABORT_NEEDED, &base_vha->dpc_flags); 6020 set_bit(ISP_ABORT_NEEDED, &base_vha->dpc_flags);
6029 } 6021 }
6030 6022
6031 if (test_and_clear_bit(ISP_ABORT_NEEDED, 6023 if (test_and_clear_bit
6032 &base_vha->dpc_flags)) { 6024 (ISP_ABORT_NEEDED, &base_vha->dpc_flags) &&
6025 !test_bit(UNLOADING, &base_vha->dpc_flags)) {
6033 6026
6034 ql_dbg(ql_dbg_dpc, base_vha, 0x4007, 6027 ql_dbg(ql_dbg_dpc, base_vha, 0x4007,
6035 "ISP abort scheduled.\n"); 6028 "ISP abort scheduled.\n");
diff --git a/drivers/scsi/qla2xxx/qla_sup.c b/drivers/scsi/qla2xxx/qla_sup.c
index 04458eb19d38..4499c787165f 100644
--- a/drivers/scsi/qla2xxx/qla_sup.c
+++ b/drivers/scsi/qla2xxx/qla_sup.c
@@ -1880,6 +1880,9 @@ qla24xx_beacon_off(struct scsi_qla_host *vha)
1880 if (IS_P3P_TYPE(ha)) 1880 if (IS_P3P_TYPE(ha))
1881 return QLA_SUCCESS; 1881 return QLA_SUCCESS;
1882 1882
1883 if (!ha->flags.fw_started)
1884 return QLA_SUCCESS;
1885
1883 ha->beacon_blink_led = 0; 1886 ha->beacon_blink_led = 0;
1884 1887
1885 if (IS_QLA2031(ha) || IS_QLA27XX(ha)) 1888 if (IS_QLA2031(ha) || IS_QLA27XX(ha))
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index cd2fdac000c9..ba9ba0e04f42 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -1741,15 +1741,11 @@ sg_start_req(Sg_request *srp, unsigned char *cmd)
1741 * 1741 *
1742 * With scsi-mq enabled, there are a fixed number of preallocated 1742 * With scsi-mq enabled, there are a fixed number of preallocated
1743 * requests equal in number to shost->can_queue. If all of the 1743 * requests equal in number to shost->can_queue. If all of the
1744 * preallocated requests are already in use, then using GFP_ATOMIC with 1744 * preallocated requests are already in use, then blk_get_request()
1745 * blk_get_request() will return -EWOULDBLOCK, whereas using GFP_KERNEL 1745 * will sleep until an active command completes, freeing up a request.
1746 * will cause blk_get_request() to sleep until an active command 1746 * Although waiting in an asynchronous interface is less than ideal, we
1747 * completes, freeing up a request. Neither option is ideal, but 1747 * do not want to use BLK_MQ_REQ_NOWAIT here because userspace might
1748 * GFP_KERNEL is the better choice to prevent userspace from getting an 1748 * not expect an EWOULDBLOCK from this condition.
1749 * unexpected EWOULDBLOCK.
1750 *
1751 * With scsi-mq disabled, blk_get_request() with GFP_KERNEL usually
1752 * does not sleep except under memory pressure.
1753 */ 1749 */
1754 rq = blk_get_request(q, hp->dxfer_direction == SG_DXFER_TO_DEV ? 1750 rq = blk_get_request(q, hp->dxfer_direction == SG_DXFER_TO_DEV ?
1755 REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, 0); 1751 REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, 0);
@@ -2185,6 +2181,7 @@ sg_add_sfp(Sg_device * sdp)
2185 write_lock_irqsave(&sdp->sfd_lock, iflags); 2181 write_lock_irqsave(&sdp->sfd_lock, iflags);
2186 if (atomic_read(&sdp->detaching)) { 2182 if (atomic_read(&sdp->detaching)) {
2187 write_unlock_irqrestore(&sdp->sfd_lock, iflags); 2183 write_unlock_irqrestore(&sdp->sfd_lock, iflags);
2184 kfree(sfp);
2188 return ERR_PTR(-ENODEV); 2185 return ERR_PTR(-ENODEV);
2189 } 2186 }
2190 list_add_tail(&sfp->sfd_siblings, &sdp->sfds); 2187 list_add_tail(&sfp->sfd_siblings, &sdp->sfds);
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index 3f3cb72e0c0c..d0389b20574d 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -523,18 +523,26 @@ static int sr_init_command(struct scsi_cmnd *SCpnt)
523static int sr_block_open(struct block_device *bdev, fmode_t mode) 523static int sr_block_open(struct block_device *bdev, fmode_t mode)
524{ 524{
525 struct scsi_cd *cd; 525 struct scsi_cd *cd;
526 struct scsi_device *sdev;
526 int ret = -ENXIO; 527 int ret = -ENXIO;
527 528
529 cd = scsi_cd_get(bdev->bd_disk);
530 if (!cd)
531 goto out;
532
533 sdev = cd->device;
534 scsi_autopm_get_device(sdev);
528 check_disk_change(bdev); 535 check_disk_change(bdev);
529 536
530 mutex_lock(&sr_mutex); 537 mutex_lock(&sr_mutex);
531 cd = scsi_cd_get(bdev->bd_disk); 538 ret = cdrom_open(&cd->cdi, bdev, mode);
532 if (cd) {
533 ret = cdrom_open(&cd->cdi, bdev, mode);
534 if (ret)
535 scsi_cd_put(cd);
536 }
537 mutex_unlock(&sr_mutex); 539 mutex_unlock(&sr_mutex);
540
541 scsi_autopm_put_device(sdev);
542 if (ret)
543 scsi_cd_put(cd);
544
545out:
538 return ret; 546 return ret;
539} 547}
540 548
@@ -562,6 +570,8 @@ static int sr_block_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
562 if (ret) 570 if (ret)
563 goto out; 571 goto out;
564 572
573 scsi_autopm_get_device(sdev);
574
565 /* 575 /*
566 * Send SCSI addressing ioctls directly to mid level, send other 576 * Send SCSI addressing ioctls directly to mid level, send other
567 * ioctls to cdrom/block level. 577 * ioctls to cdrom/block level.
@@ -570,15 +580,18 @@ static int sr_block_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
570 case SCSI_IOCTL_GET_IDLUN: 580 case SCSI_IOCTL_GET_IDLUN:
571 case SCSI_IOCTL_GET_BUS_NUMBER: 581 case SCSI_IOCTL_GET_BUS_NUMBER:
572 ret = scsi_ioctl(sdev, cmd, argp); 582 ret = scsi_ioctl(sdev, cmd, argp);
573 goto out; 583 goto put;
574 } 584 }
575 585
576 ret = cdrom_ioctl(&cd->cdi, bdev, mode, cmd, arg); 586 ret = cdrom_ioctl(&cd->cdi, bdev, mode, cmd, arg);
577 if (ret != -ENOSYS) 587 if (ret != -ENOSYS)
578 goto out; 588 goto put;
579 589
580 ret = scsi_ioctl(sdev, cmd, argp); 590 ret = scsi_ioctl(sdev, cmd, argp);
581 591
592put:
593 scsi_autopm_put_device(sdev);
594
582out: 595out:
583 mutex_unlock(&sr_mutex); 596 mutex_unlock(&sr_mutex);
584 return ret; 597 return ret;
diff --git a/drivers/scsi/vmw_pvscsi.c b/drivers/scsi/vmw_pvscsi.c
index 777e5f1e52d1..0cd947f78b5b 100644
--- a/drivers/scsi/vmw_pvscsi.c
+++ b/drivers/scsi/vmw_pvscsi.c
@@ -561,9 +561,14 @@ static void pvscsi_complete_request(struct pvscsi_adapter *adapter,
561 (btstat == BTSTAT_SUCCESS || 561 (btstat == BTSTAT_SUCCESS ||
562 btstat == BTSTAT_LINKED_COMMAND_COMPLETED || 562 btstat == BTSTAT_LINKED_COMMAND_COMPLETED ||
563 btstat == BTSTAT_LINKED_COMMAND_COMPLETED_WITH_FLAG)) { 563 btstat == BTSTAT_LINKED_COMMAND_COMPLETED_WITH_FLAG)) {
564 cmd->result = (DID_OK << 16) | sdstat; 564 if (sdstat == SAM_STAT_COMMAND_TERMINATED) {
565 if (sdstat == SAM_STAT_CHECK_CONDITION && cmd->sense_buffer) 565 cmd->result = (DID_RESET << 16);
566 cmd->result |= (DRIVER_SENSE << 24); 566 } else {
567 cmd->result = (DID_OK << 16) | sdstat;
568 if (sdstat == SAM_STAT_CHECK_CONDITION &&
569 cmd->sense_buffer)
570 cmd->result |= (DRIVER_SENSE << 24);
571 }
567 } else 572 } else
568 switch (btstat) { 573 switch (btstat) {
569 case BTSTAT_SUCCESS: 574 case BTSTAT_SUCCESS:
diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c
index a1a0025b59e0..d5d33e12e952 100644
--- a/drivers/staging/android/ashmem.c
+++ b/drivers/staging/android/ashmem.c
@@ -402,6 +402,8 @@ static int ashmem_mmap(struct file *file, struct vm_area_struct *vma)
402 fput(asma->file); 402 fput(asma->file);
403 goto out; 403 goto out;
404 } 404 }
405 } else {
406 vma_set_anonymous(vma);
405 } 407 }
406 408
407 if (vma->vm_file) 409 if (vma->vm_file)
diff --git a/drivers/target/iscsi/cxgbit/cxgbit_target.c b/drivers/target/iscsi/cxgbit/cxgbit_target.c
index 514986b57c2d..25eb3891e34b 100644
--- a/drivers/target/iscsi/cxgbit/cxgbit_target.c
+++ b/drivers/target/iscsi/cxgbit/cxgbit_target.c
@@ -652,6 +652,7 @@ static int cxgbit_set_iso_npdu(struct cxgbit_sock *csk)
652 struct iscsi_param *param; 652 struct iscsi_param *param;
653 u32 mrdsl, mbl; 653 u32 mrdsl, mbl;
654 u32 max_npdu, max_iso_npdu; 654 u32 max_npdu, max_iso_npdu;
655 u32 max_iso_payload;
655 656
656 if (conn->login->leading_connection) { 657 if (conn->login->leading_connection) {
657 param = iscsi_find_param_from_key(MAXBURSTLENGTH, 658 param = iscsi_find_param_from_key(MAXBURSTLENGTH,
@@ -670,8 +671,10 @@ static int cxgbit_set_iso_npdu(struct cxgbit_sock *csk)
670 mrdsl = conn_ops->MaxRecvDataSegmentLength; 671 mrdsl = conn_ops->MaxRecvDataSegmentLength;
671 max_npdu = mbl / mrdsl; 672 max_npdu = mbl / mrdsl;
672 673
673 max_iso_npdu = CXGBIT_MAX_ISO_PAYLOAD / 674 max_iso_payload = rounddown(CXGBIT_MAX_ISO_PAYLOAD, csk->emss);
674 (ISCSI_HDR_LEN + mrdsl + 675
676 max_iso_npdu = max_iso_payload /
677 (ISCSI_HDR_LEN + mrdsl +
675 cxgbit_digest_len[csk->submode]); 678 cxgbit_digest_len[csk->submode]);
676 679
677 csk->max_iso_npdu = min(max_npdu, max_iso_npdu); 680 csk->max_iso_npdu = min(max_npdu, max_iso_npdu);
@@ -741,6 +744,9 @@ static int cxgbit_set_params(struct iscsi_conn *conn)
741 if (conn_ops->MaxRecvDataSegmentLength > cdev->mdsl) 744 if (conn_ops->MaxRecvDataSegmentLength > cdev->mdsl)
742 conn_ops->MaxRecvDataSegmentLength = cdev->mdsl; 745 conn_ops->MaxRecvDataSegmentLength = cdev->mdsl;
743 746
747 if (cxgbit_set_digest(csk))
748 return -1;
749
744 if (conn->login->leading_connection) { 750 if (conn->login->leading_connection) {
745 param = iscsi_find_param_from_key(ERRORRECOVERYLEVEL, 751 param = iscsi_find_param_from_key(ERRORRECOVERYLEVEL,
746 conn->param_list); 752 conn->param_list);
@@ -764,7 +770,7 @@ static int cxgbit_set_params(struct iscsi_conn *conn)
764 if (is_t5(cdev->lldi.adapter_type)) 770 if (is_t5(cdev->lldi.adapter_type))
765 goto enable_ddp; 771 goto enable_ddp;
766 else 772 else
767 goto enable_digest; 773 return 0;
768 } 774 }
769 775
770 if (test_bit(CDEV_ISO_ENABLE, &cdev->flags)) { 776 if (test_bit(CDEV_ISO_ENABLE, &cdev->flags)) {
@@ -781,10 +787,6 @@ enable_ddp:
781 } 787 }
782 } 788 }
783 789
784enable_digest:
785 if (cxgbit_set_digest(csk))
786 return -1;
787
788 return 0; 790 return 0;
789} 791}
790 792
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index a502f1af4a21..ed3114556fda 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -1560,9 +1560,12 @@ int vhost_init_device_iotlb(struct vhost_dev *d, bool enabled)
1560 d->iotlb = niotlb; 1560 d->iotlb = niotlb;
1561 1561
1562 for (i = 0; i < d->nvqs; ++i) { 1562 for (i = 0; i < d->nvqs; ++i) {
1563 mutex_lock(&d->vqs[i]->mutex); 1563 struct vhost_virtqueue *vq = d->vqs[i];
1564 d->vqs[i]->iotlb = niotlb; 1564
1565 mutex_unlock(&d->vqs[i]->mutex); 1565 mutex_lock(&vq->mutex);
1566 vq->iotlb = niotlb;
1567 __vhost_vq_meta_reset(vq);
1568 mutex_unlock(&vq->mutex);
1566 } 1569 }
1567 1570
1568 vhost_umem_clean(oiotlb); 1571 vhost_umem_clean(oiotlb);
diff --git a/drivers/video/fbdev/efifb.c b/drivers/video/fbdev/efifb.c
index 46a4484e3da7..c6f78d27947b 100644
--- a/drivers/video/fbdev/efifb.c
+++ b/drivers/video/fbdev/efifb.c
@@ -20,7 +20,7 @@
20#include <drm/drm_connector.h> /* For DRM_MODE_PANEL_ORIENTATION_* */ 20#include <drm/drm_connector.h> /* For DRM_MODE_PANEL_ORIENTATION_* */
21 21
22static bool request_mem_succeeded = false; 22static bool request_mem_succeeded = false;
23static bool nowc = false; 23static u64 mem_flags = EFI_MEMORY_WC | EFI_MEMORY_UC;
24 24
25static struct fb_var_screeninfo efifb_defined = { 25static struct fb_var_screeninfo efifb_defined = {
26 .activate = FB_ACTIVATE_NOW, 26 .activate = FB_ACTIVATE_NOW,
@@ -68,8 +68,12 @@ static int efifb_setcolreg(unsigned regno, unsigned red, unsigned green,
68 68
69static void efifb_destroy(struct fb_info *info) 69static void efifb_destroy(struct fb_info *info)
70{ 70{
71 if (info->screen_base) 71 if (info->screen_base) {
72 iounmap(info->screen_base); 72 if (mem_flags & (EFI_MEMORY_UC | EFI_MEMORY_WC))
73 iounmap(info->screen_base);
74 else
75 memunmap(info->screen_base);
76 }
73 if (request_mem_succeeded) 77 if (request_mem_succeeded)
74 release_mem_region(info->apertures->ranges[0].base, 78 release_mem_region(info->apertures->ranges[0].base,
75 info->apertures->ranges[0].size); 79 info->apertures->ranges[0].size);
@@ -104,7 +108,7 @@ static int efifb_setup(char *options)
104 else if (!strncmp(this_opt, "width:", 6)) 108 else if (!strncmp(this_opt, "width:", 6))
105 screen_info.lfb_width = simple_strtoul(this_opt+6, NULL, 0); 109 screen_info.lfb_width = simple_strtoul(this_opt+6, NULL, 0);
106 else if (!strcmp(this_opt, "nowc")) 110 else if (!strcmp(this_opt, "nowc"))
107 nowc = true; 111 mem_flags &= ~EFI_MEMORY_WC;
108 } 112 }
109 } 113 }
110 114
@@ -164,6 +168,7 @@ static int efifb_probe(struct platform_device *dev)
164 unsigned int size_remap; 168 unsigned int size_remap;
165 unsigned int size_total; 169 unsigned int size_total;
166 char *option = NULL; 170 char *option = NULL;
171 efi_memory_desc_t md;
167 172
168 if (screen_info.orig_video_isVGA != VIDEO_TYPE_EFI || pci_dev_disabled) 173 if (screen_info.orig_video_isVGA != VIDEO_TYPE_EFI || pci_dev_disabled)
169 return -ENODEV; 174 return -ENODEV;
@@ -272,12 +277,35 @@ static int efifb_probe(struct platform_device *dev)
272 info->apertures->ranges[0].base = efifb_fix.smem_start; 277 info->apertures->ranges[0].base = efifb_fix.smem_start;
273 info->apertures->ranges[0].size = size_remap; 278 info->apertures->ranges[0].size = size_remap;
274 279
275 if (nowc) 280 if (!efi_mem_desc_lookup(efifb_fix.smem_start, &md)) {
276 info->screen_base = ioremap(efifb_fix.smem_start, efifb_fix.smem_len); 281 if ((efifb_fix.smem_start + efifb_fix.smem_len) >
277 else 282 (md.phys_addr + (md.num_pages << EFI_PAGE_SHIFT))) {
278 info->screen_base = ioremap_wc(efifb_fix.smem_start, efifb_fix.smem_len); 283 pr_err("efifb: video memory @ 0x%lx spans multiple EFI memory regions\n",
284 efifb_fix.smem_start);
285 err = -EIO;
286 goto err_release_fb;
287 }
288 /*
289 * If the UEFI memory map covers the efifb region, we may only
290 * remap it using the attributes the memory map prescribes.
291 */
292 mem_flags |= EFI_MEMORY_WT | EFI_MEMORY_WB;
293 mem_flags &= md.attribute;
294 }
295 if (mem_flags & EFI_MEMORY_WC)
296 info->screen_base = ioremap_wc(efifb_fix.smem_start,
297 efifb_fix.smem_len);
298 else if (mem_flags & EFI_MEMORY_UC)
299 info->screen_base = ioremap(efifb_fix.smem_start,
300 efifb_fix.smem_len);
301 else if (mem_flags & EFI_MEMORY_WT)
302 info->screen_base = memremap(efifb_fix.smem_start,
303 efifb_fix.smem_len, MEMREMAP_WT);
304 else if (mem_flags & EFI_MEMORY_WB)
305 info->screen_base = memremap(efifb_fix.smem_start,
306 efifb_fix.smem_len, MEMREMAP_WB);
279 if (!info->screen_base) { 307 if (!info->screen_base) {
280 pr_err("efifb: abort, cannot ioremap video memory 0x%x @ 0x%lx\n", 308 pr_err("efifb: abort, cannot remap video memory 0x%x @ 0x%lx\n",
281 efifb_fix.smem_len, efifb_fix.smem_start); 309 efifb_fix.smem_len, efifb_fix.smem_start);
282 err = -EIO; 310 err = -EIO;
283 goto err_release_fb; 311 goto err_release_fb;
@@ -371,7 +399,10 @@ err_fb_dealoc:
371err_groups: 399err_groups:
372 sysfs_remove_groups(&dev->dev.kobj, efifb_groups); 400 sysfs_remove_groups(&dev->dev.kobj, efifb_groups);
373err_unmap: 401err_unmap:
374 iounmap(info->screen_base); 402 if (mem_flags & (EFI_MEMORY_UC | EFI_MEMORY_WC))
403 iounmap(info->screen_base);
404 else
405 memunmap(info->screen_base);
375err_release_fb: 406err_release_fb:
376 framebuffer_release(info); 407 framebuffer_release(info);
377err_release_mem: 408err_release_mem:
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 6b237e3f4983..3988c0914322 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -513,7 +513,9 @@ static int virtballoon_migratepage(struct balloon_dev_info *vb_dev_info,
513 tell_host(vb, vb->inflate_vq); 513 tell_host(vb, vb->inflate_vq);
514 514
515 /* balloon's page migration 2nd step -- deflate "page" */ 515 /* balloon's page migration 2nd step -- deflate "page" */
516 spin_lock_irqsave(&vb_dev_info->pages_lock, flags);
516 balloon_page_delete(page); 517 balloon_page_delete(page);
518 spin_unlock_irqrestore(&vb_dev_info->pages_lock, flags);
517 vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE; 519 vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE;
518 set_page_pfns(vb, vb->pfns, page); 520 set_page_pfns(vb, vb->pfns, page);
519 tell_host(vb, vb->deflate_vq); 521 tell_host(vb, vb->deflate_vq);
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index a1b18082991b..183cc5418722 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -648,7 +648,7 @@ static void afs_wake_up_async_call(struct sock *sk, struct rxrpc_call *rxcall,
648 trace_afs_notify_call(rxcall, call); 648 trace_afs_notify_call(rxcall, call);
649 call->need_attention = true; 649 call->need_attention = true;
650 650
651 u = __atomic_add_unless(&call->usage, 1, 0); 651 u = atomic_fetch_add_unless(&call->usage, 1, 0);
652 if (u != 0) { 652 if (u != 0) {
653 trace_afs_call(call, afs_call_trace_wake, u, 653 trace_afs_call(call, afs_call_trace_wake, u,
654 atomic_read(&call->net->nr_outstanding_calls), 654 atomic_read(&call->net->nr_outstanding_calls),
diff --git a/fs/dcache.c b/fs/dcache.c
index 0e8e5de3c48a..ceb7b491d1b9 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -358,14 +358,11 @@ static void dentry_unlink_inode(struct dentry * dentry)
358 __releases(dentry->d_inode->i_lock) 358 __releases(dentry->d_inode->i_lock)
359{ 359{
360 struct inode *inode = dentry->d_inode; 360 struct inode *inode = dentry->d_inode;
361 bool hashed = !d_unhashed(dentry);
362 361
363 if (hashed) 362 raw_write_seqcount_begin(&dentry->d_seq);
364 raw_write_seqcount_begin(&dentry->d_seq);
365 __d_clear_type_and_inode(dentry); 363 __d_clear_type_and_inode(dentry);
366 hlist_del_init(&dentry->d_u.d_alias); 364 hlist_del_init(&dentry->d_u.d_alias);
367 if (hashed) 365 raw_write_seqcount_end(&dentry->d_seq);
368 raw_write_seqcount_end(&dentry->d_seq);
369 spin_unlock(&dentry->d_lock); 366 spin_unlock(&dentry->d_lock);
370 spin_unlock(&inode->i_lock); 367 spin_unlock(&inode->i_lock);
371 if (!inode->i_nlink) 368 if (!inode->i_nlink)
@@ -1932,10 +1929,12 @@ struct dentry *d_make_root(struct inode *root_inode)
1932 1929
1933 if (root_inode) { 1930 if (root_inode) {
1934 res = d_alloc_anon(root_inode->i_sb); 1931 res = d_alloc_anon(root_inode->i_sb);
1935 if (res) 1932 if (res) {
1933 res->d_flags |= DCACHE_RCUACCESS;
1936 d_instantiate(res, root_inode); 1934 d_instantiate(res, root_inode);
1937 else 1935 } else {
1938 iput(root_inode); 1936 iput(root_inode);
1937 }
1939 } 1938 }
1940 return res; 1939 return res;
1941} 1940}
diff --git a/fs/efivarfs/inode.c b/fs/efivarfs/inode.c
index 71fccccf317e..8c6ab6c95727 100644
--- a/fs/efivarfs/inode.c
+++ b/fs/efivarfs/inode.c
@@ -86,7 +86,9 @@ static int efivarfs_create(struct inode *dir, struct dentry *dentry,
86 /* length of the variable name itself: remove GUID and separator */ 86 /* length of the variable name itself: remove GUID and separator */
87 namelen = dentry->d_name.len - EFI_VARIABLE_GUID_LEN - 1; 87 namelen = dentry->d_name.len - EFI_VARIABLE_GUID_LEN - 1;
88 88
89 uuid_le_to_bin(dentry->d_name.name + namelen + 1, &var->var.VendorGuid); 89 err = guid_parse(dentry->d_name.name + namelen + 1, &var->var.VendorGuid);
90 if (err)
91 goto out;
90 92
91 if (efivar_variable_is_removable(var->var.VendorGuid, 93 if (efivar_variable_is_removable(var->var.VendorGuid,
92 dentry->d_name.name, namelen)) 94 dentry->d_name.name, namelen))
diff --git a/fs/iomap.c b/fs/iomap.c
index 77397b5a96ef..0d0bd8845586 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -1443,7 +1443,7 @@ iomap_bmap(struct address_space *mapping, sector_t bno,
1443 const struct iomap_ops *ops) 1443 const struct iomap_ops *ops)
1444{ 1444{
1445 struct inode *inode = mapping->host; 1445 struct inode *inode = mapping->host;
1446 loff_t pos = bno >> inode->i_blkbits; 1446 loff_t pos = bno << inode->i_blkbits;
1447 unsigned blocksize = i_blocksize(inode); 1447 unsigned blocksize = i_blocksize(inode);
1448 1448
1449 if (filemap_write_and_wait(mapping)) 1449 if (filemap_write_and_wait(mapping))
diff --git a/fs/jfs/jfs_dinode.h b/fs/jfs/jfs_dinode.h
index 395c4c0d0f06..1682a87c00b2 100644
--- a/fs/jfs/jfs_dinode.h
+++ b/fs/jfs/jfs_dinode.h
@@ -115,6 +115,13 @@ struct dinode {
115 dxd_t _dxd; /* 16: */ 115 dxd_t _dxd; /* 16: */
116 union { 116 union {
117 __le32 _rdev; /* 4: */ 117 __le32 _rdev; /* 4: */
118 /*
119 * The fast symlink area
120 * is expected to overflow
121 * into _inlineea when
122 * needed (which will clear
123 * INLINEEA).
124 */
118 u8 _fastsymlink[128]; 125 u8 _fastsymlink[128];
119 } _u; 126 } _u;
120 u8 _inlineea[128]; 127 u8 _inlineea[128];
diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h
index 1f26d1910409..9940a1e04cbf 100644
--- a/fs/jfs/jfs_incore.h
+++ b/fs/jfs/jfs_incore.h
@@ -87,6 +87,7 @@ struct jfs_inode_info {
87 struct { 87 struct {
88 unchar _unused[16]; /* 16: */ 88 unchar _unused[16]; /* 16: */
89 dxd_t _dxd; /* 16: */ 89 dxd_t _dxd; /* 16: */
90 /* _inline may overflow into _inline_ea when needed */
90 unchar _inline[128]; /* 128: inline symlink */ 91 unchar _inline[128]; /* 128: inline symlink */
91 /* _inline_ea may overlay the last part of 92 /* _inline_ea may overlay the last part of
92 * file._xtroot if maxentry = XTROOTINITSLOT 93 * file._xtroot if maxentry = XTROOTINITSLOT
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 1b9264fd54b6..f08571433aba 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -967,8 +967,7 @@ static int __init init_jfs_fs(void)
967 jfs_inode_cachep = 967 jfs_inode_cachep =
968 kmem_cache_create_usercopy("jfs_ip", sizeof(struct jfs_inode_info), 968 kmem_cache_create_usercopy("jfs_ip", sizeof(struct jfs_inode_info),
969 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|SLAB_ACCOUNT, 969 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|SLAB_ACCOUNT,
970 offsetof(struct jfs_inode_info, i_inline), 970 offsetof(struct jfs_inode_info, i_inline), IDATASIZE,
971 sizeof_field(struct jfs_inode_info, i_inline),
972 init_once); 971 init_once);
973 if (jfs_inode_cachep == NULL) 972 if (jfs_inode_cachep == NULL)
974 return -ENOMEM; 973 return -ENOMEM;
diff --git a/fs/namespace.c b/fs/namespace.c
index 8ddd14806799..bd2f4c68506a 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -659,12 +659,21 @@ int __legitimize_mnt(struct vfsmount *bastard, unsigned seq)
659 return 0; 659 return 0;
660 mnt = real_mount(bastard); 660 mnt = real_mount(bastard);
661 mnt_add_count(mnt, 1); 661 mnt_add_count(mnt, 1);
662 smp_mb(); // see mntput_no_expire()
662 if (likely(!read_seqretry(&mount_lock, seq))) 663 if (likely(!read_seqretry(&mount_lock, seq)))
663 return 0; 664 return 0;
664 if (bastard->mnt_flags & MNT_SYNC_UMOUNT) { 665 if (bastard->mnt_flags & MNT_SYNC_UMOUNT) {
665 mnt_add_count(mnt, -1); 666 mnt_add_count(mnt, -1);
666 return 1; 667 return 1;
667 } 668 }
669 lock_mount_hash();
670 if (unlikely(bastard->mnt_flags & MNT_DOOMED)) {
671 mnt_add_count(mnt, -1);
672 unlock_mount_hash();
673 return 1;
674 }
675 unlock_mount_hash();
676 /* caller will mntput() */
668 return -1; 677 return -1;
669} 678}
670 679
@@ -1195,12 +1204,27 @@ static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput);
1195static void mntput_no_expire(struct mount *mnt) 1204static void mntput_no_expire(struct mount *mnt)
1196{ 1205{
1197 rcu_read_lock(); 1206 rcu_read_lock();
1198 mnt_add_count(mnt, -1); 1207 if (likely(READ_ONCE(mnt->mnt_ns))) {
1199 if (likely(mnt->mnt_ns)) { /* shouldn't be the last one */ 1208 /*
1209 * Since we don't do lock_mount_hash() here,
1210 * ->mnt_ns can change under us. However, if it's
1211 * non-NULL, then there's a reference that won't
1212 * be dropped until after an RCU delay done after
1213 * turning ->mnt_ns NULL. So if we observe it
1214 * non-NULL under rcu_read_lock(), the reference
1215 * we are dropping is not the final one.
1216 */
1217 mnt_add_count(mnt, -1);
1200 rcu_read_unlock(); 1218 rcu_read_unlock();
1201 return; 1219 return;
1202 } 1220 }
1203 lock_mount_hash(); 1221 lock_mount_hash();
1222 /*
1223 * make sure that if __legitimize_mnt() has not seen us grab
1224 * mount_lock, we'll see their refcount increment here.
1225 */
1226 smp_mb();
1227 mnt_add_count(mnt, -1);
1204 if (mnt_get_count(mnt)) { 1228 if (mnt_get_count(mnt)) {
1205 rcu_read_unlock(); 1229 rcu_read_unlock();
1206 unlock_mount_hash(); 1230 unlock_mount_hash();
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 6dd146885da9..f6c4ccd693f4 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -6466,34 +6466,34 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
6466 if (data->arg.new_lock && !data->cancelled) { 6466 if (data->arg.new_lock && !data->cancelled) {
6467 data->fl.fl_flags &= ~(FL_SLEEP | FL_ACCESS); 6467 data->fl.fl_flags &= ~(FL_SLEEP | FL_ACCESS);
6468 if (locks_lock_inode_wait(lsp->ls_state->inode, &data->fl) < 0) 6468 if (locks_lock_inode_wait(lsp->ls_state->inode, &data->fl) < 0)
6469 break; 6469 goto out_restart;
6470 } 6470 }
6471
6472 if (data->arg.new_lock_owner != 0) { 6471 if (data->arg.new_lock_owner != 0) {
6473 nfs_confirm_seqid(&lsp->ls_seqid, 0); 6472 nfs_confirm_seqid(&lsp->ls_seqid, 0);
6474 nfs4_stateid_copy(&lsp->ls_stateid, &data->res.stateid); 6473 nfs4_stateid_copy(&lsp->ls_stateid, &data->res.stateid);
6475 set_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags); 6474 set_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags);
6476 goto out_done; 6475 } else if (!nfs4_update_lock_stateid(lsp, &data->res.stateid))
6477 } else if (nfs4_update_lock_stateid(lsp, &data->res.stateid)) 6476 goto out_restart;
6478 goto out_done;
6479
6480 break; 6477 break;
6481 case -NFS4ERR_BAD_STATEID: 6478 case -NFS4ERR_BAD_STATEID:
6482 case -NFS4ERR_OLD_STATEID: 6479 case -NFS4ERR_OLD_STATEID:
6483 case -NFS4ERR_STALE_STATEID: 6480 case -NFS4ERR_STALE_STATEID:
6484 case -NFS4ERR_EXPIRED: 6481 case -NFS4ERR_EXPIRED:
6485 if (data->arg.new_lock_owner != 0) { 6482 if (data->arg.new_lock_owner != 0) {
6486 if (nfs4_stateid_match(&data->arg.open_stateid, 6483 if (!nfs4_stateid_match(&data->arg.open_stateid,
6487 &lsp->ls_state->open_stateid)) 6484 &lsp->ls_state->open_stateid))
6488 goto out_done; 6485 goto out_restart;
6489 } else if (nfs4_stateid_match(&data->arg.lock_stateid, 6486 } else if (!nfs4_stateid_match(&data->arg.lock_stateid,
6490 &lsp->ls_stateid)) 6487 &lsp->ls_stateid))
6491 goto out_done; 6488 goto out_restart;
6492 } 6489 }
6493 if (!data->cancelled)
6494 rpc_restart_call_prepare(task);
6495out_done: 6490out_done:
6496 dprintk("%s: done, ret = %d!\n", __func__, data->rpc_status); 6491 dprintk("%s: done, ret = %d!\n", __func__, data->rpc_status);
6492 return;
6493out_restart:
6494 if (!data->cancelled)
6495 rpc_restart_call_prepare(task);
6496 goto out_done;
6497} 6497}
6498 6498
6499static void nfs4_lock_release(void *calldata) 6499static void nfs4_lock_release(void *calldata)
@@ -6502,7 +6502,7 @@ static void nfs4_lock_release(void *calldata)
6502 6502
6503 dprintk("%s: begin!\n", __func__); 6503 dprintk("%s: begin!\n", __func__);
6504 nfs_free_seqid(data->arg.open_seqid); 6504 nfs_free_seqid(data->arg.open_seqid);
6505 if (data->cancelled) { 6505 if (data->cancelled && data->rpc_status == 0) {
6506 struct rpc_task *task; 6506 struct rpc_task *task;
6507 task = nfs4_do_unlck(&data->fl, data->ctx, data->lsp, 6507 task = nfs4_do_unlck(&data->fl, data->ctx, data->lsp,
6508 data->arg.lock_seqid); 6508 data->arg.lock_seqid);
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index 2751476e6b6e..f098b9f1c396 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -167,6 +167,8 @@ int squashfs_read_data(struct super_block *sb, u64 index, int length,
167 } 167 }
168 168
169 if (compressed) { 169 if (compressed) {
170 if (!msblk->stream)
171 goto read_failure;
170 length = squashfs_decompress(msblk, bh, b, offset, length, 172 length = squashfs_decompress(msblk, bh, b, offset, length,
171 output); 173 output);
172 if (length < 0) 174 if (length < 0)
diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c
index fcff2e0487fe..f1c1430ae721 100644
--- a/fs/squashfs/file.c
+++ b/fs/squashfs/file.c
@@ -374,13 +374,29 @@ static int read_blocklist(struct inode *inode, int index, u64 *block)
374 return squashfs_block_size(size); 374 return squashfs_block_size(size);
375} 375}
376 376
377void squashfs_fill_page(struct page *page, struct squashfs_cache_entry *buffer, int offset, int avail)
378{
379 int copied;
380 void *pageaddr;
381
382 pageaddr = kmap_atomic(page);
383 copied = squashfs_copy_data(pageaddr, buffer, offset, avail);
384 memset(pageaddr + copied, 0, PAGE_SIZE - copied);
385 kunmap_atomic(pageaddr);
386
387 flush_dcache_page(page);
388 if (copied == avail)
389 SetPageUptodate(page);
390 else
391 SetPageError(page);
392}
393
377/* Copy data into page cache */ 394/* Copy data into page cache */
378void squashfs_copy_cache(struct page *page, struct squashfs_cache_entry *buffer, 395void squashfs_copy_cache(struct page *page, struct squashfs_cache_entry *buffer,
379 int bytes, int offset) 396 int bytes, int offset)
380{ 397{
381 struct inode *inode = page->mapping->host; 398 struct inode *inode = page->mapping->host;
382 struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; 399 struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
383 void *pageaddr;
384 int i, mask = (1 << (msblk->block_log - PAGE_SHIFT)) - 1; 400 int i, mask = (1 << (msblk->block_log - PAGE_SHIFT)) - 1;
385 int start_index = page->index & ~mask, end_index = start_index | mask; 401 int start_index = page->index & ~mask, end_index = start_index | mask;
386 402
@@ -406,12 +422,7 @@ void squashfs_copy_cache(struct page *page, struct squashfs_cache_entry *buffer,
406 if (PageUptodate(push_page)) 422 if (PageUptodate(push_page))
407 goto skip_page; 423 goto skip_page;
408 424
409 pageaddr = kmap_atomic(push_page); 425 squashfs_fill_page(push_page, buffer, offset, avail);
410 squashfs_copy_data(pageaddr, buffer, offset, avail);
411 memset(pageaddr + avail, 0, PAGE_SIZE - avail);
412 kunmap_atomic(pageaddr);
413 flush_dcache_page(push_page);
414 SetPageUptodate(push_page);
415skip_page: 426skip_page:
416 unlock_page(push_page); 427 unlock_page(push_page);
417 if (i != page->index) 428 if (i != page->index)
@@ -420,10 +431,9 @@ skip_page:
420} 431}
421 432
422/* Read datablock stored packed inside a fragment (tail-end packed block) */ 433/* Read datablock stored packed inside a fragment (tail-end packed block) */
423static int squashfs_readpage_fragment(struct page *page) 434static int squashfs_readpage_fragment(struct page *page, int expected)
424{ 435{
425 struct inode *inode = page->mapping->host; 436 struct inode *inode = page->mapping->host;
426 struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
427 struct squashfs_cache_entry *buffer = squashfs_get_fragment(inode->i_sb, 437 struct squashfs_cache_entry *buffer = squashfs_get_fragment(inode->i_sb,
428 squashfs_i(inode)->fragment_block, 438 squashfs_i(inode)->fragment_block,
429 squashfs_i(inode)->fragment_size); 439 squashfs_i(inode)->fragment_size);
@@ -434,23 +444,16 @@ static int squashfs_readpage_fragment(struct page *page)
434 squashfs_i(inode)->fragment_block, 444 squashfs_i(inode)->fragment_block,
435 squashfs_i(inode)->fragment_size); 445 squashfs_i(inode)->fragment_size);
436 else 446 else
437 squashfs_copy_cache(page, buffer, i_size_read(inode) & 447 squashfs_copy_cache(page, buffer, expected,
438 (msblk->block_size - 1),
439 squashfs_i(inode)->fragment_offset); 448 squashfs_i(inode)->fragment_offset);
440 449
441 squashfs_cache_put(buffer); 450 squashfs_cache_put(buffer);
442 return res; 451 return res;
443} 452}
444 453
445static int squashfs_readpage_sparse(struct page *page, int index, int file_end) 454static int squashfs_readpage_sparse(struct page *page, int expected)
446{ 455{
447 struct inode *inode = page->mapping->host; 456 squashfs_copy_cache(page, NULL, expected, 0);
448 struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
449 int bytes = index == file_end ?
450 (i_size_read(inode) & (msblk->block_size - 1)) :
451 msblk->block_size;
452
453 squashfs_copy_cache(page, NULL, bytes, 0);
454 return 0; 457 return 0;
455} 458}
456 459
@@ -460,6 +463,9 @@ static int squashfs_readpage(struct file *file, struct page *page)
460 struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; 463 struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
461 int index = page->index >> (msblk->block_log - PAGE_SHIFT); 464 int index = page->index >> (msblk->block_log - PAGE_SHIFT);
462 int file_end = i_size_read(inode) >> msblk->block_log; 465 int file_end = i_size_read(inode) >> msblk->block_log;
466 int expected = index == file_end ?
467 (i_size_read(inode) & (msblk->block_size - 1)) :
468 msblk->block_size;
463 int res; 469 int res;
464 void *pageaddr; 470 void *pageaddr;
465 471
@@ -478,11 +484,11 @@ static int squashfs_readpage(struct file *file, struct page *page)
478 goto error_out; 484 goto error_out;
479 485
480 if (bsize == 0) 486 if (bsize == 0)
481 res = squashfs_readpage_sparse(page, index, file_end); 487 res = squashfs_readpage_sparse(page, expected);
482 else 488 else
483 res = squashfs_readpage_block(page, block, bsize); 489 res = squashfs_readpage_block(page, block, bsize, expected);
484 } else 490 } else
485 res = squashfs_readpage_fragment(page); 491 res = squashfs_readpage_fragment(page, expected);
486 492
487 if (!res) 493 if (!res)
488 return 0; 494 return 0;
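
The new squashfs_fill_page() helper centralizes what both read paths used to open-code: copy whatever the decompressor actually produced, zero-fill the rest of the page, and mark the page up to date only when the full expected amount was copied, otherwise flag an error instead of exposing stale data. Roughly the same logic as a standalone C sketch; PAGE_SIZE and the buffer type here are stand-ins, not the kernel structures:

#include <string.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define PAGE_SIZE 4096

struct buf { const char *data; size_t len; };

/* Returns true when the whole "avail" span was copied (page up to date). */
static bool fill_page(char *page, const struct buf *src, size_t offset, size_t avail)
{
	size_t copied = 0;

	if (offset < src->len) {
		copied = src->len - offset;
		if (copied > avail)
			copied = avail;
		memcpy(page, src->data + offset, copied);
	}
	memset(page + copied, 0, PAGE_SIZE - copied);	/* zero the tail */
	return copied == avail;				/* SetPageUptodate vs SetPageError */
}

int main(void)
{
	static const char data[] = "squashfs block payload";
	struct buf src = { data, sizeof(data) - 1 };
	char page[PAGE_SIZE];

	printf("full copy ok:  %d\n", fill_page(page, &src, 0, src.len));
	printf("short copy ok: %d\n", fill_page(page, &src, src.len, 10));
	return 0;
}
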
diff --git a/fs/squashfs/file_cache.c b/fs/squashfs/file_cache.c
index f2310d2a2019..a9ba8d96776a 100644
--- a/fs/squashfs/file_cache.c
+++ b/fs/squashfs/file_cache.c
@@ -20,7 +20,7 @@
20#include "squashfs.h" 20#include "squashfs.h"
21 21
22/* Read separately compressed datablock and memcopy into page cache */ 22/* Read separately compressed datablock and memcopy into page cache */
23int squashfs_readpage_block(struct page *page, u64 block, int bsize) 23int squashfs_readpage_block(struct page *page, u64 block, int bsize, int expected)
24{ 24{
25 struct inode *i = page->mapping->host; 25 struct inode *i = page->mapping->host;
26 struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb, 26 struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb,
@@ -31,7 +31,7 @@ int squashfs_readpage_block(struct page *page, u64 block, int bsize)
31 ERROR("Unable to read page, block %llx, size %x\n", block, 31 ERROR("Unable to read page, block %llx, size %x\n", block,
32 bsize); 32 bsize);
33 else 33 else
34 squashfs_copy_cache(page, buffer, buffer->length, 0); 34 squashfs_copy_cache(page, buffer, expected, 0);
35 35
36 squashfs_cache_put(buffer); 36 squashfs_cache_put(buffer);
37 return res; 37 return res;
diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c
index cb485d8e0e91..80db1b86a27c 100644
--- a/fs/squashfs/file_direct.c
+++ b/fs/squashfs/file_direct.c
@@ -21,10 +21,11 @@
21#include "page_actor.h" 21#include "page_actor.h"
22 22
23static int squashfs_read_cache(struct page *target_page, u64 block, int bsize, 23static int squashfs_read_cache(struct page *target_page, u64 block, int bsize,
24 int pages, struct page **page); 24 int pages, struct page **page, int bytes);
25 25
26/* Read separately compressed datablock directly into page cache */ 26/* Read separately compressed datablock directly into page cache */
27int squashfs_readpage_block(struct page *target_page, u64 block, int bsize) 27int squashfs_readpage_block(struct page *target_page, u64 block, int bsize,
28 int expected)
28 29
29{ 30{
30 struct inode *inode = target_page->mapping->host; 31 struct inode *inode = target_page->mapping->host;
@@ -83,7 +84,7 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize)
83 * using an intermediate buffer. 84 * using an intermediate buffer.
84 */ 85 */
85 res = squashfs_read_cache(target_page, block, bsize, pages, 86 res = squashfs_read_cache(target_page, block, bsize, pages,
86 page); 87 page, expected);
87 if (res < 0) 88 if (res < 0)
88 goto mark_errored; 89 goto mark_errored;
89 90
@@ -95,6 +96,11 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize)
95 if (res < 0) 96 if (res < 0)
96 goto mark_errored; 97 goto mark_errored;
97 98
99 if (res != expected) {
100 res = -EIO;
101 goto mark_errored;
102 }
103
98 /* Last page may have trailing bytes not filled */ 104 /* Last page may have trailing bytes not filled */
99 bytes = res % PAGE_SIZE; 105 bytes = res % PAGE_SIZE;
100 if (bytes) { 106 if (bytes) {
@@ -138,13 +144,12 @@ out:
138 144
139 145
140static int squashfs_read_cache(struct page *target_page, u64 block, int bsize, 146static int squashfs_read_cache(struct page *target_page, u64 block, int bsize,
141 int pages, struct page **page) 147 int pages, struct page **page, int bytes)
142{ 148{
143 struct inode *i = target_page->mapping->host; 149 struct inode *i = target_page->mapping->host;
144 struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb, 150 struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb,
145 block, bsize); 151 block, bsize);
146 int bytes = buffer->length, res = buffer->error, n, offset = 0; 152 int res = buffer->error, n, offset = 0;
147 void *pageaddr;
148 153
149 if (res) { 154 if (res) {
150 ERROR("Unable to read page, block %llx, size %x\n", block, 155 ERROR("Unable to read page, block %llx, size %x\n", block,
@@ -159,12 +164,7 @@ static int squashfs_read_cache(struct page *target_page, u64 block, int bsize,
159 if (page[n] == NULL) 164 if (page[n] == NULL)
160 continue; 165 continue;
161 166
162 pageaddr = kmap_atomic(page[n]); 167 squashfs_fill_page(page[n], buffer, offset, avail);
163 squashfs_copy_data(pageaddr, buffer, offset, avail);
164 memset(pageaddr + avail, 0, PAGE_SIZE - avail);
165 kunmap_atomic(pageaddr);
166 flush_dcache_page(page[n]);
167 SetPageUptodate(page[n]);
168 unlock_page(page[n]); 168 unlock_page(page[n]);
169 if (page[n] != target_page) 169 if (page[n] != target_page)
170 put_page(page[n]); 170 put_page(page[n]);
diff --git a/fs/squashfs/fragment.c b/fs/squashfs/fragment.c
index 86ad9a4b8c36..0681feab4a84 100644
--- a/fs/squashfs/fragment.c
+++ b/fs/squashfs/fragment.c
@@ -49,11 +49,16 @@ int squashfs_frag_lookup(struct super_block *sb, unsigned int fragment,
49 u64 *fragment_block) 49 u64 *fragment_block)
50{ 50{
51 struct squashfs_sb_info *msblk = sb->s_fs_info; 51 struct squashfs_sb_info *msblk = sb->s_fs_info;
52 int block = SQUASHFS_FRAGMENT_INDEX(fragment); 52 int block, offset, size;
53 int offset = SQUASHFS_FRAGMENT_INDEX_OFFSET(fragment);
54 u64 start_block = le64_to_cpu(msblk->fragment_index[block]);
55 struct squashfs_fragment_entry fragment_entry; 53 struct squashfs_fragment_entry fragment_entry;
56 int size; 54 u64 start_block;
55
56 if (fragment >= msblk->fragments)
57 return -EIO;
58 block = SQUASHFS_FRAGMENT_INDEX(fragment);
59 offset = SQUASHFS_FRAGMENT_INDEX_OFFSET(fragment);
60
61 start_block = le64_to_cpu(msblk->fragment_index[block]);
57 62
58 size = squashfs_read_metadata(sb, &fragment_entry, &start_block, 63 size = squashfs_read_metadata(sb, &fragment_entry, &start_block,
59 &offset, sizeof(fragment_entry)); 64 &offset, sizeof(fragment_entry));
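
The change here is a defensive bounds check: the fragment number comes from on-disk inode metadata, so a corrupted image could otherwise index past the fragment table sized at mount time. A simplified sketch of the idea; the real lookup then splits the index into an index-block number and offset, which is collapsed here, and the names are stand-ins:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

struct fs_info {
	unsigned int fragments;		/* count read from the superblock */
	const uint64_t *fragment_index;	/* table sized from that count */
};

static int frag_lookup(const struct fs_info *fs, unsigned int fragment,
		       uint64_t *start_block)
{
	if (fragment >= fs->fragments)	/* untrusted on-disk value: reject */
		return -EIO;
	*start_block = fs->fragment_index[fragment];
	return 0;
}

int main(void)
{
	uint64_t table[2] = { 4096, 8192 }, blk;
	struct fs_info fs = { 2, table };

	printf("in range: %d\n", frag_lookup(&fs, 1, &blk));
	printf("corrupt:  %d\n", frag_lookup(&fs, 7, &blk));	/* -EIO */
	return 0;
}
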
diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h
index 887d6d270080..f89f8a74c6ce 100644
--- a/fs/squashfs/squashfs.h
+++ b/fs/squashfs/squashfs.h
@@ -67,11 +67,12 @@ extern __le64 *squashfs_read_fragment_index_table(struct super_block *,
67 u64, u64, unsigned int); 67 u64, u64, unsigned int);
68 68
69/* file.c */ 69/* file.c */
70void squashfs_fill_page(struct page *, struct squashfs_cache_entry *, int, int);
70void squashfs_copy_cache(struct page *, struct squashfs_cache_entry *, int, 71void squashfs_copy_cache(struct page *, struct squashfs_cache_entry *, int,
71 int); 72 int);
72 73
73/* file_xxx.c */ 74/* file_xxx.c */
74extern int squashfs_readpage_block(struct page *, u64, int); 75extern int squashfs_readpage_block(struct page *, u64, int, int);
75 76
76/* id.c */ 77/* id.c */
77extern int squashfs_get_id(struct super_block *, unsigned int, unsigned int *); 78extern int squashfs_get_id(struct super_block *, unsigned int, unsigned int *);
diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h
index 1da565cb50c3..ef69c31947bf 100644
--- a/fs/squashfs/squashfs_fs_sb.h
+++ b/fs/squashfs/squashfs_fs_sb.h
@@ -75,6 +75,7 @@ struct squashfs_sb_info {
75 unsigned short block_log; 75 unsigned short block_log;
76 long long bytes_used; 76 long long bytes_used;
77 unsigned int inodes; 77 unsigned int inodes;
78 unsigned int fragments;
78 int xattr_ids; 79 int xattr_ids;
79}; 80};
80#endif 81#endif
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 8a73b97217c8..40e657386fa5 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -175,6 +175,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
175 msblk->inode_table = le64_to_cpu(sblk->inode_table_start); 175 msblk->inode_table = le64_to_cpu(sblk->inode_table_start);
176 msblk->directory_table = le64_to_cpu(sblk->directory_table_start); 176 msblk->directory_table = le64_to_cpu(sblk->directory_table_start);
177 msblk->inodes = le32_to_cpu(sblk->inodes); 177 msblk->inodes = le32_to_cpu(sblk->inodes);
178 msblk->fragments = le32_to_cpu(sblk->fragments);
178 flags = le16_to_cpu(sblk->flags); 179 flags = le16_to_cpu(sblk->flags);
179 180
180 TRACE("Found valid superblock on %pg\n", sb->s_bdev); 181 TRACE("Found valid superblock on %pg\n", sb->s_bdev);
@@ -185,7 +186,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
185 TRACE("Filesystem size %lld bytes\n", msblk->bytes_used); 186 TRACE("Filesystem size %lld bytes\n", msblk->bytes_used);
186 TRACE("Block size %d\n", msblk->block_size); 187 TRACE("Block size %d\n", msblk->block_size);
187 TRACE("Number of inodes %d\n", msblk->inodes); 188 TRACE("Number of inodes %d\n", msblk->inodes);
188 TRACE("Number of fragments %d\n", le32_to_cpu(sblk->fragments)); 189 TRACE("Number of fragments %d\n", msblk->fragments);
189 TRACE("Number of ids %d\n", le16_to_cpu(sblk->no_ids)); 190 TRACE("Number of ids %d\n", le16_to_cpu(sblk->no_ids));
190 TRACE("sblk->inode_table_start %llx\n", msblk->inode_table); 191 TRACE("sblk->inode_table_start %llx\n", msblk->inode_table);
191 TRACE("sblk->directory_table_start %llx\n", msblk->directory_table); 192 TRACE("sblk->directory_table_start %llx\n", msblk->directory_table);
@@ -272,7 +273,7 @@ allocate_id_index_table:
272 sb->s_export_op = &squashfs_export_ops; 273 sb->s_export_op = &squashfs_export_ops;
273 274
274handle_fragments: 275handle_fragments:
275 fragments = le32_to_cpu(sblk->fragments); 276 fragments = msblk->fragments;
276 if (fragments == 0) 277 if (fragments == 0)
277 goto check_directory_table; 278 goto check_directory_table;
278 279
diff --git a/fs/timerfd.c b/fs/timerfd.c
index cdad49da3ff7..38c695ce385b 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -533,8 +533,8 @@ static int do_timerfd_gettime(int ufd, struct itimerspec64 *t)
533} 533}
534 534
535SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, 535SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
536 const struct itimerspec __user *, utmr, 536 const struct __kernel_itimerspec __user *, utmr,
537 struct itimerspec __user *, otmr) 537 struct __kernel_itimerspec __user *, otmr)
538{ 538{
539 struct itimerspec64 new, old; 539 struct itimerspec64 new, old;
540 int ret; 540 int ret;
@@ -550,7 +550,7 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
550 return ret; 550 return ret;
551} 551}
552 552
553SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr) 553SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct __kernel_itimerspec __user *, otmr)
554{ 554{
555 struct itimerspec64 kotmr; 555 struct itimerspec64 kotmr;
556 int ret = do_timerfd_gettime(ufd, &kotmr); 556 int ret = do_timerfd_gettime(ufd, &kotmr);
@@ -559,7 +559,7 @@ SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr)
559 return put_itimerspec64(&kotmr, otmr) ? -EFAULT : 0; 559 return put_itimerspec64(&kotmr, otmr) ? -EFAULT : 0;
560} 560}
561 561
562#ifdef CONFIG_COMPAT 562#ifdef CONFIG_COMPAT_32BIT_TIME
563COMPAT_SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, 563COMPAT_SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
564 const struct compat_itimerspec __user *, utmr, 564 const struct compat_itimerspec __user *, utmr,
565 struct compat_itimerspec __user *, otmr) 565 struct compat_itimerspec __user *, otmr)
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 594d192b2331..bad9cea37f12 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -633,8 +633,10 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
633 /* the various vma->vm_userfaultfd_ctx still points to it */ 633 /* the various vma->vm_userfaultfd_ctx still points to it */
634 down_write(&mm->mmap_sem); 634 down_write(&mm->mmap_sem);
635 for (vma = mm->mmap; vma; vma = vma->vm_next) 635 for (vma = mm->mmap; vma; vma = vma->vm_next)
636 if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) 636 if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) {
637 vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; 637 vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
638 vma->vm_flags &= ~(VM_UFFD_WP | VM_UFFD_MISSING);
639 }
638 up_write(&mm->mmap_sem); 640 up_write(&mm->mmap_sem);
639 641
640 userfaultfd_ctx_put(release_new_ctx); 642 userfaultfd_ctx_put(release_new_ctx);
diff --git a/include/asm-generic/atomic-instrumented.h b/include/asm-generic/atomic-instrumented.h
index ec07f23678ea..0d4b1d3dbc1e 100644
--- a/include/asm-generic/atomic-instrumented.h
+++ b/include/asm-generic/atomic-instrumented.h
@@ -84,42 +84,59 @@ static __always_inline bool atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 ne
84} 84}
85#endif 85#endif
86 86
87static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u) 87#ifdef arch_atomic_fetch_add_unless
88#define atomic_fetch_add_unless atomic_fetch_add_unless
89static __always_inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
88{ 90{
89 kasan_check_write(v, sizeof(*v)); 91 kasan_check_write(v, sizeof(*v));
90 return __arch_atomic_add_unless(v, a, u); 92 return arch_atomic_fetch_add_unless(v, a, u);
91} 93}
94#endif
92 95
93 96#ifdef arch_atomic64_fetch_add_unless
94static __always_inline bool atomic64_add_unless(atomic64_t *v, s64 a, s64 u) 97#define atomic64_fetch_add_unless atomic64_fetch_add_unless
98static __always_inline s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
95{ 99{
96 kasan_check_write(v, sizeof(*v)); 100 kasan_check_write(v, sizeof(*v));
97 return arch_atomic64_add_unless(v, a, u); 101 return arch_atomic64_fetch_add_unless(v, a, u);
98} 102}
103#endif
99 104
105#ifdef arch_atomic_inc
106#define atomic_inc atomic_inc
100static __always_inline void atomic_inc(atomic_t *v) 107static __always_inline void atomic_inc(atomic_t *v)
101{ 108{
102 kasan_check_write(v, sizeof(*v)); 109 kasan_check_write(v, sizeof(*v));
103 arch_atomic_inc(v); 110 arch_atomic_inc(v);
104} 111}
112#endif
105 113
114#ifdef arch_atomic64_inc
115#define atomic64_inc atomic64_inc
106static __always_inline void atomic64_inc(atomic64_t *v) 116static __always_inline void atomic64_inc(atomic64_t *v)
107{ 117{
108 kasan_check_write(v, sizeof(*v)); 118 kasan_check_write(v, sizeof(*v));
109 arch_atomic64_inc(v); 119 arch_atomic64_inc(v);
110} 120}
121#endif
111 122
123#ifdef arch_atomic_dec
124#define atomic_dec atomic_dec
112static __always_inline void atomic_dec(atomic_t *v) 125static __always_inline void atomic_dec(atomic_t *v)
113{ 126{
114 kasan_check_write(v, sizeof(*v)); 127 kasan_check_write(v, sizeof(*v));
115 arch_atomic_dec(v); 128 arch_atomic_dec(v);
116} 129}
130#endif
117 131
 132#ifdef arch_atomic64_dec
 133#define atomic64_dec atomic64_dec
118static __always_inline void atomic64_dec(atomic64_t *v) 134static __always_inline void atomic64_dec(atomic64_t *v)
119{ 135{
120 kasan_check_write(v, sizeof(*v)); 136 kasan_check_write(v, sizeof(*v));
121 arch_atomic64_dec(v); 137 arch_atomic64_dec(v);
122} 138}
139#endif
123 140
124static __always_inline void atomic_add(int i, atomic_t *v) 141static __always_inline void atomic_add(int i, atomic_t *v)
125{ 142{
@@ -181,65 +198,95 @@ static __always_inline void atomic64_xor(s64 i, atomic64_t *v)
181 arch_atomic64_xor(i, v); 198 arch_atomic64_xor(i, v);
182} 199}
183 200
201#ifdef arch_atomic_inc_return
202#define atomic_inc_return atomic_inc_return
184static __always_inline int atomic_inc_return(atomic_t *v) 203static __always_inline int atomic_inc_return(atomic_t *v)
185{ 204{
186 kasan_check_write(v, sizeof(*v)); 205 kasan_check_write(v, sizeof(*v));
187 return arch_atomic_inc_return(v); 206 return arch_atomic_inc_return(v);
188} 207}
208#endif
189 209
 210#ifdef arch_atomic64_inc_return
211#define atomic64_inc_return atomic64_inc_return
190static __always_inline s64 atomic64_inc_return(atomic64_t *v) 212static __always_inline s64 atomic64_inc_return(atomic64_t *v)
191{ 213{
192 kasan_check_write(v, sizeof(*v)); 214 kasan_check_write(v, sizeof(*v));
193 return arch_atomic64_inc_return(v); 215 return arch_atomic64_inc_return(v);
194} 216}
217#endif
195 218
219#ifdef arch_atomic_dec_return
220#define atomic_dec_return atomic_dec_return
196static __always_inline int atomic_dec_return(atomic_t *v) 221static __always_inline int atomic_dec_return(atomic_t *v)
197{ 222{
198 kasan_check_write(v, sizeof(*v)); 223 kasan_check_write(v, sizeof(*v));
199 return arch_atomic_dec_return(v); 224 return arch_atomic_dec_return(v);
200} 225}
226#endif
201 227
228#ifdef arch_atomic64_dec_return
229#define atomic64_dec_return atomic64_dec_return
202static __always_inline s64 atomic64_dec_return(atomic64_t *v) 230static __always_inline s64 atomic64_dec_return(atomic64_t *v)
203{ 231{
204 kasan_check_write(v, sizeof(*v)); 232 kasan_check_write(v, sizeof(*v));
205 return arch_atomic64_dec_return(v); 233 return arch_atomic64_dec_return(v);
206} 234}
235#endif
207 236
208static __always_inline s64 atomic64_inc_not_zero(atomic64_t *v) 237#ifdef arch_atomic64_inc_not_zero
238#define atomic64_inc_not_zero atomic64_inc_not_zero
239static __always_inline bool atomic64_inc_not_zero(atomic64_t *v)
209{ 240{
210 kasan_check_write(v, sizeof(*v)); 241 kasan_check_write(v, sizeof(*v));
211 return arch_atomic64_inc_not_zero(v); 242 return arch_atomic64_inc_not_zero(v);
212} 243}
244#endif
213 245
246#ifdef arch_atomic64_dec_if_positive
247#define atomic64_dec_if_positive atomic64_dec_if_positive
214static __always_inline s64 atomic64_dec_if_positive(atomic64_t *v) 248static __always_inline s64 atomic64_dec_if_positive(atomic64_t *v)
215{ 249{
216 kasan_check_write(v, sizeof(*v)); 250 kasan_check_write(v, sizeof(*v));
217 return arch_atomic64_dec_if_positive(v); 251 return arch_atomic64_dec_if_positive(v);
218} 252}
253#endif
219 254
255#ifdef arch_atomic_dec_and_test
256#define atomic_dec_and_test atomic_dec_and_test
220static __always_inline bool atomic_dec_and_test(atomic_t *v) 257static __always_inline bool atomic_dec_and_test(atomic_t *v)
221{ 258{
222 kasan_check_write(v, sizeof(*v)); 259 kasan_check_write(v, sizeof(*v));
223 return arch_atomic_dec_and_test(v); 260 return arch_atomic_dec_and_test(v);
224} 261}
262#endif
225 263
264#ifdef arch_atomic64_dec_and_test
265#define atomic64_dec_and_test atomic64_dec_and_test
226static __always_inline bool atomic64_dec_and_test(atomic64_t *v) 266static __always_inline bool atomic64_dec_and_test(atomic64_t *v)
227{ 267{
228 kasan_check_write(v, sizeof(*v)); 268 kasan_check_write(v, sizeof(*v));
229 return arch_atomic64_dec_and_test(v); 269 return arch_atomic64_dec_and_test(v);
230} 270}
271#endif
231 272
273#ifdef arch_atomic_inc_and_test
274#define atomic_inc_and_test atomic_inc_and_test
232static __always_inline bool atomic_inc_and_test(atomic_t *v) 275static __always_inline bool atomic_inc_and_test(atomic_t *v)
233{ 276{
234 kasan_check_write(v, sizeof(*v)); 277 kasan_check_write(v, sizeof(*v));
235 return arch_atomic_inc_and_test(v); 278 return arch_atomic_inc_and_test(v);
236} 279}
280#endif
237 281
282#ifdef arch_atomic64_inc_and_test
283#define atomic64_inc_and_test atomic64_inc_and_test
238static __always_inline bool atomic64_inc_and_test(atomic64_t *v) 284static __always_inline bool atomic64_inc_and_test(atomic64_t *v)
239{ 285{
240 kasan_check_write(v, sizeof(*v)); 286 kasan_check_write(v, sizeof(*v));
241 return arch_atomic64_inc_and_test(v); 287 return arch_atomic64_inc_and_test(v);
242} 288}
289#endif
243 290
244static __always_inline int atomic_add_return(int i, atomic_t *v) 291static __always_inline int atomic_add_return(int i, atomic_t *v)
245{ 292{
@@ -325,152 +372,96 @@ static __always_inline s64 atomic64_fetch_xor(s64 i, atomic64_t *v)
325 return arch_atomic64_fetch_xor(i, v); 372 return arch_atomic64_fetch_xor(i, v);
326} 373}
327 374
375#ifdef arch_atomic_sub_and_test
376#define atomic_sub_and_test atomic_sub_and_test
328static __always_inline bool atomic_sub_and_test(int i, atomic_t *v) 377static __always_inline bool atomic_sub_and_test(int i, atomic_t *v)
329{ 378{
330 kasan_check_write(v, sizeof(*v)); 379 kasan_check_write(v, sizeof(*v));
331 return arch_atomic_sub_and_test(i, v); 380 return arch_atomic_sub_and_test(i, v);
332} 381}
382#endif
333 383
384#ifdef arch_atomic64_sub_and_test
385#define atomic64_sub_and_test atomic64_sub_and_test
334static __always_inline bool atomic64_sub_and_test(s64 i, atomic64_t *v) 386static __always_inline bool atomic64_sub_and_test(s64 i, atomic64_t *v)
335{ 387{
336 kasan_check_write(v, sizeof(*v)); 388 kasan_check_write(v, sizeof(*v));
337 return arch_atomic64_sub_and_test(i, v); 389 return arch_atomic64_sub_and_test(i, v);
338} 390}
391#endif
339 392
393#ifdef arch_atomic_add_negative
394#define atomic_add_negative atomic_add_negative
340static __always_inline bool atomic_add_negative(int i, atomic_t *v) 395static __always_inline bool atomic_add_negative(int i, atomic_t *v)
341{ 396{
342 kasan_check_write(v, sizeof(*v)); 397 kasan_check_write(v, sizeof(*v));
343 return arch_atomic_add_negative(i, v); 398 return arch_atomic_add_negative(i, v);
344} 399}
400#endif
345 401
402#ifdef arch_atomic64_add_negative
403#define atomic64_add_negative atomic64_add_negative
346static __always_inline bool atomic64_add_negative(s64 i, atomic64_t *v) 404static __always_inline bool atomic64_add_negative(s64 i, atomic64_t *v)
347{ 405{
348 kasan_check_write(v, sizeof(*v)); 406 kasan_check_write(v, sizeof(*v));
349 return arch_atomic64_add_negative(i, v); 407 return arch_atomic64_add_negative(i, v);
350} 408}
409#endif
351 410
352static __always_inline unsigned long 411#define xchg(ptr, new) \
353cmpxchg_size(volatile void *ptr, unsigned long old, unsigned long new, int size) 412({ \
354{ 413 typeof(ptr) __ai_ptr = (ptr); \
355 kasan_check_write(ptr, size); 414 kasan_check_write(__ai_ptr, sizeof(*__ai_ptr)); \
356 switch (size) { 415 arch_xchg(__ai_ptr, (new)); \
357 case 1: 416})
358 return arch_cmpxchg((u8 *)ptr, (u8)old, (u8)new);
359 case 2:
360 return arch_cmpxchg((u16 *)ptr, (u16)old, (u16)new);
361 case 4:
362 return arch_cmpxchg((u32 *)ptr, (u32)old, (u32)new);
363 case 8:
364 BUILD_BUG_ON(sizeof(unsigned long) != 8);
365 return arch_cmpxchg((u64 *)ptr, (u64)old, (u64)new);
366 }
367 BUILD_BUG();
368 return 0;
369}
370 417
371#define cmpxchg(ptr, old, new) \ 418#define cmpxchg(ptr, old, new) \
372({ \ 419({ \
373 ((__typeof__(*(ptr)))cmpxchg_size((ptr), (unsigned long)(old), \ 420 typeof(ptr) __ai_ptr = (ptr); \
374 (unsigned long)(new), sizeof(*(ptr)))); \ 421 kasan_check_write(__ai_ptr, sizeof(*__ai_ptr)); \
422 arch_cmpxchg(__ai_ptr, (old), (new)); \
375}) 423})
376 424
377static __always_inline unsigned long
378sync_cmpxchg_size(volatile void *ptr, unsigned long old, unsigned long new,
379 int size)
380{
381 kasan_check_write(ptr, size);
382 switch (size) {
383 case 1:
384 return arch_sync_cmpxchg((u8 *)ptr, (u8)old, (u8)new);
385 case 2:
386 return arch_sync_cmpxchg((u16 *)ptr, (u16)old, (u16)new);
387 case 4:
388 return arch_sync_cmpxchg((u32 *)ptr, (u32)old, (u32)new);
389 case 8:
390 BUILD_BUG_ON(sizeof(unsigned long) != 8);
391 return arch_sync_cmpxchg((u64 *)ptr, (u64)old, (u64)new);
392 }
393 BUILD_BUG();
394 return 0;
395}
396
397#define sync_cmpxchg(ptr, old, new) \ 425#define sync_cmpxchg(ptr, old, new) \
398({ \ 426({ \
399 ((__typeof__(*(ptr)))sync_cmpxchg_size((ptr), \ 427 typeof(ptr) __ai_ptr = (ptr); \
400 (unsigned long)(old), (unsigned long)(new), \ 428 kasan_check_write(__ai_ptr, sizeof(*__ai_ptr)); \
401 sizeof(*(ptr)))); \ 429 arch_sync_cmpxchg(__ai_ptr, (old), (new)); \
402}) 430})
403 431
404static __always_inline unsigned long
405cmpxchg_local_size(volatile void *ptr, unsigned long old, unsigned long new,
406 int size)
407{
408 kasan_check_write(ptr, size);
409 switch (size) {
410 case 1:
411 return arch_cmpxchg_local((u8 *)ptr, (u8)old, (u8)new);
412 case 2:
413 return arch_cmpxchg_local((u16 *)ptr, (u16)old, (u16)new);
414 case 4:
415 return arch_cmpxchg_local((u32 *)ptr, (u32)old, (u32)new);
416 case 8:
417 BUILD_BUG_ON(sizeof(unsigned long) != 8);
418 return arch_cmpxchg_local((u64 *)ptr, (u64)old, (u64)new);
419 }
420 BUILD_BUG();
421 return 0;
422}
423
424#define cmpxchg_local(ptr, old, new) \ 432#define cmpxchg_local(ptr, old, new) \
425({ \ 433({ \
426 ((__typeof__(*(ptr)))cmpxchg_local_size((ptr), \ 434 typeof(ptr) __ai_ptr = (ptr); \
427 (unsigned long)(old), (unsigned long)(new), \ 435 kasan_check_write(__ai_ptr, sizeof(*__ai_ptr)); \
428 sizeof(*(ptr)))); \ 436 arch_cmpxchg_local(__ai_ptr, (old), (new)); \
429}) 437})
430 438
431static __always_inline u64
432cmpxchg64_size(volatile u64 *ptr, u64 old, u64 new)
433{
434 kasan_check_write(ptr, sizeof(*ptr));
435 return arch_cmpxchg64(ptr, old, new);
436}
437
438#define cmpxchg64(ptr, old, new) \ 439#define cmpxchg64(ptr, old, new) \
439({ \ 440({ \
440 ((__typeof__(*(ptr)))cmpxchg64_size((ptr), (u64)(old), \ 441 typeof(ptr) __ai_ptr = (ptr); \
441 (u64)(new))); \ 442 kasan_check_write(__ai_ptr, sizeof(*__ai_ptr)); \
443 arch_cmpxchg64(__ai_ptr, (old), (new)); \
442}) 444})
443 445
444static __always_inline u64
445cmpxchg64_local_size(volatile u64 *ptr, u64 old, u64 new)
446{
447 kasan_check_write(ptr, sizeof(*ptr));
448 return arch_cmpxchg64_local(ptr, old, new);
449}
450
451#define cmpxchg64_local(ptr, old, new) \ 446#define cmpxchg64_local(ptr, old, new) \
452({ \ 447({ \
453 ((__typeof__(*(ptr)))cmpxchg64_local_size((ptr), (u64)(old), \ 448 typeof(ptr) __ai_ptr = (ptr); \
454 (u64)(new))); \ 449 kasan_check_write(__ai_ptr, sizeof(*__ai_ptr)); \
450 arch_cmpxchg64_local(__ai_ptr, (old), (new)); \
455}) 451})
456 452
457/*
458 * Originally we had the following code here:
459 * __typeof__(p1) ____p1 = (p1);
460 * kasan_check_write(____p1, 2 * sizeof(*____p1));
461 * arch_cmpxchg_double(____p1, (p2), (o1), (o2), (n1), (n2));
462 * But it leads to compilation failures (see gcc issue 72873).
463 * So for now it's left non-instrumented.
464 * There are few callers of cmpxchg_double(), so it's not critical.
465 */
466#define cmpxchg_double(p1, p2, o1, o2, n1, n2) \ 453#define cmpxchg_double(p1, p2, o1, o2, n1, n2) \
467({ \ 454({ \
468 arch_cmpxchg_double((p1), (p2), (o1), (o2), (n1), (n2)); \ 455 typeof(p1) __ai_p1 = (p1); \
456 kasan_check_write(__ai_p1, 2 * sizeof(*__ai_p1)); \
457 arch_cmpxchg_double(__ai_p1, (p2), (o1), (o2), (n1), (n2)); \
469}) 458})
470 459
471#define cmpxchg_double_local(p1, p2, o1, o2, n1, n2) \ 460#define cmpxchg_double_local(p1, p2, o1, o2, n1, n2) \
472({ \ 461({ \
473 arch_cmpxchg_double_local((p1), (p2), (o1), (o2), (n1), (n2)); \ 462 typeof(p1) __ai_p1 = (p1); \
463 kasan_check_write(__ai_p1, 2 * sizeof(*__ai_p1)); \
464 arch_cmpxchg_double_local(__ai_p1, (p2), (o1), (o2), (n1), (n2)); \
474}) 465})
475 466
476#endif /* _LINUX_ATOMIC_INSTRUMENTED_H */ 467#endif /* _LINUX_ATOMIC_INSTRUMENTED_H */
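
All of the rewritten xchg/cmpxchg macros follow the same shape: capture the pointer argument once with typeof so its side effects are not repeated, run the KASAN check against the pointee's real size, then hand off to the arch_ implementation with its native type, instead of the old size-switching helpers that laundered everything through unsigned long. A reduced, compilable model of that shape; check_write() and arch_xchg() below are stand-ins for the real kernel hooks:

#include <stddef.h>
#include <stdio.h>

static void check_write(const void *p, size_t size)
{
	(void)p;
	printf("instrumentation: checking a %zu-byte write\n", size);
}

/* Stand-in "arch" op: plain exchange, evaluated on an already-captured pointer. */
#define arch_xchg(ptr, new) ({			\
	__typeof__(*(ptr)) __old = *(ptr);	\
	*(ptr) = (new);				\
	__old;					\
})

/* The wrapper: one evaluation of ptr, a size-accurate check, then the arch op. */
#define xchg(ptr, new) ({			\
	__typeof__(ptr) __ai_ptr = (ptr);	\
	check_write(__ai_ptr, sizeof(*__ai_ptr));	\
	arch_xchg(__ai_ptr, (new));		\
})

int main(void)
{
	long v = 1;
	long old = xchg(&v, 2L);

	printf("old=%ld new=%ld\n", old, v);
	return 0;
}
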
diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h
index abe6dd9ca2a8..13324aa828eb 100644
--- a/include/asm-generic/atomic.h
+++ b/include/asm-generic/atomic.h
@@ -186,11 +186,6 @@ ATOMIC_OP(xor, ^)
186 186
187#include <linux/irqflags.h> 187#include <linux/irqflags.h>
188 188
189static inline int atomic_add_negative(int i, atomic_t *v)
190{
191 return atomic_add_return(i, v) < 0;
192}
193
194static inline void atomic_add(int i, atomic_t *v) 189static inline void atomic_add(int i, atomic_t *v)
195{ 190{
196 atomic_add_return(i, v); 191 atomic_add_return(i, v);
@@ -201,35 +196,7 @@ static inline void atomic_sub(int i, atomic_t *v)
201 atomic_sub_return(i, v); 196 atomic_sub_return(i, v);
202} 197}
203 198
204static inline void atomic_inc(atomic_t *v)
205{
206 atomic_add_return(1, v);
207}
208
209static inline void atomic_dec(atomic_t *v)
210{
211 atomic_sub_return(1, v);
212}
213
214#define atomic_dec_return(v) atomic_sub_return(1, (v))
215#define atomic_inc_return(v) atomic_add_return(1, (v))
216
217#define atomic_sub_and_test(i, v) (atomic_sub_return((i), (v)) == 0)
218#define atomic_dec_and_test(v) (atomic_dec_return(v) == 0)
219#define atomic_inc_and_test(v) (atomic_inc_return(v) == 0)
220
221#define atomic_xchg(ptr, v) (xchg(&(ptr)->counter, (v))) 199#define atomic_xchg(ptr, v) (xchg(&(ptr)->counter, (v)))
222#define atomic_cmpxchg(v, old, new) (cmpxchg(&((v)->counter), (old), (new))) 200#define atomic_cmpxchg(v, old, new) (cmpxchg(&((v)->counter), (old), (new)))
223 201
224#ifndef __atomic_add_unless
225static inline int __atomic_add_unless(atomic_t *v, int a, int u)
226{
227 int c, old;
228 c = atomic_read(v);
229 while (c != u && (old = atomic_cmpxchg(v, c, c + a)) != c)
230 c = old;
231 return c;
232}
233#endif
234
235#endif /* __ASM_GENERIC_ATOMIC_H */ 202#endif /* __ASM_GENERIC_ATOMIC_H */
diff --git a/include/asm-generic/atomic64.h b/include/asm-generic/atomic64.h
index 8d28eb010d0d..97b28b7f1f29 100644
--- a/include/asm-generic/atomic64.h
+++ b/include/asm-generic/atomic64.h
@@ -11,6 +11,7 @@
11 */ 11 */
12#ifndef _ASM_GENERIC_ATOMIC64_H 12#ifndef _ASM_GENERIC_ATOMIC64_H
13#define _ASM_GENERIC_ATOMIC64_H 13#define _ASM_GENERIC_ATOMIC64_H
14#include <linux/types.h>
14 15
15typedef struct { 16typedef struct {
16 long long counter; 17 long long counter;
@@ -50,18 +51,10 @@ ATOMIC64_OPS(xor)
50#undef ATOMIC64_OP 51#undef ATOMIC64_OP
51 52
52extern long long atomic64_dec_if_positive(atomic64_t *v); 53extern long long atomic64_dec_if_positive(atomic64_t *v);
54#define atomic64_dec_if_positive atomic64_dec_if_positive
53extern long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n); 55extern long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n);
54extern long long atomic64_xchg(atomic64_t *v, long long new); 56extern long long atomic64_xchg(atomic64_t *v, long long new);
55extern int atomic64_add_unless(atomic64_t *v, long long a, long long u); 57extern long long atomic64_fetch_add_unless(atomic64_t *v, long long a, long long u);
56 58#define atomic64_fetch_add_unless atomic64_fetch_add_unless
57#define atomic64_add_negative(a, v) (atomic64_add_return((a), (v)) < 0)
58#define atomic64_inc(v) atomic64_add(1LL, (v))
59#define atomic64_inc_return(v) atomic64_add_return(1LL, (v))
60#define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0)
61#define atomic64_sub_and_test(a, v) (atomic64_sub_return((a), (v)) == 0)
62#define atomic64_dec(v) atomic64_sub(1LL, (v))
63#define atomic64_dec_return(v) atomic64_sub_return(1LL, (v))
64#define atomic64_dec_and_test(v) (atomic64_dec_return((v)) == 0)
65#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1LL, 0LL)
66 59
67#endif /* _ASM_GENERIC_ATOMIC64_H */ 60#endif /* _ASM_GENERIC_ATOMIC64_H */
diff --git a/include/asm-generic/bitops/atomic.h b/include/asm-generic/bitops/atomic.h
index 04deffaf5f7d..dd90c9792909 100644
--- a/include/asm-generic/bitops/atomic.h
+++ b/include/asm-generic/bitops/atomic.h
@@ -2,189 +2,67 @@
2#ifndef _ASM_GENERIC_BITOPS_ATOMIC_H_ 2#ifndef _ASM_GENERIC_BITOPS_ATOMIC_H_
3#define _ASM_GENERIC_BITOPS_ATOMIC_H_ 3#define _ASM_GENERIC_BITOPS_ATOMIC_H_
4 4
5#include <asm/types.h> 5#include <linux/atomic.h>
6#include <linux/irqflags.h> 6#include <linux/compiler.h>
7 7#include <asm/barrier.h>
8#ifdef CONFIG_SMP
9#include <asm/spinlock.h>
10#include <asm/cache.h> /* we use L1_CACHE_BYTES */
11
12/* Use an array of spinlocks for our atomic_ts.
13 * Hash function to index into a different SPINLOCK.
14 * Since "a" is usually an address, use one spinlock per cacheline.
15 */
16# define ATOMIC_HASH_SIZE 4
17# define ATOMIC_HASH(a) (&(__atomic_hash[ (((unsigned long) a)/L1_CACHE_BYTES) & (ATOMIC_HASH_SIZE-1) ]))
18
19extern arch_spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] __lock_aligned;
20
21/* Can't use raw_spin_lock_irq because of #include problems, so
22 * this is the substitute */
23#define _atomic_spin_lock_irqsave(l,f) do { \
24 arch_spinlock_t *s = ATOMIC_HASH(l); \
25 local_irq_save(f); \
26 arch_spin_lock(s); \
27} while(0)
28
29#define _atomic_spin_unlock_irqrestore(l,f) do { \
30 arch_spinlock_t *s = ATOMIC_HASH(l); \
31 arch_spin_unlock(s); \
32 local_irq_restore(f); \
33} while(0)
34
35
36#else
37# define _atomic_spin_lock_irqsave(l,f) do { local_irq_save(f); } while (0)
38# define _atomic_spin_unlock_irqrestore(l,f) do { local_irq_restore(f); } while (0)
39#endif
40 8
41/* 9/*
42 * NMI events can occur at any time, including when interrupts have been 10 * Implementation of atomic bitops using atomic-fetch ops.
43 * disabled by *_irqsave(). So you can get NMI events occurring while a 11 * See Documentation/atomic_bitops.txt for details.
44 * *_bit function is holding a spin lock. If the NMI handler also wants
45 * to do bit manipulation (and they do) then you can get a deadlock
46 * between the original caller of *_bit() and the NMI handler.
47 *
48 * by Keith Owens
49 */ 12 */
50 13
51/** 14static inline void set_bit(unsigned int nr, volatile unsigned long *p)
52 * set_bit - Atomically set a bit in memory
53 * @nr: the bit to set
54 * @addr: the address to start counting from
55 *
56 * This function is atomic and may not be reordered. See __set_bit()
57 * if you do not require the atomic guarantees.
58 *
59 * Note: there are no guarantees that this function will not be reordered
60 * on non x86 architectures, so if you are writing portable code,
61 * make sure not to rely on its reordering guarantees.
62 *
63 * Note that @nr may be almost arbitrarily large; this function is not
64 * restricted to acting on a single-word quantity.
65 */
66static inline void set_bit(int nr, volatile unsigned long *addr)
67{ 15{
68 unsigned long mask = BIT_MASK(nr); 16 p += BIT_WORD(nr);
69 unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); 17 atomic_long_or(BIT_MASK(nr), (atomic_long_t *)p);
70 unsigned long flags;
71
72 _atomic_spin_lock_irqsave(p, flags);
73 *p |= mask;
74 _atomic_spin_unlock_irqrestore(p, flags);
75} 18}
76 19
77/** 20static inline void clear_bit(unsigned int nr, volatile unsigned long *p)
78 * clear_bit - Clears a bit in memory
79 * @nr: Bit to clear
80 * @addr: Address to start counting from
81 *
82 * clear_bit() is atomic and may not be reordered. However, it does
83 * not contain a memory barrier, so if it is used for locking purposes,
84 * you should call smp_mb__before_atomic() and/or smp_mb__after_atomic()
85 * in order to ensure changes are visible on other processors.
86 */
87static inline void clear_bit(int nr, volatile unsigned long *addr)
88{ 21{
89 unsigned long mask = BIT_MASK(nr); 22 p += BIT_WORD(nr);
90 unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); 23 atomic_long_andnot(BIT_MASK(nr), (atomic_long_t *)p);
91 unsigned long flags;
92
93 _atomic_spin_lock_irqsave(p, flags);
94 *p &= ~mask;
95 _atomic_spin_unlock_irqrestore(p, flags);
96} 24}
97 25
98/** 26static inline void change_bit(unsigned int nr, volatile unsigned long *p)
99 * change_bit - Toggle a bit in memory
100 * @nr: Bit to change
101 * @addr: Address to start counting from
102 *
103 * change_bit() is atomic and may not be reordered. It may be
104 * reordered on other architectures than x86.
105 * Note that @nr may be almost arbitrarily large; this function is not
106 * restricted to acting on a single-word quantity.
107 */
108static inline void change_bit(int nr, volatile unsigned long *addr)
109{ 27{
110 unsigned long mask = BIT_MASK(nr); 28 p += BIT_WORD(nr);
111 unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); 29 atomic_long_xor(BIT_MASK(nr), (atomic_long_t *)p);
112 unsigned long flags;
113
114 _atomic_spin_lock_irqsave(p, flags);
115 *p ^= mask;
116 _atomic_spin_unlock_irqrestore(p, flags);
117} 30}
118 31
119/** 32static inline int test_and_set_bit(unsigned int nr, volatile unsigned long *p)
120 * test_and_set_bit - Set a bit and return its old value
121 * @nr: Bit to set
122 * @addr: Address to count from
123 *
124 * This operation is atomic and cannot be reordered.
125 * It may be reordered on other architectures than x86.
126 * It also implies a memory barrier.
127 */
128static inline int test_and_set_bit(int nr, volatile unsigned long *addr)
129{ 33{
34 long old;
130 unsigned long mask = BIT_MASK(nr); 35 unsigned long mask = BIT_MASK(nr);
131 unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
132 unsigned long old;
133 unsigned long flags;
134 36
135 _atomic_spin_lock_irqsave(p, flags); 37 p += BIT_WORD(nr);
136 old = *p; 38 if (READ_ONCE(*p) & mask)
137 *p = old | mask; 39 return 1;
138 _atomic_spin_unlock_irqrestore(p, flags);
139 40
140 return (old & mask) != 0; 41 old = atomic_long_fetch_or(mask, (atomic_long_t *)p);
42 return !!(old & mask);
141} 43}
142 44
143/** 45static inline int test_and_clear_bit(unsigned int nr, volatile unsigned long *p)
144 * test_and_clear_bit - Clear a bit and return its old value
145 * @nr: Bit to clear
146 * @addr: Address to count from
147 *
148 * This operation is atomic and cannot be reordered.
149 * It can be reorderdered on other architectures other than x86.
150 * It also implies a memory barrier.
151 */
152static inline int test_and_clear_bit(int nr, volatile unsigned long *addr)
153{ 46{
47 long old;
154 unsigned long mask = BIT_MASK(nr); 48 unsigned long mask = BIT_MASK(nr);
155 unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
156 unsigned long old;
157 unsigned long flags;
158 49
159 _atomic_spin_lock_irqsave(p, flags); 50 p += BIT_WORD(nr);
160 old = *p; 51 if (!(READ_ONCE(*p) & mask))
161 *p = old & ~mask; 52 return 0;
162 _atomic_spin_unlock_irqrestore(p, flags);
163 53
164 return (old & mask) != 0; 54 old = atomic_long_fetch_andnot(mask, (atomic_long_t *)p);
55 return !!(old & mask);
165} 56}
166 57
167/** 58static inline int test_and_change_bit(unsigned int nr, volatile unsigned long *p)
168 * test_and_change_bit - Change a bit and return its old value
169 * @nr: Bit to change
170 * @addr: Address to count from
171 *
172 * This operation is atomic and cannot be reordered.
173 * It also implies a memory barrier.
174 */
175static inline int test_and_change_bit(int nr, volatile unsigned long *addr)
176{ 59{
60 long old;
177 unsigned long mask = BIT_MASK(nr); 61 unsigned long mask = BIT_MASK(nr);
178 unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
179 unsigned long old;
180 unsigned long flags;
181
182 _atomic_spin_lock_irqsave(p, flags);
183 old = *p;
184 *p = old ^ mask;
185 _atomic_spin_unlock_irqrestore(p, flags);
186 62
187 return (old & mask) != 0; 63 p += BIT_WORD(nr);
64 old = atomic_long_fetch_xor(mask, (atomic_long_t *)p);
65 return !!(old & mask);
188} 66}
189 67
190#endif /* _ASM_GENERIC_BITOPS_ATOMIC_H */ 68#endif /* _ASM_GENERIC_BITOPS_ATOMIC_H */
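
The rewritten generic bitops are each a single atomic fetch-op on the word holding the bit, with the test_and_* forms deriving the old bit value from the fetch result (plus a READ_ONCE fast path that skips the atomic when the bit is already in the requested state). The same idea in userspace C11, reduced to one word; this is a sketch of the pattern, not the kernel header:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define BIT_MASK(nr) (1UL << (nr))

static void set_bit(unsigned int nr, atomic_ulong *word)
{
	atomic_fetch_or(word, BIT_MASK(nr));
}

static bool test_and_set_bit(unsigned int nr, atomic_ulong *word)
{
	unsigned long old = atomic_fetch_or(word, BIT_MASK(nr));

	return old & BIT_MASK(nr);	/* previous value of the bit */
}

static bool test_and_clear_bit(unsigned int nr, atomic_ulong *word)
{
	unsigned long old = atomic_fetch_and(word, ~BIT_MASK(nr));

	return old & BIT_MASK(nr);
}

int main(void)
{
	atomic_ulong word = ATOMIC_VAR_INIT(0);

	printf("bit 3 was set: %d\n", test_and_set_bit(3, &word));	/* 0 */
	printf("bit 3 was set: %d\n", test_and_set_bit(3, &word));	/* 1 */
	set_bit(5, &word);
	printf("bit 5 was set: %d\n", test_and_clear_bit(5, &word));	/* 1 */
	return 0;
}
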
diff --git a/include/asm-generic/bitops/lock.h b/include/asm-generic/bitops/lock.h
index 67ab280ad134..3ae021368f48 100644
--- a/include/asm-generic/bitops/lock.h
+++ b/include/asm-generic/bitops/lock.h
@@ -2,6 +2,10 @@
2#ifndef _ASM_GENERIC_BITOPS_LOCK_H_ 2#ifndef _ASM_GENERIC_BITOPS_LOCK_H_
3#define _ASM_GENERIC_BITOPS_LOCK_H_ 3#define _ASM_GENERIC_BITOPS_LOCK_H_
4 4
5#include <linux/atomic.h>
6#include <linux/compiler.h>
7#include <asm/barrier.h>
8
5/** 9/**
6 * test_and_set_bit_lock - Set a bit and return its old value, for lock 10 * test_and_set_bit_lock - Set a bit and return its old value, for lock
7 * @nr: Bit to set 11 * @nr: Bit to set
@@ -11,7 +15,20 @@
11 * the returned value is 0. 15 * the returned value is 0.
12 * It can be used to implement bit locks. 16 * It can be used to implement bit locks.
13 */ 17 */
14#define test_and_set_bit_lock(nr, addr) test_and_set_bit(nr, addr) 18static inline int test_and_set_bit_lock(unsigned int nr,
19 volatile unsigned long *p)
20{
21 long old;
22 unsigned long mask = BIT_MASK(nr);
23
24 p += BIT_WORD(nr);
25 if (READ_ONCE(*p) & mask)
26 return 1;
27
28 old = atomic_long_fetch_or_acquire(mask, (atomic_long_t *)p);
29 return !!(old & mask);
30}
31
15 32
16/** 33/**
17 * clear_bit_unlock - Clear a bit in memory, for unlock 34 * clear_bit_unlock - Clear a bit in memory, for unlock
@@ -20,11 +37,11 @@
20 * 37 *
21 * This operation is atomic and provides release barrier semantics. 38 * This operation is atomic and provides release barrier semantics.
22 */ 39 */
23#define clear_bit_unlock(nr, addr) \ 40static inline void clear_bit_unlock(unsigned int nr, volatile unsigned long *p)
24do { \ 41{
25 smp_mb__before_atomic(); \ 42 p += BIT_WORD(nr);
26 clear_bit(nr, addr); \ 43 atomic_long_fetch_andnot_release(BIT_MASK(nr), (atomic_long_t *)p);
27} while (0) 44}
28 45
29/** 46/**
30 * __clear_bit_unlock - Clear a bit in memory, for unlock 47 * __clear_bit_unlock - Clear a bit in memory, for unlock
@@ -37,11 +54,38 @@ do { \
37 * 54 *
38 * See for example x86's implementation. 55 * See for example x86's implementation.
39 */ 56 */
40#define __clear_bit_unlock(nr, addr) \ 57static inline void __clear_bit_unlock(unsigned int nr,
41do { \ 58 volatile unsigned long *p)
42 smp_mb__before_atomic(); \ 59{
43 clear_bit(nr, addr); \ 60 unsigned long old;
44} while (0)
45 61
46#endif /* _ASM_GENERIC_BITOPS_LOCK_H_ */ 62 p += BIT_WORD(nr);
63 old = READ_ONCE(*p);
64 old &= ~BIT_MASK(nr);
65 atomic_long_set_release((atomic_long_t *)p, old);
66}
67
68/**
69 * clear_bit_unlock_is_negative_byte - Clear a bit in memory and test if bottom
70 * byte is negative, for unlock.
71 * @nr: the bit to clear
72 * @addr: the address to start counting from
73 *
74 * This is a bit of a one-trick-pony for the filemap code, which clears
75 * PG_locked and tests PG_waiters,
76 */
77#ifndef clear_bit_unlock_is_negative_byte
78static inline bool clear_bit_unlock_is_negative_byte(unsigned int nr,
79 volatile unsigned long *p)
80{
81 long old;
82 unsigned long mask = BIT_MASK(nr);
83
84 p += BIT_WORD(nr);
85 old = atomic_long_fetch_andnot_release(mask, (atomic_long_t *)p);
86 return !!(old & BIT(7));
87}
88#define clear_bit_unlock_is_negative_byte clear_bit_unlock_is_negative_byte
89#endif
47 90
91#endif /* _ASM_GENERIC_BITOPS_LOCK_H_ */
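
test_and_set_bit_lock() and clear_bit_unlock() now map directly onto acquire and release fetch-ops, which is exactly the ordering a bit lock needs: the acquire on the set makes the critical section observe the previous holder's writes, and the release on the clear publishes them before the bit can be re-taken. A userspace C11 rendering of that pairing as a tiny bit spinlock, with no fast path and no lockdep, assuming a single word:

#include <stdatomic.h>
#include <pthread.h>
#include <stdio.h>

#define LOCK_BIT 0
#define BIT_MASK(nr) (1UL << (nr))

static atomic_ulong lock_word = ATOMIC_VAR_INIT(0);
static unsigned long counter;	/* protected by bit 0 of lock_word */

static void bit_lock(atomic_ulong *word)
{
	/* test_and_set_bit_lock(): retry until the old bit was clear;
	 * acquire ordering makes us see the previous holder's writes. */
	while (atomic_fetch_or_explicit(word, BIT_MASK(LOCK_BIT),
					memory_order_acquire) & BIT_MASK(LOCK_BIT))
		;
}

static void bit_unlock(atomic_ulong *word)
{
	/* clear_bit_unlock(): release ordering publishes the critical
	 * section before the next locker can observe the bit clear. */
	atomic_fetch_and_explicit(word, ~BIT_MASK(LOCK_BIT), memory_order_release);
}

static void *worker(void *arg)
{
	(void)arg;
	for (int i = 0; i < 100000; i++) {
		bit_lock(&lock_word);
		counter++;		/* plain increment, serialized by the bit lock */
		bit_unlock(&lock_word);
	}
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, worker, NULL);
	pthread_create(&b, NULL, worker, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	printf("counter = %lu (expect 200000)\n", counter);
	return 0;
}
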
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index f59639afaa39..b081794ba135 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -1019,8 +1019,8 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot);
1019int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot); 1019int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot);
1020int pud_clear_huge(pud_t *pud); 1020int pud_clear_huge(pud_t *pud);
1021int pmd_clear_huge(pmd_t *pmd); 1021int pmd_clear_huge(pmd_t *pmd);
1022int pud_free_pmd_page(pud_t *pud); 1022int pud_free_pmd_page(pud_t *pud, unsigned long addr);
1023int pmd_free_pte_page(pmd_t *pmd); 1023int pmd_free_pte_page(pmd_t *pmd, unsigned long addr);
1024#else /* !CONFIG_HAVE_ARCH_HUGE_VMAP */ 1024#else /* !CONFIG_HAVE_ARCH_HUGE_VMAP */
1025static inline int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot) 1025static inline int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot)
1026{ 1026{
@@ -1046,11 +1046,11 @@ static inline int pmd_clear_huge(pmd_t *pmd)
1046{ 1046{
1047 return 0; 1047 return 0;
1048} 1048}
1049static inline int pud_free_pmd_page(pud_t *pud) 1049static inline int pud_free_pmd_page(pud_t *pud, unsigned long addr)
1050{ 1050{
1051 return 0; 1051 return 0;
1052} 1052}
1053static inline int pmd_free_pte_page(pmd_t *pmd) 1053static inline int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
1054{ 1054{
1055 return 0; 1055 return 0;
1056} 1056}
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index 3063125197ad..e811ef7b8350 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -303,4 +303,14 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
303 303
304#define tlb_migrate_finish(mm) do {} while (0) 304#define tlb_migrate_finish(mm) do {} while (0)
305 305
306/*
307 * Used to flush the TLB when page tables are removed, when lazy
308 * TLB mode may cause a CPU to retain intermediate translations
309 * pointing to about-to-be-freed page table memory.
310 */
311#ifndef HAVE_TLB_FLUSH_REMOVE_TABLES
312#define tlb_flush_remove_tables(mm) do {} while (0)
313#define tlb_flush_remove_tables_local(mm) do {} while (0)
314#endif
315
306#endif /* _ASM_GENERIC__TLB_H */ 316#endif /* _ASM_GENERIC__TLB_H */
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 01ce3997cb42..1e8e88bdaf09 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -2,6 +2,8 @@
2/* Atomic operations usable in machine independent code */ 2/* Atomic operations usable in machine independent code */
3#ifndef _LINUX_ATOMIC_H 3#ifndef _LINUX_ATOMIC_H
4#define _LINUX_ATOMIC_H 4#define _LINUX_ATOMIC_H
5#include <linux/types.h>
6
5#include <asm/atomic.h> 7#include <asm/atomic.h>
6#include <asm/barrier.h> 8#include <asm/barrier.h>
7 9
@@ -36,40 +38,46 @@
36 * barriers on top of the relaxed variant. In the case where the relaxed 38 * barriers on top of the relaxed variant. In the case where the relaxed
37 * variant is already fully ordered, no additional barriers are needed. 39 * variant is already fully ordered, no additional barriers are needed.
38 * 40 *
39 * Besides, if an arch has a special barrier for acquire/release, it could 41 * If an architecture overrides __atomic_acquire_fence() it will probably
40 * implement its own __atomic_op_* and use the same framework for building 42 * want to define smp_mb__after_spinlock().
41 * variants
42 *
43 * If an architecture overrides __atomic_op_acquire() it will probably want
44 * to define smp_mb__after_spinlock().
45 */ 43 */
46#ifndef __atomic_op_acquire 44#ifndef __atomic_acquire_fence
45#define __atomic_acquire_fence smp_mb__after_atomic
46#endif
47
48#ifndef __atomic_release_fence
49#define __atomic_release_fence smp_mb__before_atomic
50#endif
51
52#ifndef __atomic_pre_full_fence
53#define __atomic_pre_full_fence smp_mb__before_atomic
54#endif
55
56#ifndef __atomic_post_full_fence
57#define __atomic_post_full_fence smp_mb__after_atomic
58#endif
59
47#define __atomic_op_acquire(op, args...) \ 60#define __atomic_op_acquire(op, args...) \
48({ \ 61({ \
49 typeof(op##_relaxed(args)) __ret = op##_relaxed(args); \ 62 typeof(op##_relaxed(args)) __ret = op##_relaxed(args); \
50 smp_mb__after_atomic(); \ 63 __atomic_acquire_fence(); \
51 __ret; \ 64 __ret; \
52}) 65})
53#endif
54 66
55#ifndef __atomic_op_release
56#define __atomic_op_release(op, args...) \ 67#define __atomic_op_release(op, args...) \
57({ \ 68({ \
58 smp_mb__before_atomic(); \ 69 __atomic_release_fence(); \
59 op##_relaxed(args); \ 70 op##_relaxed(args); \
60}) 71})
61#endif
62 72
63#ifndef __atomic_op_fence
64#define __atomic_op_fence(op, args...) \ 73#define __atomic_op_fence(op, args...) \
65({ \ 74({ \
66 typeof(op##_relaxed(args)) __ret; \ 75 typeof(op##_relaxed(args)) __ret; \
67 smp_mb__before_atomic(); \ 76 __atomic_pre_full_fence(); \
68 __ret = op##_relaxed(args); \ 77 __ret = op##_relaxed(args); \
69 smp_mb__after_atomic(); \ 78 __atomic_post_full_fence(); \
70 __ret; \ 79 __ret; \
71}) 80})
72#endif
73 81
74/* atomic_add_return_relaxed */ 82/* atomic_add_return_relaxed */
75#ifndef atomic_add_return_relaxed 83#ifndef atomic_add_return_relaxed
@@ -95,11 +103,23 @@
95#endif 103#endif
96#endif /* atomic_add_return_relaxed */ 104#endif /* atomic_add_return_relaxed */
97 105
106#ifndef atomic_inc
107#define atomic_inc(v) atomic_add(1, (v))
108#endif
109
98/* atomic_inc_return_relaxed */ 110/* atomic_inc_return_relaxed */
99#ifndef atomic_inc_return_relaxed 111#ifndef atomic_inc_return_relaxed
112
113#ifndef atomic_inc_return
114#define atomic_inc_return(v) atomic_add_return(1, (v))
115#define atomic_inc_return_relaxed(v) atomic_add_return_relaxed(1, (v))
116#define atomic_inc_return_acquire(v) atomic_add_return_acquire(1, (v))
117#define atomic_inc_return_release(v) atomic_add_return_release(1, (v))
118#else /* atomic_inc_return */
100#define atomic_inc_return_relaxed atomic_inc_return 119#define atomic_inc_return_relaxed atomic_inc_return
101#define atomic_inc_return_acquire atomic_inc_return 120#define atomic_inc_return_acquire atomic_inc_return
102#define atomic_inc_return_release atomic_inc_return 121#define atomic_inc_return_release atomic_inc_return
122#endif /* atomic_inc_return */
103 123
104#else /* atomic_inc_return_relaxed */ 124#else /* atomic_inc_return_relaxed */
105 125
@@ -143,11 +163,23 @@
143#endif 163#endif
144#endif /* atomic_sub_return_relaxed */ 164#endif /* atomic_sub_return_relaxed */
145 165
166#ifndef atomic_dec
167#define atomic_dec(v) atomic_sub(1, (v))
168#endif
169
146/* atomic_dec_return_relaxed */ 170/* atomic_dec_return_relaxed */
147#ifndef atomic_dec_return_relaxed 171#ifndef atomic_dec_return_relaxed
172
173#ifndef atomic_dec_return
174#define atomic_dec_return(v) atomic_sub_return(1, (v))
175#define atomic_dec_return_relaxed(v) atomic_sub_return_relaxed(1, (v))
176#define atomic_dec_return_acquire(v) atomic_sub_return_acquire(1, (v))
177#define atomic_dec_return_release(v) atomic_sub_return_release(1, (v))
178#else /* atomic_dec_return */
148#define atomic_dec_return_relaxed atomic_dec_return 179#define atomic_dec_return_relaxed atomic_dec_return
149#define atomic_dec_return_acquire atomic_dec_return 180#define atomic_dec_return_acquire atomic_dec_return
150#define atomic_dec_return_release atomic_dec_return 181#define atomic_dec_return_release atomic_dec_return
182#endif /* atomic_dec_return */
151 183
152#else /* atomic_dec_return_relaxed */ 184#else /* atomic_dec_return_relaxed */
153 185
@@ -328,12 +360,22 @@
328#endif 360#endif
329#endif /* atomic_fetch_and_relaxed */ 361#endif /* atomic_fetch_and_relaxed */
330 362
331#ifdef atomic_andnot 363#ifndef atomic_andnot
332/* atomic_fetch_andnot_relaxed */ 364#define atomic_andnot(i, v) atomic_and(~(int)(i), (v))
365#endif
366
333#ifndef atomic_fetch_andnot_relaxed 367#ifndef atomic_fetch_andnot_relaxed
334#define atomic_fetch_andnot_relaxed atomic_fetch_andnot 368
335#define atomic_fetch_andnot_acquire atomic_fetch_andnot 369#ifndef atomic_fetch_andnot
336#define atomic_fetch_andnot_release atomic_fetch_andnot 370#define atomic_fetch_andnot(i, v) atomic_fetch_and(~(int)(i), (v))
371#define atomic_fetch_andnot_relaxed(i, v) atomic_fetch_and_relaxed(~(int)(i), (v))
372#define atomic_fetch_andnot_acquire(i, v) atomic_fetch_and_acquire(~(int)(i), (v))
373#define atomic_fetch_andnot_release(i, v) atomic_fetch_and_release(~(int)(i), (v))
374#else /* atomic_fetch_andnot */
375#define atomic_fetch_andnot_relaxed atomic_fetch_andnot
376#define atomic_fetch_andnot_acquire atomic_fetch_andnot
377#define atomic_fetch_andnot_release atomic_fetch_andnot
378#endif /* atomic_fetch_andnot */
337 379
338#else /* atomic_fetch_andnot_relaxed */ 380#else /* atomic_fetch_andnot_relaxed */
339 381
@@ -352,7 +394,6 @@
352 __atomic_op_fence(atomic_fetch_andnot, __VA_ARGS__) 394 __atomic_op_fence(atomic_fetch_andnot, __VA_ARGS__)
353#endif 395#endif
354#endif /* atomic_fetch_andnot_relaxed */ 396#endif /* atomic_fetch_andnot_relaxed */
355#endif /* atomic_andnot */
356 397
357/* atomic_fetch_xor_relaxed */ 398/* atomic_fetch_xor_relaxed */
358#ifndef atomic_fetch_xor_relaxed 399#ifndef atomic_fetch_xor_relaxed
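
A hedged usage sketch of the new andnot fallback, just to make the bit arithmetic concrete ('flags' is a made-up variable):

	atomic_t flags = ATOMIC_INIT(0xff);

	atomic_andnot(0x0f, &flags);			/* atomic_and(~0x0f, &flags): flags == 0xf0 */
	int old = atomic_fetch_andnot(0x30, &flags);	/* old == 0xf0, flags == 0xc0 */
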
@@ -520,112 +561,140 @@
520#endif /* xchg_relaxed */ 561#endif /* xchg_relaxed */
521 562
522/** 563/**
564 * atomic_fetch_add_unless - add unless the number is already a given value
565 * @v: pointer of type atomic_t
566 * @a: the amount to add to v...
567 * @u: ...unless v is equal to u.
568 *
569 * Atomically adds @a to @v, if @v was not already @u.
570 * Returns the original value of @v.
571 */
572#ifndef atomic_fetch_add_unless
573static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
574{
575 int c = atomic_read(v);
576
577 do {
578 if (unlikely(c == u))
579 break;
580 } while (!atomic_try_cmpxchg(v, &c, c + a));
581
582 return c;
583}
584#endif
585
586/**
523 * atomic_add_unless - add unless the number is already a given value 587 * atomic_add_unless - add unless the number is already a given value
524 * @v: pointer of type atomic_t 588 * @v: pointer of type atomic_t
525 * @a: the amount to add to v... 589 * @a: the amount to add to v...
526 * @u: ...unless v is equal to u. 590 * @u: ...unless v is equal to u.
527 * 591 *
528 * Atomically adds @a to @v, so long as @v was not already @u. 592 * Atomically adds @a to @v, if @v was not already @u.
529 * Returns non-zero if @v was not @u, and zero otherwise. 593 * Returns true if the addition was done.
530 */ 594 */
531static inline int atomic_add_unless(atomic_t *v, int a, int u) 595static inline bool atomic_add_unless(atomic_t *v, int a, int u)
532{ 596{
533 return __atomic_add_unless(v, a, u) != u; 597 return atomic_fetch_add_unless(v, a, u) != u;
534} 598}
535 599
536/** 600/**
537 * atomic_inc_not_zero - increment unless the number is zero 601 * atomic_inc_not_zero - increment unless the number is zero
538 * @v: pointer of type atomic_t 602 * @v: pointer of type atomic_t
539 * 603 *
540 * Atomically increments @v by 1, so long as @v is non-zero. 604 * Atomically increments @v by 1, if @v is non-zero.
541 * Returns non-zero if @v was non-zero, and zero otherwise. 605 * Returns true if the increment was done.
542 */ 606 */
543#ifndef atomic_inc_not_zero 607#ifndef atomic_inc_not_zero
544#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) 608#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
545#endif 609#endif
546 610
547#ifndef atomic_andnot 611/**
548static inline void atomic_andnot(int i, atomic_t *v) 612 * atomic_inc_and_test - increment and test
549{ 613 * @v: pointer of type atomic_t
550 atomic_and(~i, v); 614 *
551} 615 * Atomically increments @v by 1
552 616 * and returns true if the result is zero, or false for all
553static inline int atomic_fetch_andnot(int i, atomic_t *v) 617 * other cases.
554{ 618 */
555 return atomic_fetch_and(~i, v); 619#ifndef atomic_inc_and_test
556} 620static inline bool atomic_inc_and_test(atomic_t *v)
557
558static inline int atomic_fetch_andnot_relaxed(int i, atomic_t *v)
559{ 621{
560 return atomic_fetch_and_relaxed(~i, v); 622 return atomic_inc_return(v) == 0;
561} 623}
624#endif
562 625
563static inline int atomic_fetch_andnot_acquire(int i, atomic_t *v) 626/**
627 * atomic_dec_and_test - decrement and test
628 * @v: pointer of type atomic_t
629 *
630 * Atomically decrements @v by 1 and
631 * returns true if the result is 0, or false for all other
632 * cases.
633 */
634#ifndef atomic_dec_and_test
635static inline bool atomic_dec_and_test(atomic_t *v)
564{ 636{
565 return atomic_fetch_and_acquire(~i, v); 637 return atomic_dec_return(v) == 0;
566} 638}
639#endif
567 640
568static inline int atomic_fetch_andnot_release(int i, atomic_t *v) 641/**
642 * atomic_sub_and_test - subtract value from variable and test result
643 * @i: integer value to subtract
644 * @v: pointer of type atomic_t
645 *
646 * Atomically subtracts @i from @v and returns
647 * true if the result is zero, or false for all
648 * other cases.
649 */
650#ifndef atomic_sub_and_test
651static inline bool atomic_sub_and_test(int i, atomic_t *v)
569{ 652{
570 return atomic_fetch_and_release(~i, v); 653 return atomic_sub_return(i, v) == 0;
571} 654}
572#endif 655#endif
573 656
574/** 657/**
575 * atomic_inc_not_zero_hint - increment if not null 658 * atomic_add_negative - add and test if negative
659 * @i: integer value to add
576 * @v: pointer of type atomic_t 660 * @v: pointer of type atomic_t
577 * @hint: probable value of the atomic before the increment
578 *
579 * This version of atomic_inc_not_zero() gives a hint of probable
580 * value of the atomic. This helps processor to not read the memory
581 * before doing the atomic read/modify/write cycle, lowering
582 * number of bus transactions on some arches.
583 * 661 *
584 * Returns: 0 if increment was not done, 1 otherwise. 662 * Atomically adds @i to @v and returns true
663 * if the result is negative, or false when
664 * result is greater than or equal to zero.
585 */ 665 */
586#ifndef atomic_inc_not_zero_hint 666#ifndef atomic_add_negative
587static inline int atomic_inc_not_zero_hint(atomic_t *v, int hint) 667static inline bool atomic_add_negative(int i, atomic_t *v)
588{ 668{
589 int val, c = hint; 669 return atomic_add_return(i, v) < 0;
590
591 /* sanity test, should be removed by compiler if hint is a constant */
592 if (!hint)
593 return atomic_inc_not_zero(v);
594
595 do {
596 val = atomic_cmpxchg(v, c, c + 1);
597 if (val == c)
598 return 1;
599 c = val;
600 } while (c);
601
602 return 0;
603} 670}
604#endif 671#endif
605 672
606#ifndef atomic_inc_unless_negative 673#ifndef atomic_inc_unless_negative
607static inline int atomic_inc_unless_negative(atomic_t *p) 674static inline bool atomic_inc_unless_negative(atomic_t *v)
608{ 675{
609 int v, v1; 676 int c = atomic_read(v);
610 for (v = 0; v >= 0; v = v1) { 677
611 v1 = atomic_cmpxchg(p, v, v + 1); 678 do {
612 if (likely(v1 == v)) 679 if (unlikely(c < 0))
613 return 1; 680 return false;
614 } 681 } while (!atomic_try_cmpxchg(v, &c, c + 1));
615 return 0; 682
683 return true;
616} 684}
617#endif 685#endif
618 686
619#ifndef atomic_dec_unless_positive 687#ifndef atomic_dec_unless_positive
620static inline int atomic_dec_unless_positive(atomic_t *p) 688static inline bool atomic_dec_unless_positive(atomic_t *v)
621{ 689{
622 int v, v1; 690 int c = atomic_read(v);
623 for (v = 0; v <= 0; v = v1) { 691
624 v1 = atomic_cmpxchg(p, v, v - 1); 692 do {
625 if (likely(v1 == v)) 693 if (unlikely(c > 0))
626 return 1; 694 return false;
627 } 695 } while (!atomic_try_cmpxchg(v, &c, c - 1));
628 return 0; 696
697 return true;
629} 698}
630#endif 699#endif
631 700
@@ -639,17 +708,14 @@ static inline int atomic_dec_unless_positive(atomic_t *p)
639#ifndef atomic_dec_if_positive 708#ifndef atomic_dec_if_positive
640static inline int atomic_dec_if_positive(atomic_t *v) 709static inline int atomic_dec_if_positive(atomic_t *v)
641{ 710{
642 int c, old, dec; 711 int dec, c = atomic_read(v);
643 c = atomic_read(v); 712
644 for (;;) { 713 do {
645 dec = c - 1; 714 dec = c - 1;
646 if (unlikely(dec < 0)) 715 if (unlikely(dec < 0))
647 break; 716 break;
648 old = atomic_cmpxchg((v), c, dec); 717 } while (!atomic_try_cmpxchg(v, &c, dec));
649 if (likely(old == c)) 718
650 break;
651 c = old;
652 }
653 return dec; 719 return dec;
654} 720}
655#endif 721#endif
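
All of the conditional fallbacks above share one idiom: read the counter once, then let atomic_try_cmpxchg() refresh the expected value on failure rather than re-reading by hand. A sketch of the same shape applied to a hypothetical bounded increment (not an API introduced by this patch):

	static inline bool atomic_inc_below(atomic_t *v, int limit)
	{
		int c = atomic_read(v);

		do {
			if (unlikely(c >= limit))
				return false;		/* no store performed */
		} while (!atomic_try_cmpxchg(v, &c, c + 1));

		return true;
	}
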
@@ -693,11 +759,23 @@ static inline int atomic_dec_if_positive(atomic_t *v)
693#endif 759#endif
694#endif /* atomic64_add_return_relaxed */ 760#endif /* atomic64_add_return_relaxed */
695 761
762#ifndef atomic64_inc
763#define atomic64_inc(v) atomic64_add(1, (v))
764#endif
765
696/* atomic64_inc_return_relaxed */ 766/* atomic64_inc_return_relaxed */
697#ifndef atomic64_inc_return_relaxed 767#ifndef atomic64_inc_return_relaxed
768
769#ifndef atomic64_inc_return
770#define atomic64_inc_return(v) atomic64_add_return(1, (v))
771#define atomic64_inc_return_relaxed(v) atomic64_add_return_relaxed(1, (v))
772#define atomic64_inc_return_acquire(v) atomic64_add_return_acquire(1, (v))
773#define atomic64_inc_return_release(v) atomic64_add_return_release(1, (v))
774#else /* atomic64_inc_return */
698#define atomic64_inc_return_relaxed atomic64_inc_return 775#define atomic64_inc_return_relaxed atomic64_inc_return
699#define atomic64_inc_return_acquire atomic64_inc_return 776#define atomic64_inc_return_acquire atomic64_inc_return
700#define atomic64_inc_return_release atomic64_inc_return 777#define atomic64_inc_return_release atomic64_inc_return
778#endif /* atomic64_inc_return */
701 779
702#else /* atomic64_inc_return_relaxed */ 780#else /* atomic64_inc_return_relaxed */
703 781
@@ -742,11 +820,23 @@ static inline int atomic_dec_if_positive(atomic_t *v)
742#endif 820#endif
743#endif /* atomic64_sub_return_relaxed */ 821#endif /* atomic64_sub_return_relaxed */
744 822
823#ifndef atomic64_dec
824#define atomic64_dec(v) atomic64_sub(1, (v))
825#endif
826
745/* atomic64_dec_return_relaxed */ 827/* atomic64_dec_return_relaxed */
746#ifndef atomic64_dec_return_relaxed 828#ifndef atomic64_dec_return_relaxed
829
830#ifndef atomic64_dec_return
831#define atomic64_dec_return(v) atomic64_sub_return(1, (v))
832#define atomic64_dec_return_relaxed(v) atomic64_sub_return_relaxed(1, (v))
833#define atomic64_dec_return_acquire(v) atomic64_sub_return_acquire(1, (v))
834#define atomic64_dec_return_release(v) atomic64_sub_return_release(1, (v))
835#else /* atomic64_dec_return */
747#define atomic64_dec_return_relaxed atomic64_dec_return 836#define atomic64_dec_return_relaxed atomic64_dec_return
748#define atomic64_dec_return_acquire atomic64_dec_return 837#define atomic64_dec_return_acquire atomic64_dec_return
749#define atomic64_dec_return_release atomic64_dec_return 838#define atomic64_dec_return_release atomic64_dec_return
839#endif /* atomic64_dec_return */
750 840
751#else /* atomic64_dec_return_relaxed */ 841#else /* atomic64_dec_return_relaxed */
752 842
@@ -927,12 +1017,22 @@ static inline int atomic_dec_if_positive(atomic_t *v)
927#endif 1017#endif
928#endif /* atomic64_fetch_and_relaxed */ 1018#endif /* atomic64_fetch_and_relaxed */
929 1019
930#ifdef atomic64_andnot 1020#ifndef atomic64_andnot
931/* atomic64_fetch_andnot_relaxed */ 1021#define atomic64_andnot(i, v) atomic64_and(~(long long)(i), (v))
1022#endif
1023
932#ifndef atomic64_fetch_andnot_relaxed 1024#ifndef atomic64_fetch_andnot_relaxed
933#define atomic64_fetch_andnot_relaxed atomic64_fetch_andnot 1025
934#define atomic64_fetch_andnot_acquire atomic64_fetch_andnot 1026#ifndef atomic64_fetch_andnot
935#define atomic64_fetch_andnot_release atomic64_fetch_andnot 1027#define atomic64_fetch_andnot(i, v) atomic64_fetch_and(~(long long)(i), (v))
1028#define atomic64_fetch_andnot_relaxed(i, v) atomic64_fetch_and_relaxed(~(long long)(i), (v))
1029#define atomic64_fetch_andnot_acquire(i, v) atomic64_fetch_and_acquire(~(long long)(i), (v))
1030#define atomic64_fetch_andnot_release(i, v) atomic64_fetch_and_release(~(long long)(i), (v))
1031#else /* atomic64_fetch_andnot */
1032#define atomic64_fetch_andnot_relaxed atomic64_fetch_andnot
1033#define atomic64_fetch_andnot_acquire atomic64_fetch_andnot
1034#define atomic64_fetch_andnot_release atomic64_fetch_andnot
1035#endif /* atomic64_fetch_andnot */
936 1036
937#else /* atomic64_fetch_andnot_relaxed */ 1037#else /* atomic64_fetch_andnot_relaxed */
938 1038
@@ -951,7 +1051,6 @@ static inline int atomic_dec_if_positive(atomic_t *v)
951 __atomic_op_fence(atomic64_fetch_andnot, __VA_ARGS__) 1051 __atomic_op_fence(atomic64_fetch_andnot, __VA_ARGS__)
952#endif 1052#endif
953#endif /* atomic64_fetch_andnot_relaxed */ 1053#endif /* atomic64_fetch_andnot_relaxed */
954#endif /* atomic64_andnot */
955 1054
956/* atomic64_fetch_xor_relaxed */ 1055/* atomic64_fetch_xor_relaxed */
957#ifndef atomic64_fetch_xor_relaxed 1056#ifndef atomic64_fetch_xor_relaxed
@@ -1049,30 +1148,164 @@ static inline int atomic_dec_if_positive(atomic_t *v)
1049#define atomic64_try_cmpxchg_release atomic64_try_cmpxchg 1148#define atomic64_try_cmpxchg_release atomic64_try_cmpxchg
1050#endif /* atomic64_try_cmpxchg */ 1149#endif /* atomic64_try_cmpxchg */
1051 1150
1052#ifndef atomic64_andnot 1151/**
1053static inline void atomic64_andnot(long long i, atomic64_t *v) 1152 * atomic64_fetch_add_unless - add unless the number is already a given value
1153 * @v: pointer of type atomic64_t
1154 * @a: the amount to add to v...
1155 * @u: ...unless v is equal to u.
1156 *
1157 * Atomically adds @a to @v, if @v was not already @u.
1158 * Returns the original value of @v.
1159 */
1160#ifndef atomic64_fetch_add_unless
1161static inline long long atomic64_fetch_add_unless(atomic64_t *v, long long a,
1162 long long u)
1054{ 1163{
1055 atomic64_and(~i, v); 1164 long long c = atomic64_read(v);
1165
1166 do {
1167 if (unlikely(c == u))
1168 break;
1169 } while (!atomic64_try_cmpxchg(v, &c, c + a));
1170
1171 return c;
1056} 1172}
1173#endif
1057 1174
1058static inline long long atomic64_fetch_andnot(long long i, atomic64_t *v) 1175/**
1176 * atomic64_add_unless - add unless the number is already a given value
1177 * @v: pointer of type atomic_t
1178 * @a: the amount to add to v...
1179 * @u: ...unless v is equal to u.
1180 *
1181 * Atomically adds @a to @v, if @v was not already @u.
1182 * Returns true if the addition was done.
1183 */
1184static inline bool atomic64_add_unless(atomic64_t *v, long long a, long long u)
1059{ 1185{
1060 return atomic64_fetch_and(~i, v); 1186 return atomic64_fetch_add_unless(v, a, u) != u;
1061} 1187}
1062 1188
1063static inline long long atomic64_fetch_andnot_relaxed(long long i, atomic64_t *v) 1189/**
1190 * atomic64_inc_not_zero - increment unless the number is zero
1191 * @v: pointer of type atomic64_t
1192 *
1193 * Atomically increments @v by 1, if @v is non-zero.
1194 * Returns true if the increment was done.
1195 */
1196#ifndef atomic64_inc_not_zero
1197#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
1198#endif
1199
1200/**
1201 * atomic64_inc_and_test - increment and test
1202 * @v: pointer of type atomic64_t
1203 *
1204 * Atomically increments @v by 1
1205 * and returns true if the result is zero, or false for all
1206 * other cases.
1207 */
1208#ifndef atomic64_inc_and_test
1209static inline bool atomic64_inc_and_test(atomic64_t *v)
1064{ 1210{
1065 return atomic64_fetch_and_relaxed(~i, v); 1211 return atomic64_inc_return(v) == 0;
1066} 1212}
1213#endif
1067 1214
1068static inline long long atomic64_fetch_andnot_acquire(long long i, atomic64_t *v) 1215/**
1216 * atomic64_dec_and_test - decrement and test
1217 * @v: pointer of type atomic64_t
1218 *
1219 * Atomically decrements @v by 1 and
1220 * returns true if the result is 0, or false for all other
1221 * cases.
1222 */
1223#ifndef atomic64_dec_and_test
1224static inline bool atomic64_dec_and_test(atomic64_t *v)
1069{ 1225{
1070 return atomic64_fetch_and_acquire(~i, v); 1226 return atomic64_dec_return(v) == 0;
1071} 1227}
1228#endif
1072 1229
1073static inline long long atomic64_fetch_andnot_release(long long i, atomic64_t *v) 1230/**
1231 * atomic64_sub_and_test - subtract value from variable and test result
1232 * @i: integer value to subtract
1233 * @v: pointer of type atomic64_t
1234 *
1235 * Atomically subtracts @i from @v and returns
1236 * true if the result is zero, or false for all
1237 * other cases.
1238 */
1239#ifndef atomic64_sub_and_test
1240static inline bool atomic64_sub_and_test(long long i, atomic64_t *v)
1241{
1242 return atomic64_sub_return(i, v) == 0;
1243}
1244#endif
1245
1246/**
1247 * atomic64_add_negative - add and test if negative
1248 * @i: integer value to add
1249 * @v: pointer of type atomic64_t
1250 *
1251 * Atomically adds @i to @v and returns true
1252 * if the result is negative, or false when
1253 * result is greater than or equal to zero.
1254 */
1255#ifndef atomic64_add_negative
1256static inline bool atomic64_add_negative(long long i, atomic64_t *v)
1074{ 1257{
1075 return atomic64_fetch_and_release(~i, v); 1258 return atomic64_add_return(i, v) < 0;
1259}
1260#endif
1261
1262#ifndef atomic64_inc_unless_negative
1263static inline bool atomic64_inc_unless_negative(atomic64_t *v)
1264{
1265 long long c = atomic64_read(v);
1266
1267 do {
1268 if (unlikely(c < 0))
1269 return false;
1270 } while (!atomic64_try_cmpxchg(v, &c, c + 1));
1271
1272 return true;
1273}
1274#endif
1275
1276#ifndef atomic64_dec_unless_positive
1277static inline bool atomic64_dec_unless_positive(atomic64_t *v)
1278{
1279 long long c = atomic64_read(v);
1280
1281 do {
1282 if (unlikely(c > 0))
1283 return false;
1284 } while (!atomic64_try_cmpxchg(v, &c, c - 1));
1285
1286 return true;
1287}
1288#endif
1289
1290/*
1291 * atomic64_dec_if_positive - decrement by 1 if old value positive
1292 * @v: pointer of type atomic64_t
1293 *
1294 * The function returns the old value of *v minus 1, even if
1295 * the atomic64 variable, v, was not decremented.
1296 */
1297#ifndef atomic64_dec_if_positive
1298static inline long long atomic64_dec_if_positive(atomic64_t *v)
1299{
1300 long long dec, c = atomic64_read(v);
1301
1302 do {
1303 dec = c - 1;
1304 if (unlikely(dec < 0))
1305 break;
1306 } while (!atomic64_try_cmpxchg(v, &c, dec));
1307
1308 return dec;
1076} 1309}
1077#endif 1310#endif
1078 1311
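
A minimal usage sketch of the conditional helpers the fallbacks above guarantee on every architecture; 'struct obj' and obj_get()/obj_put() are made up for the example:

	struct obj {
		atomic_t refs;
	};

	static bool obj_get(struct obj *o)
	{
		/* Only take a reference while at least one other holder exists. */
		return atomic_inc_not_zero(&o->refs);
	}

	static void obj_put(struct obj *o)
	{
		if (atomic_dec_and_test(&o->refs))
			kfree(o);
	}
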
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index 4cac4e1a72ff..af419012d77d 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -2,29 +2,9 @@
2#ifndef _LINUX_BITOPS_H 2#ifndef _LINUX_BITOPS_H
3#define _LINUX_BITOPS_H 3#define _LINUX_BITOPS_H
4#include <asm/types.h> 4#include <asm/types.h>
5#include <linux/bits.h>
5 6
6#ifdef __KERNEL__
7#define BIT(nr) (1UL << (nr))
8#define BIT_ULL(nr) (1ULL << (nr))
9#define BIT_MASK(nr) (1UL << ((nr) % BITS_PER_LONG))
10#define BIT_WORD(nr) ((nr) / BITS_PER_LONG)
11#define BIT_ULL_MASK(nr) (1ULL << ((nr) % BITS_PER_LONG_LONG))
12#define BIT_ULL_WORD(nr) ((nr) / BITS_PER_LONG_LONG)
13#define BITS_PER_BYTE 8
14#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long)) 7#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long))
15#endif
16
17/*
18 * Create a contiguous bitmask starting at bit position @l and ending at
19 * position @h. For example
20 * GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000.
21 */
22#define GENMASK(h, l) \
23 (((~0UL) - (1UL << (l)) + 1) & (~0UL >> (BITS_PER_LONG - 1 - (h))))
24
25#define GENMASK_ULL(h, l) \
26 (((~0ULL) - (1ULL << (l)) + 1) & \
27 (~0ULL >> (BITS_PER_LONG_LONG - 1 - (h))))
28 8
29extern unsigned int __sw_hweight8(unsigned int w); 9extern unsigned int __sw_hweight8(unsigned int w);
30extern unsigned int __sw_hweight16(unsigned int w); 10extern unsigned int __sw_hweight16(unsigned int w);
diff --git a/include/linux/bits.h b/include/linux/bits.h
new file mode 100644
index 000000000000..2b7b532c1d51
--- /dev/null
+++ b/include/linux/bits.h
@@ -0,0 +1,26 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2#ifndef __LINUX_BITS_H
3#define __LINUX_BITS_H
4#include <asm/bitsperlong.h>
5
6#define BIT(nr) (1UL << (nr))
7#define BIT_ULL(nr) (1ULL << (nr))
8#define BIT_MASK(nr) (1UL << ((nr) % BITS_PER_LONG))
9#define BIT_WORD(nr) ((nr) / BITS_PER_LONG)
10#define BIT_ULL_MASK(nr) (1ULL << ((nr) % BITS_PER_LONG_LONG))
11#define BIT_ULL_WORD(nr) ((nr) / BITS_PER_LONG_LONG)
12#define BITS_PER_BYTE 8
13
14/*
15 * Create a contiguous bitmask starting at bit position @l and ending at
16 * position @h. For example
17 * GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000.
18 */
19#define GENMASK(h, l) \
20 (((~0UL) - (1UL << (l)) + 1) & (~0UL >> (BITS_PER_LONG - 1 - (h))))
21
22#define GENMASK_ULL(h, l) \
23 (((~0ULL) - (1ULL << (l)) + 1) & \
24 (~0ULL >> (BITS_PER_LONG_LONG - 1 - (h))))
25
26#endif /* __LINUX_BITS_H */
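
A few worked expansions of the new header's macros, assuming a 64-bit BITS_PER_LONG:

	BIT(5)			/* 0x20UL */
	GENMASK(15, 8)		/* 0xff00UL: bits 8..15 set */
	GENMASK_ULL(39, 21)	/* 0x000000ffffe00000ULL, matching the comment above */
	BIT_WORD(70)		/* 1: bit 70 lives in the second long */
	BIT_MASK(70)		/* 1UL << 6: its mask within that long */
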
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 7dff1963c185..308918928767 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -194,6 +194,9 @@ extern void clocksource_suspend(void);
194extern void clocksource_resume(void); 194extern void clocksource_resume(void);
195extern struct clocksource * __init clocksource_default_clock(void); 195extern struct clocksource * __init clocksource_default_clock(void);
196extern void clocksource_mark_unstable(struct clocksource *cs); 196extern void clocksource_mark_unstable(struct clocksource *cs);
197extern void
198clocksource_start_suspend_timing(struct clocksource *cs, u64 start_cycles);
199extern u64 clocksource_stop_suspend_timing(struct clocksource *cs, u64 now);
197 200
198extern u64 201extern u64
199clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask, u64 *max_cycles); 202clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask, u64 *max_cycles);
diff --git a/include/linux/compat.h b/include/linux/compat.h
index c68acc47da57..df45ee8413d6 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -115,11 +115,6 @@ typedef compat_ulong_t compat_aio_context_t;
115struct compat_sel_arg_struct; 115struct compat_sel_arg_struct;
116struct rusage; 116struct rusage;
117 117
118struct compat_itimerspec {
119 struct compat_timespec it_interval;
120 struct compat_timespec it_value;
121};
122
123struct compat_utimbuf { 118struct compat_utimbuf {
124 compat_time_t actime; 119 compat_time_t actime;
125 compat_time_t modtime; 120 compat_time_t modtime;
@@ -300,10 +295,6 @@ extern int compat_get_timespec(struct timespec *, const void __user *);
300extern int compat_put_timespec(const struct timespec *, void __user *); 295extern int compat_put_timespec(const struct timespec *, void __user *);
301extern int compat_get_timeval(struct timeval *, const void __user *); 296extern int compat_get_timeval(struct timeval *, const void __user *);
302extern int compat_put_timeval(const struct timeval *, void __user *); 297extern int compat_put_timeval(const struct timeval *, void __user *);
303extern int get_compat_itimerspec64(struct itimerspec64 *its,
304 const struct compat_itimerspec __user *uits);
305extern int put_compat_itimerspec64(const struct itimerspec64 *its,
306 struct compat_itimerspec __user *uits);
307 298
308struct compat_iovec { 299struct compat_iovec {
309 compat_uptr_t iov_base; 300 compat_uptr_t iov_base;
diff --git a/include/linux/compat_time.h b/include/linux/compat_time.h
index 31f2774f1994..e70bfd1d2c3f 100644
--- a/include/linux/compat_time.h
+++ b/include/linux/compat_time.h
@@ -17,7 +17,16 @@ struct compat_timeval {
17 s32 tv_usec; 17 s32 tv_usec;
18}; 18};
19 19
20struct compat_itimerspec {
21 struct compat_timespec it_interval;
22 struct compat_timespec it_value;
23};
24
20extern int compat_get_timespec64(struct timespec64 *, const void __user *); 25extern int compat_get_timespec64(struct timespec64 *, const void __user *);
21extern int compat_put_timespec64(const struct timespec64 *, void __user *); 26extern int compat_put_timespec64(const struct timespec64 *, void __user *);
27extern int get_compat_itimerspec64(struct itimerspec64 *its,
28 const struct compat_itimerspec __user *uits);
29extern int put_compat_itimerspec64(const struct itimerspec64 *its,
30 struct compat_itimerspec __user *uits);
22 31
23#endif /* _LINUX_COMPAT_TIME_H */ 32#endif /* _LINUX_COMPAT_TIME_H */
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index a97a63eef59f..3233fbe23594 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -30,7 +30,7 @@ struct cpu {
30}; 30};
31 31
32extern void boot_cpu_init(void); 32extern void boot_cpu_init(void);
33extern void boot_cpu_state_init(void); 33extern void boot_cpu_hotplug_init(void);
34extern void cpu_init(void); 34extern void cpu_init(void);
35extern void trap_init(void); 35extern void trap_init(void);
36 36
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 8796ba387152..4cf06a64bc02 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -164,6 +164,7 @@ enum cpuhp_state {
164 CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE, 164 CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE,
165 CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE, 165 CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE,
166 CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE, 166 CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE,
167 CPUHP_AP_WATCHDOG_ONLINE,
167 CPUHP_AP_WORKQUEUE_ONLINE, 168 CPUHP_AP_WORKQUEUE_ONLINE,
168 CPUHP_AP_RCUTREE_ONLINE, 169 CPUHP_AP_RCUTREE_ONLINE,
169 CPUHP_AP_ONLINE_DYN, 170 CPUHP_AP_ONLINE_DYN,
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 56add823f190..401e4b254e30 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -894,6 +894,16 @@ typedef struct _efi_file_handle {
894 void *flush; 894 void *flush;
895} efi_file_handle_t; 895} efi_file_handle_t;
896 896
897typedef struct {
898 u64 revision;
899 u32 open_volume;
900} efi_file_io_interface_32_t;
901
902typedef struct {
903 u64 revision;
904 u64 open_volume;
905} efi_file_io_interface_64_t;
906
897typedef struct _efi_file_io_interface { 907typedef struct _efi_file_io_interface {
898 u64 revision; 908 u64 revision;
899 int (*open_volume)(struct _efi_file_io_interface *, 909 int (*open_volume)(struct _efi_file_io_interface *,
@@ -988,14 +998,12 @@ extern void efi_memmap_walk (efi_freemem_callback_t callback, void *arg);
988extern void efi_gettimeofday (struct timespec64 *ts); 998extern void efi_gettimeofday (struct timespec64 *ts);
989extern void efi_enter_virtual_mode (void); /* switch EFI to virtual mode, if possible */ 999extern void efi_enter_virtual_mode (void); /* switch EFI to virtual mode, if possible */
990#ifdef CONFIG_X86 1000#ifdef CONFIG_X86
991extern void efi_late_init(void);
992extern void efi_free_boot_services(void); 1001extern void efi_free_boot_services(void);
993extern efi_status_t efi_query_variable_store(u32 attributes, 1002extern efi_status_t efi_query_variable_store(u32 attributes,
994 unsigned long size, 1003 unsigned long size,
995 bool nonblocking); 1004 bool nonblocking);
996extern void efi_find_mirror(void); 1005extern void efi_find_mirror(void);
997#else 1006#else
998static inline void efi_late_init(void) {}
999static inline void efi_free_boot_services(void) {} 1007static inline void efi_free_boot_services(void) {}
1000 1008
1001static inline efi_status_t efi_query_variable_store(u32 attributes, 1009static inline efi_status_t efi_query_variable_store(u32 attributes,
@@ -1651,4 +1659,7 @@ struct linux_efi_tpm_eventlog {
1651 1659
1652extern int efi_tpm_eventlog_init(void); 1660extern int efi_tpm_eventlog_init(void);
1653 1661
1662/* Workqueue to queue EFI Runtime Services */
1663extern struct workqueue_struct *efi_rts_wq;
1664
1654#endif /* _LINUX_EFI_H */ 1665#endif /* _LINUX_EFI_H */
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index cbb872c1b607..9d2ea3e907d0 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -73,6 +73,7 @@
73#define GICD_TYPER_MBIS (1U << 16) 73#define GICD_TYPER_MBIS (1U << 16)
74 74
75#define GICD_TYPER_ID_BITS(typer) ((((typer) >> 19) & 0x1f) + 1) 75#define GICD_TYPER_ID_BITS(typer) ((((typer) >> 19) & 0x1f) + 1)
76#define GICD_TYPER_NUM_LPIS(typer) ((((typer) >> 11) & 0x1f) + 1)
76#define GICD_TYPER_IRQS(typer) ((((typer) & 0x1f) + 1) * 32) 77#define GICD_TYPER_IRQS(typer) ((((typer) & 0x1f) + 1) * 32)
77 78
78#define GICD_IROUTER_SPI_MODE_ONE (0U << 31) 79#define GICD_IROUTER_SPI_MODE_ONE (0U << 31)
@@ -576,8 +577,8 @@ struct rdists {
576 phys_addr_t phys_base; 577 phys_addr_t phys_base;
577 } __percpu *rdist; 578 } __percpu *rdist;
578 struct page *prop_page; 579 struct page *prop_page;
579 int id_bits;
580 u64 flags; 580 u64 flags;
581 u32 gicd_typer;
581 bool has_vlpis; 582 bool has_vlpis;
582 bool has_direct_lpi; 583 bool has_direct_lpi;
583}; 584};
diff --git a/include/linux/ktime.h b/include/linux/ktime.h
index 5b9fddbaac41..b2bb44f87f5a 100644
--- a/include/linux/ktime.h
+++ b/include/linux/ktime.h
@@ -93,8 +93,11 @@ static inline ktime_t timeval_to_ktime(struct timeval tv)
93/* Map the ktime_t to timeval conversion to ns_to_timeval function */ 93/* Map the ktime_t to timeval conversion to ns_to_timeval function */
94#define ktime_to_timeval(kt) ns_to_timeval((kt)) 94#define ktime_to_timeval(kt) ns_to_timeval((kt))
95 95
96/* Convert ktime_t to nanoseconds - NOP in the scalar storage format: */ 96/* Convert ktime_t to nanoseconds */
97#define ktime_to_ns(kt) (kt) 97static inline s64 ktime_to_ns(const ktime_t kt)
98{
99 return kt;
100}
98 101
99/** 102/**
100 * ktime_compare - Compares two ktime_t variables for less, greater or equal 103 * ktime_compare - Compares two ktime_t variables for less, greater or equal
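
The conversion from a bare #define keeps ktime_to_ns() a plain scalar copy but gives it a prototype, so the argument and result are type-checked. Minimal usage, for orientation:

	ktime_t kt = ktime_get();	/* current monotonic time */
	s64 ns = ktime_to_ns(kt);	/* same value, now through a typed inline */
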
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7ba6d356d18f..68a5121694ef 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -466,6 +466,9 @@ static inline void vma_set_anonymous(struct vm_area_struct *vma)
466 vma->vm_ops = NULL; 466 vma->vm_ops = NULL;
467} 467}
468 468
469/* flush_tlb_range() takes a vma, not a mm, and can care about flags */
470#define TLB_FLUSH_VMA(mm,flags) { .vm_mm = (mm), .vm_flags = (flags) }
471
469struct mmu_gather; 472struct mmu_gather;
470struct inode; 473struct inode;
471 474
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 99ce070e7dcb..efdc24dd9e97 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -335,176 +335,183 @@ struct core_state {
335 335
336struct kioctx_table; 336struct kioctx_table;
337struct mm_struct { 337struct mm_struct {
338 struct vm_area_struct *mmap; /* list of VMAs */ 338 struct {
339 struct rb_root mm_rb; 339 struct vm_area_struct *mmap; /* list of VMAs */
340 u32 vmacache_seqnum; /* per-thread vmacache */ 340 struct rb_root mm_rb;
341 u32 vmacache_seqnum; /* per-thread vmacache */
341#ifdef CONFIG_MMU 342#ifdef CONFIG_MMU
342 unsigned long (*get_unmapped_area) (struct file *filp, 343 unsigned long (*get_unmapped_area) (struct file *filp,
343 unsigned long addr, unsigned long len, 344 unsigned long addr, unsigned long len,
344 unsigned long pgoff, unsigned long flags); 345 unsigned long pgoff, unsigned long flags);
345#endif 346#endif
346 unsigned long mmap_base; /* base of mmap area */ 347 unsigned long mmap_base; /* base of mmap area */
347 unsigned long mmap_legacy_base; /* base of mmap area in bottom-up allocations */ 348 unsigned long mmap_legacy_base; /* base of mmap area in bottom-up allocations */
348#ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES 349#ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES
 349	/* Base addresses for compatible mmap() */			 350		/* Base addresses for compatible mmap() */
350 unsigned long mmap_compat_base; 351 unsigned long mmap_compat_base;
351 unsigned long mmap_compat_legacy_base; 352 unsigned long mmap_compat_legacy_base;
352#endif 353#endif
353 unsigned long task_size; /* size of task vm space */ 354 unsigned long task_size; /* size of task vm space */
354 unsigned long highest_vm_end; /* highest vma end address */ 355 unsigned long highest_vm_end; /* highest vma end address */
355 pgd_t * pgd; 356 pgd_t * pgd;
356 357
357 /** 358 /**
358 * @mm_users: The number of users including userspace. 359 * @mm_users: The number of users including userspace.
359 * 360 *
360 * Use mmget()/mmget_not_zero()/mmput() to modify. When this drops 361 * Use mmget()/mmget_not_zero()/mmput() to modify. When this
361 * to 0 (i.e. when the task exits and there are no other temporary 362 * drops to 0 (i.e. when the task exits and there are no other
362 * reference holders), we also release a reference on @mm_count 363 * temporary reference holders), we also release a reference on
363 * (which may then free the &struct mm_struct if @mm_count also 364 * @mm_count (which may then free the &struct mm_struct if
364 * drops to 0). 365 * @mm_count also drops to 0).
365 */ 366 */
366 atomic_t mm_users; 367 atomic_t mm_users;
367 368
368 /** 369 /**
369 * @mm_count: The number of references to &struct mm_struct 370 * @mm_count: The number of references to &struct mm_struct
370 * (@mm_users count as 1). 371 * (@mm_users count as 1).
371 * 372 *
372 * Use mmgrab()/mmdrop() to modify. When this drops to 0, the 373 * Use mmgrab()/mmdrop() to modify. When this drops to 0, the
373 * &struct mm_struct is freed. 374 * &struct mm_struct is freed.
374 */ 375 */
375 atomic_t mm_count; 376 atomic_t mm_count;
376 377
377#ifdef CONFIG_MMU 378#ifdef CONFIG_MMU
378 atomic_long_t pgtables_bytes; /* PTE page table pages */ 379 atomic_long_t pgtables_bytes; /* PTE page table pages */
379#endif 380#endif
380 int map_count; /* number of VMAs */ 381 int map_count; /* number of VMAs */
381 382
382 spinlock_t page_table_lock; /* Protects page tables and some counters */ 383 spinlock_t page_table_lock; /* Protects page tables and some
383 struct rw_semaphore mmap_sem; 384 * counters
385 */
386 struct rw_semaphore mmap_sem;
384 387
385 struct list_head mmlist; /* List of maybe swapped mm's. These are globally strung 388 struct list_head mmlist; /* List of maybe swapped mm's. These
386 * together off init_mm.mmlist, and are protected 389 * are globally strung together off
387 * by mmlist_lock 390 * init_mm.mmlist, and are protected
388 */ 391 * by mmlist_lock
392 */
389 393
390 394
391 unsigned long hiwater_rss; /* High-watermark of RSS usage */ 395 unsigned long hiwater_rss; /* High-watermark of RSS usage */
392 unsigned long hiwater_vm; /* High-water virtual memory usage */ 396 unsigned long hiwater_vm; /* High-water virtual memory usage */
393 397
394 unsigned long total_vm; /* Total pages mapped */ 398 unsigned long total_vm; /* Total pages mapped */
395 unsigned long locked_vm; /* Pages that have PG_mlocked set */ 399 unsigned long locked_vm; /* Pages that have PG_mlocked set */
396 unsigned long pinned_vm; /* Refcount permanently increased */ 400 unsigned long pinned_vm; /* Refcount permanently increased */
397 unsigned long data_vm; /* VM_WRITE & ~VM_SHARED & ~VM_STACK */ 401 unsigned long data_vm; /* VM_WRITE & ~VM_SHARED & ~VM_STACK */
398 unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE & ~VM_STACK */ 402 unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE & ~VM_STACK */
399 unsigned long stack_vm; /* VM_STACK */ 403 unsigned long stack_vm; /* VM_STACK */
400 unsigned long def_flags; 404 unsigned long def_flags;
401 405
402 spinlock_t arg_lock; /* protect the below fields */ 406 spinlock_t arg_lock; /* protect the below fields */
403 unsigned long start_code, end_code, start_data, end_data; 407 unsigned long start_code, end_code, start_data, end_data;
404 unsigned long start_brk, brk, start_stack; 408 unsigned long start_brk, brk, start_stack;
405 unsigned long arg_start, arg_end, env_start, env_end; 409 unsigned long arg_start, arg_end, env_start, env_end;
406 410
407 unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */ 411 unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */
408 412
409 /* 413 /*
410 * Special counters, in some configurations protected by the 414 * Special counters, in some configurations protected by the
411 * page_table_lock, in other configurations by being atomic. 415 * page_table_lock, in other configurations by being atomic.
412 */ 416 */
413 struct mm_rss_stat rss_stat; 417 struct mm_rss_stat rss_stat;
414
415 struct linux_binfmt *binfmt;
416 418
417 cpumask_var_t cpu_vm_mask_var; 419 struct linux_binfmt *binfmt;
418 420
419 /* Architecture-specific MM context */ 421 /* Architecture-specific MM context */
420 mm_context_t context; 422 mm_context_t context;
421 423
422 unsigned long flags; /* Must use atomic bitops to access the bits */ 424 unsigned long flags; /* Must use atomic bitops to access */
423 425
424 struct core_state *core_state; /* coredumping support */ 426 struct core_state *core_state; /* coredumping support */
425#ifdef CONFIG_MEMBARRIER 427#ifdef CONFIG_MEMBARRIER
426 atomic_t membarrier_state; 428 atomic_t membarrier_state;
427#endif 429#endif
428#ifdef CONFIG_AIO 430#ifdef CONFIG_AIO
429 spinlock_t ioctx_lock; 431 spinlock_t ioctx_lock;
430 struct kioctx_table __rcu *ioctx_table; 432 struct kioctx_table __rcu *ioctx_table;
431#endif 433#endif
432#ifdef CONFIG_MEMCG 434#ifdef CONFIG_MEMCG
433 /* 435 /*
434 * "owner" points to a task that is regarded as the canonical 436 * "owner" points to a task that is regarded as the canonical
435 * user/owner of this mm. All of the following must be true in 437 * user/owner of this mm. All of the following must be true in
436 * order for it to be changed: 438 * order for it to be changed:
437 * 439 *
438 * current == mm->owner 440 * current == mm->owner
439 * current->mm != mm 441 * current->mm != mm
440 * new_owner->mm == mm 442 * new_owner->mm == mm
441 * new_owner->alloc_lock is held 443 * new_owner->alloc_lock is held
442 */ 444 */
443 struct task_struct __rcu *owner; 445 struct task_struct __rcu *owner;
444#endif 446#endif
445 struct user_namespace *user_ns; 447 struct user_namespace *user_ns;
446 448
447 /* store ref to file /proc/<pid>/exe symlink points to */ 449 /* store ref to file /proc/<pid>/exe symlink points to */
448 struct file __rcu *exe_file; 450 struct file __rcu *exe_file;
449#ifdef CONFIG_MMU_NOTIFIER 451#ifdef CONFIG_MMU_NOTIFIER
450 struct mmu_notifier_mm *mmu_notifier_mm; 452 struct mmu_notifier_mm *mmu_notifier_mm;
451#endif 453#endif
452#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS 454#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
453 pgtable_t pmd_huge_pte; /* protected by page_table_lock */ 455 pgtable_t pmd_huge_pte; /* protected by page_table_lock */
454#endif
455#ifdef CONFIG_CPUMASK_OFFSTACK
456 struct cpumask cpumask_allocation;
457#endif 456#endif
458#ifdef CONFIG_NUMA_BALANCING 457#ifdef CONFIG_NUMA_BALANCING
459 /* 458 /*
460 * numa_next_scan is the next time that the PTEs will be marked 459 * numa_next_scan is the next time that the PTEs will be marked
461 * pte_numa. NUMA hinting faults will gather statistics and migrate 460 * pte_numa. NUMA hinting faults will gather statistics and
462 * pages to new nodes if necessary. 461 * migrate pages to new nodes if necessary.
463 */ 462 */
464 unsigned long numa_next_scan; 463 unsigned long numa_next_scan;
465 464
466 /* Restart point for scanning and setting pte_numa */ 465 /* Restart point for scanning and setting pte_numa */
467 unsigned long numa_scan_offset; 466 unsigned long numa_scan_offset;
468 467
469 /* numa_scan_seq prevents two threads setting pte_numa */ 468 /* numa_scan_seq prevents two threads setting pte_numa */
470 int numa_scan_seq; 469 int numa_scan_seq;
471#endif 470#endif
472 /* 471 /*
473 * An operation with batched TLB flushing is going on. Anything that 472 * An operation with batched TLB flushing is going on. Anything
474 * can move process memory needs to flush the TLB when moving a 473 * that can move process memory needs to flush the TLB when
475 * PROT_NONE or PROT_NUMA mapped page. 474 * moving a PROT_NONE or PROT_NUMA mapped page.
476 */ 475 */
477 atomic_t tlb_flush_pending; 476 atomic_t tlb_flush_pending;
478#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH 477#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
479 /* See flush_tlb_batched_pending() */ 478 /* See flush_tlb_batched_pending() */
480 bool tlb_flush_batched; 479 bool tlb_flush_batched;
481#endif 480#endif
482 struct uprobes_state uprobes_state; 481 struct uprobes_state uprobes_state;
483#ifdef CONFIG_HUGETLB_PAGE 482#ifdef CONFIG_HUGETLB_PAGE
484 atomic_long_t hugetlb_usage; 483 atomic_long_t hugetlb_usage;
485#endif 484#endif
486 struct work_struct async_put_work; 485 struct work_struct async_put_work;
487 486
488#if IS_ENABLED(CONFIG_HMM) 487#if IS_ENABLED(CONFIG_HMM)
489 /* HMM needs to track a few things per mm */ 488 /* HMM needs to track a few things per mm */
490 struct hmm *hmm; 489 struct hmm *hmm;
491#endif 490#endif
492} __randomize_layout; 491 } __randomize_layout;
492
493 /*
494 * The mm_cpumask needs to be at the end of mm_struct, because it
495 * is dynamically sized based on nr_cpu_ids.
496 */
497 unsigned long cpu_bitmap[];
498};
493 499
494extern struct mm_struct init_mm; 500extern struct mm_struct init_mm;
495 501
502/* Pointer magic because the dynamic array size confuses some compilers. */
496static inline void mm_init_cpumask(struct mm_struct *mm) 503static inline void mm_init_cpumask(struct mm_struct *mm)
497{ 504{
498#ifdef CONFIG_CPUMASK_OFFSTACK 505 unsigned long cpu_bitmap = (unsigned long)mm;
499 mm->cpu_vm_mask_var = &mm->cpumask_allocation; 506
500#endif 507 cpu_bitmap += offsetof(struct mm_struct, cpu_bitmap);
501 cpumask_clear(mm->cpu_vm_mask_var); 508 cpumask_clear((struct cpumask *)cpu_bitmap);
502} 509}
503 510
504/* Future-safe accessor for struct mm_struct's cpu_vm_mask. */ 511/* Future-safe accessor for struct mm_struct's cpu_vm_mask. */
505static inline cpumask_t *mm_cpumask(struct mm_struct *mm) 512static inline cpumask_t *mm_cpumask(struct mm_struct *mm)
506{ 513{
507 return mm->cpu_vm_mask_var; 514 return (struct cpumask *)&mm->cpu_bitmap;
508} 515}
509 516
510struct mmu_gather; 517struct mmu_gather;
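
With the cpumask moved into the trailing cpu_bitmap[], anything allocating an mm_struct has to add room for it; a simplified sketch of that pattern (the real sizing lives in kernel/fork.c's slab setup, not shown here):

	/* Sketch only: size the allocation for the runtime number of CPUs. */
	size_t size = sizeof(struct mm_struct) + cpumask_size();
	struct mm_struct *mm = kzalloc(size, GFP_KERNEL);

	if (mm)
		mm_init_cpumask(mm);	/* clears the trailing bitmap via the pointer arithmetic above */
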
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index b8d868d23e79..08f9247e9827 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -45,12 +45,18 @@ extern void touch_softlockup_watchdog(void);
45extern void touch_softlockup_watchdog_sync(void); 45extern void touch_softlockup_watchdog_sync(void);
46extern void touch_all_softlockup_watchdogs(void); 46extern void touch_all_softlockup_watchdogs(void);
47extern unsigned int softlockup_panic; 47extern unsigned int softlockup_panic;
48#else 48
49extern int lockup_detector_online_cpu(unsigned int cpu);
50extern int lockup_detector_offline_cpu(unsigned int cpu);
51#else /* CONFIG_SOFTLOCKUP_DETECTOR */
49static inline void touch_softlockup_watchdog_sched(void) { } 52static inline void touch_softlockup_watchdog_sched(void) { }
50static inline void touch_softlockup_watchdog(void) { } 53static inline void touch_softlockup_watchdog(void) { }
51static inline void touch_softlockup_watchdog_sync(void) { } 54static inline void touch_softlockup_watchdog_sync(void) { }
52static inline void touch_all_softlockup_watchdogs(void) { } 55static inline void touch_all_softlockup_watchdogs(void) { }
53#endif 56
57#define lockup_detector_online_cpu NULL
58#define lockup_detector_offline_cpu NULL
59#endif /* CONFIG_SOFTLOCKUP_DETECTOR */
54 60
55#ifdef CONFIG_DETECT_HUNG_TASK 61#ifdef CONFIG_DETECT_HUNG_TASK
56void reset_hung_task_detector(void); 62void reset_hung_task_detector(void);
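
The two new hooks pair with the CPUHP_AP_WATCHDOG_ONLINE state added to cpuhotplug.h above; the expected wiring is roughly the following (the state name string is a guess, not taken from this patch):

	int ret = cpuhp_setup_state(CPUHP_AP_WATCHDOG_ONLINE, "watchdog:online",
				    lockup_detector_online_cpu,
				    lockup_detector_offline_cpu);
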
diff --git a/include/linux/pci.h b/include/linux/pci.h
index abd5d5e17aee..c133ccfa002e 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -368,7 +368,6 @@ struct pci_dev {
368 unsigned int transparent:1; /* Subtractive decode bridge */ 368 unsigned int transparent:1; /* Subtractive decode bridge */
369 unsigned int multifunction:1; /* Multi-function device */ 369 unsigned int multifunction:1; /* Multi-function device */
370 370
371 unsigned int is_added:1;
372 unsigned int is_busmaster:1; /* Is busmaster */ 371 unsigned int is_busmaster:1; /* Is busmaster */
373 unsigned int no_msi:1; /* May not use MSI */ 372 unsigned int no_msi:1; /* May not use MSI */
374 unsigned int no_64bit_msi:1; /* May only use 32-bit MSIs */ 373 unsigned int no_64bit_msi:1; /* May only use 32-bit MSIs */
diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index c85704fcdbd2..ee7e987ea1b4 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -95,8 +95,8 @@ struct k_itimer {
95 clockid_t it_clock; 95 clockid_t it_clock;
96 timer_t it_id; 96 timer_t it_id;
97 int it_active; 97 int it_active;
98 int it_overrun; 98 s64 it_overrun;
99 int it_overrun_last; 99 s64 it_overrun_last;
100 int it_requeue_pending; 100 int it_requeue_pending;
101 int it_sigev_notify; 101 int it_sigev_notify;
102 ktime_t it_interval; 102 ktime_t it_interval;
diff --git a/include/linux/pti.h b/include/linux/pti.h
index 0174883a935a..1a941efcaa62 100644
--- a/include/linux/pti.h
+++ b/include/linux/pti.h
@@ -6,6 +6,7 @@
6#include <asm/pti.h> 6#include <asm/pti.h>
7#else 7#else
8static inline void pti_init(void) { } 8static inline void pti_init(void) { }
9static inline void pti_finalize(void) { }
9#endif 10#endif
10 11
11#endif 12#endif
diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index 36df6ccbc874..4786c2235b98 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -396,7 +396,16 @@ static inline void list_splice_tail_init_rcu(struct list_head *list,
396 * @member: the name of the list_head within the struct. 396 * @member: the name of the list_head within the struct.
397 * 397 *
398 * Continue to iterate over list of given type, continuing after 398 * Continue to iterate over list of given type, continuing after
399 * the current position. 399 * the current position which must have been in the list when the RCU read
400 * lock was taken.
401 * This would typically require either that you obtained the node from a
402 * previous walk of the list in the same RCU read-side critical section, or
403 * that you held some sort of non-RCU reference (such as a reference count)
404 * to keep the node alive *and* in the list.
405 *
406 * This iterator is similar to list_for_each_entry_from_rcu() except
407 * this starts after the given position and that one starts at the given
408 * position.
400 */ 409 */
401#define list_for_each_entry_continue_rcu(pos, head, member) \ 410#define list_for_each_entry_continue_rcu(pos, head, member) \
402 for (pos = list_entry_rcu(pos->member.next, typeof(*pos), member); \ 411 for (pos = list_entry_rcu(pos->member.next, typeof(*pos), member); \
@@ -411,6 +420,14 @@ static inline void list_splice_tail_init_rcu(struct list_head *list,
411 * 420 *
412 * Iterate over the tail of a list starting from a given position, 421 * Iterate over the tail of a list starting from a given position,
413 * which must have been in the list when the RCU read lock was taken. 422 * which must have been in the list when the RCU read lock was taken.
423 * This would typically require either that you obtained the node from a
424 * previous walk of the list in the same RCU read-side critical section, or
425 * that you held some sort of non-RCU reference (such as a reference count)
426 * to keep the node alive *and* in the list.
427 *
428 * This iterator is similar to list_for_each_entry_continue_rcu() except
429 * this starts from the given position and that one starts from the position
430 * after the given position.
414 */ 431 */
415#define list_for_each_entry_from_rcu(pos, head, member) \ 432#define list_for_each_entry_from_rcu(pos, head, member) \
416 for (; &(pos)->member != (head); \ 433 for (; &(pos)->member != (head); \
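
An illustrative walk showing the distinction the new comments draw; 'head', 'key' and process() are placeholders, and the key is assumed to be present:

	struct item {
		struct list_head list;
		int key;
	};
	struct item *pos;

	rcu_read_lock();
	list_for_each_entry_rcu(pos, &head, list)
		if (pos->key == key)
			break;		/* found under this read-side critical section */

	/* Visit the entries *after* the one we stopped at... */
	list_for_each_entry_continue_rcu(pos, &head, list)
		process(pos);
	/* ...while list_for_each_entry_from_rcu() would start at 'pos' itself. */
	rcu_read_unlock();
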
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 65163aa0bb04..75e5b393cf44 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -64,7 +64,6 @@ void rcu_barrier_tasks(void);
64 64
65void __rcu_read_lock(void); 65void __rcu_read_lock(void);
66void __rcu_read_unlock(void); 66void __rcu_read_unlock(void);
67void rcu_read_unlock_special(struct task_struct *t);
68void synchronize_rcu(void); 67void synchronize_rcu(void);
69 68
70/* 69/*
@@ -159,11 +158,11 @@ static inline void rcu_init_nohz(void) { }
159 } while (0) 158 } while (0)
160 159
161/* 160/*
162 * Note a voluntary context switch for RCU-tasks benefit. This is a 161 * Note a quasi-voluntary context switch for RCU-tasks's benefit.
163 * macro rather than an inline function to avoid #include hell. 162 * This is a macro rather than an inline function to avoid #include hell.
164 */ 163 */
165#ifdef CONFIG_TASKS_RCU 164#ifdef CONFIG_TASKS_RCU
166#define rcu_note_voluntary_context_switch_lite(t) \ 165#define rcu_tasks_qs(t) \
167 do { \ 166 do { \
168 if (READ_ONCE((t)->rcu_tasks_holdout)) \ 167 if (READ_ONCE((t)->rcu_tasks_holdout)) \
169 WRITE_ONCE((t)->rcu_tasks_holdout, false); \ 168 WRITE_ONCE((t)->rcu_tasks_holdout, false); \
@@ -171,14 +170,14 @@ static inline void rcu_init_nohz(void) { }
171#define rcu_note_voluntary_context_switch(t) \ 170#define rcu_note_voluntary_context_switch(t) \
172 do { \ 171 do { \
173 rcu_all_qs(); \ 172 rcu_all_qs(); \
174 rcu_note_voluntary_context_switch_lite(t); \ 173 rcu_tasks_qs(t); \
175 } while (0) 174 } while (0)
176void call_rcu_tasks(struct rcu_head *head, rcu_callback_t func); 175void call_rcu_tasks(struct rcu_head *head, rcu_callback_t func);
177void synchronize_rcu_tasks(void); 176void synchronize_rcu_tasks(void);
178void exit_tasks_rcu_start(void); 177void exit_tasks_rcu_start(void);
179void exit_tasks_rcu_finish(void); 178void exit_tasks_rcu_finish(void);
180#else /* #ifdef CONFIG_TASKS_RCU */ 179#else /* #ifdef CONFIG_TASKS_RCU */
181#define rcu_note_voluntary_context_switch_lite(t) do { } while (0) 180#define rcu_tasks_qs(t) do { } while (0)
182#define rcu_note_voluntary_context_switch(t) rcu_all_qs() 181#define rcu_note_voluntary_context_switch(t) rcu_all_qs()
183#define call_rcu_tasks call_rcu_sched 182#define call_rcu_tasks call_rcu_sched
184#define synchronize_rcu_tasks synchronize_sched 183#define synchronize_rcu_tasks synchronize_sched
@@ -195,8 +194,8 @@ static inline void exit_tasks_rcu_finish(void) { }
195 */ 194 */
196#define cond_resched_tasks_rcu_qs() \ 195#define cond_resched_tasks_rcu_qs() \
197do { \ 196do { \
198 if (!cond_resched()) \ 197 rcu_tasks_qs(current); \
199 rcu_note_voluntary_context_switch_lite(current); \ 198 cond_resched(); \
200} while (0) 199} while (0)
201 200
202/* 201/*
@@ -567,8 +566,8 @@ static inline void rcu_preempt_sleep_check(void) { }
567 * This is simply an identity function, but it documents where a pointer 566 * This is simply an identity function, but it documents where a pointer
568 * is handed off from RCU to some other synchronization mechanism, for 567 * is handed off from RCU to some other synchronization mechanism, for
569 * example, reference counting or locking. In C11, it would map to 568 * example, reference counting or locking. In C11, it would map to
570 * kill_dependency(). It could be used as follows: 569 * kill_dependency(). It could be used as follows::
571 * `` 570 *
572 * rcu_read_lock(); 571 * rcu_read_lock();
573 * p = rcu_dereference(gp); 572 * p = rcu_dereference(gp);
574 * long_lived = is_long_lived(p); 573 * long_lived = is_long_lived(p);
@@ -579,7 +578,6 @@ static inline void rcu_preempt_sleep_check(void) { }
579 * p = rcu_pointer_handoff(p); 578 * p = rcu_pointer_handoff(p);
580 * } 579 * }
581 * rcu_read_unlock(); 580 * rcu_read_unlock();
582 *``
583 */ 581 */
584#define rcu_pointer_handoff(p) (p) 582#define rcu_pointer_handoff(p) (p)
585 583
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 7b3c82e8a625..8d9a0ea8f0b5 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -93,7 +93,7 @@ static inline void kfree_call_rcu(struct rcu_head *head,
93#define rcu_note_context_switch(preempt) \ 93#define rcu_note_context_switch(preempt) \
94 do { \ 94 do { \
95 rcu_sched_qs(); \ 95 rcu_sched_qs(); \
96 rcu_note_voluntary_context_switch_lite(current); \ 96 rcu_tasks_qs(current); \
97 } while (0) 97 } while (0)
98 98
99static inline int rcu_needs_cpu(u64 basemono, u64 *nextevt) 99static inline int rcu_needs_cpu(u64 basemono, u64 *nextevt)
diff --git a/include/linux/refcount.h b/include/linux/refcount.h
index a685da2c4522..e28cce21bad6 100644
--- a/include/linux/refcount.h
+++ b/include/linux/refcount.h
@@ -3,9 +3,10 @@
3#define _LINUX_REFCOUNT_H 3#define _LINUX_REFCOUNT_H
4 4
5#include <linux/atomic.h> 5#include <linux/atomic.h>
6#include <linux/mutex.h> 6#include <linux/compiler.h>
7#include <linux/spinlock.h> 7#include <linux/spinlock_types.h>
8#include <linux/kernel.h> 8
9struct mutex;
9 10
10/** 11/**
11 * struct refcount_t - variant of atomic_t specialized for reference counts 12 * struct refcount_t - variant of atomic_t specialized for reference counts
@@ -42,17 +43,30 @@ static inline unsigned int refcount_read(const refcount_t *r)
42 return atomic_read(&r->refs); 43 return atomic_read(&r->refs);
43} 44}
44 45
46extern __must_check bool refcount_add_not_zero_checked(unsigned int i, refcount_t *r);
47extern void refcount_add_checked(unsigned int i, refcount_t *r);
48
49extern __must_check bool refcount_inc_not_zero_checked(refcount_t *r);
50extern void refcount_inc_checked(refcount_t *r);
51
52extern __must_check bool refcount_sub_and_test_checked(unsigned int i, refcount_t *r);
53
54extern __must_check bool refcount_dec_and_test_checked(refcount_t *r);
55extern void refcount_dec_checked(refcount_t *r);
56
45#ifdef CONFIG_REFCOUNT_FULL 57#ifdef CONFIG_REFCOUNT_FULL
46extern __must_check bool refcount_add_not_zero(unsigned int i, refcount_t *r);
47extern void refcount_add(unsigned int i, refcount_t *r);
48 58
49extern __must_check bool refcount_inc_not_zero(refcount_t *r); 59#define refcount_add_not_zero refcount_add_not_zero_checked
50extern void refcount_inc(refcount_t *r); 60#define refcount_add refcount_add_checked
61
62#define refcount_inc_not_zero refcount_inc_not_zero_checked
63#define refcount_inc refcount_inc_checked
64
65#define refcount_sub_and_test refcount_sub_and_test_checked
51 66
52extern __must_check bool refcount_sub_and_test(unsigned int i, refcount_t *r); 67#define refcount_dec_and_test refcount_dec_and_test_checked
68#define refcount_dec refcount_dec_checked
53 69
54extern __must_check bool refcount_dec_and_test(refcount_t *r);
55extern void refcount_dec(refcount_t *r);
56#else 70#else
57# ifdef CONFIG_ARCH_HAS_REFCOUNT 71# ifdef CONFIG_ARCH_HAS_REFCOUNT
58# include <asm/refcount.h> 72# include <asm/refcount.h>
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 43731fe51c97..dac5086e3815 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -167,8 +167,8 @@ struct task_group;
167 * need_sleep = false; 167 * need_sleep = false;
168 * wake_up_state(p, TASK_UNINTERRUPTIBLE); 168 * wake_up_state(p, TASK_UNINTERRUPTIBLE);
169 * 169 *
170 * Where wake_up_state() (and all other wakeup primitives) imply enough 170 * where wake_up_state() executes a full memory barrier before accessing the
171 * barriers to order the store of the variable against wakeup. 171 * task state.
172 * 172 *
173 * Wakeup will do: if (@state & p->state) p->state = TASK_RUNNING, that is, 173 * Wakeup will do: if (@state & p->state) p->state = TASK_RUNNING, that is,
174 * once it observes the TASK_UNINTERRUPTIBLE store the waking CPU can issue a 174 * once it observes the TASK_UNINTERRUPTIBLE store the waking CPU can issue a
@@ -1017,7 +1017,6 @@ struct task_struct {
1017 u64 last_sum_exec_runtime; 1017 u64 last_sum_exec_runtime;
1018 struct callback_head numa_work; 1018 struct callback_head numa_work;
1019 1019
1020 struct list_head numa_entry;
1021 struct numa_group *numa_group; 1020 struct numa_group *numa_group;
1022 1021
1023 /* 1022 /*
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 1c1a1512ec55..913488d828cb 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -40,7 +40,6 @@ extern unsigned int sysctl_numa_balancing_scan_size;
40#ifdef CONFIG_SCHED_DEBUG 40#ifdef CONFIG_SCHED_DEBUG
41extern __read_mostly unsigned int sysctl_sched_migration_cost; 41extern __read_mostly unsigned int sysctl_sched_migration_cost;
42extern __read_mostly unsigned int sysctl_sched_nr_migrate; 42extern __read_mostly unsigned int sysctl_sched_nr_migrate;
43extern __read_mostly unsigned int sysctl_sched_time_avg;
44 43
45int sched_proc_update_handler(struct ctl_table *table, int write, 44int sched_proc_update_handler(struct ctl_table *table, int write,
46 void __user *buffer, size_t *length, 45 void __user *buffer, size_t *length,
diff --git a/include/linux/sched_clock.h b/include/linux/sched_clock.h
index 411b52e424e1..abe28d5cb3f4 100644
--- a/include/linux/sched_clock.h
+++ b/include/linux/sched_clock.h
@@ -9,17 +9,16 @@
9#define LINUX_SCHED_CLOCK 9#define LINUX_SCHED_CLOCK
10 10
11#ifdef CONFIG_GENERIC_SCHED_CLOCK 11#ifdef CONFIG_GENERIC_SCHED_CLOCK
12extern void sched_clock_postinit(void); 12extern void generic_sched_clock_init(void);
13 13
14extern void sched_clock_register(u64 (*read)(void), int bits, 14extern void sched_clock_register(u64 (*read)(void), int bits,
15 unsigned long rate); 15 unsigned long rate);
16#else 16#else
17static inline void sched_clock_postinit(void) { } 17static inline void generic_sched_clock_init(void) { }
18 18
19static inline void sched_clock_register(u64 (*read)(void), int bits, 19static inline void sched_clock_register(u64 (*read)(void), int bits,
20 unsigned long rate) 20 unsigned long rate)
21{ 21{
22 ;
23} 22}
24#endif 23#endif
25 24
diff --git a/include/linux/smpboot.h b/include/linux/smpboot.h
index c174844cf663..d0884b525001 100644
--- a/include/linux/smpboot.h
+++ b/include/linux/smpboot.h
@@ -25,8 +25,6 @@ struct smpboot_thread_data;
25 * parked (cpu offline) 25 * parked (cpu offline)
26 * @unpark: Optional unpark function, called when the thread is 26 * @unpark: Optional unpark function, called when the thread is
27 * unparked (cpu online) 27 * unparked (cpu online)
28 * @cpumask: Internal state. To update which threads are unparked,
29 * call smpboot_update_cpumask_percpu_thread().
30 * @selfparking: Thread is not parked by the park function. 28 * @selfparking: Thread is not parked by the park function.
31 * @thread_comm: The base name of the thread 29 * @thread_comm: The base name of the thread
32 */ 30 */
@@ -40,23 +38,12 @@ struct smp_hotplug_thread {
40 void (*cleanup)(unsigned int cpu, bool online); 38 void (*cleanup)(unsigned int cpu, bool online);
41 void (*park)(unsigned int cpu); 39 void (*park)(unsigned int cpu);
42 void (*unpark)(unsigned int cpu); 40 void (*unpark)(unsigned int cpu);
43 cpumask_var_t cpumask;
44 bool selfparking; 41 bool selfparking;
45 const char *thread_comm; 42 const char *thread_comm;
46}; 43};
47 44
48int smpboot_register_percpu_thread_cpumask(struct smp_hotplug_thread *plug_thread, 45int smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread);
49 const struct cpumask *cpumask);
50
51static inline int
52smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread)
53{
54 return smpboot_register_percpu_thread_cpumask(plug_thread,
55 cpu_possible_mask);
56}
57 46
58void smpboot_unregister_percpu_thread(struct smp_hotplug_thread *plug_thread); 47void smpboot_unregister_percpu_thread(struct smp_hotplug_thread *plug_thread);
59void smpboot_update_cpumask_percpu_thread(struct smp_hotplug_thread *plug_thread,
60 const struct cpumask *);
61 48
62#endif 49#endif
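
A hedged sketch of what registration looks like after this simplification: smpboot_register_percpu_thread() now always creates a thread per possible CPU and there is no cpumask to pass or update. All names (my_task, my_threads, the callbacks) are illustrative, not from the patch.

#include <linux/init.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/smpboot.h>

static DEFINE_PER_CPU(struct task_struct *, my_task);

static int my_thread_should_run(unsigned int cpu)
{
	return 0;			/* nonzero when this CPU has work pending */
}

static void my_thread_fn(unsigned int cpu)
{
	/* per-CPU work would run here */
}

static struct smp_hotplug_thread my_threads = {
	.store			= &my_task,
	.thread_should_run	= my_thread_should_run,
	.thread_fn		= my_thread_fn,
	.thread_comm		= "my_thread/%u",
};

static int __init my_threads_init(void)
{
	/* one kthread per possible CPU; no cpumask argument any more */
	return smpboot_register_percpu_thread(&my_threads);
}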
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index fd57888d4942..3190997df9ca 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -114,29 +114,48 @@ do { \
114#endif /*arch_spin_is_contended*/ 114#endif /*arch_spin_is_contended*/
115 115
116/* 116/*
117 * This barrier must provide two things: 117 * smp_mb__after_spinlock() provides the equivalent of a full memory barrier
118 * between program-order earlier lock acquisitions and program-order later
119 * memory accesses.
118 * 120 *
119 * - it must guarantee a STORE before the spin_lock() is ordered against a 121 * This guarantees that the following two properties hold:
120 * LOAD after it, see the comments at its two usage sites.
121 * 122 *
122 * - it must ensure the critical section is RCsc. 123 * 1) Given the snippet:
123 * 124 *
124 * The latter is important for cases where we observe values written by other 125 * { X = 0; Y = 0; }
125 * CPUs in spin-loops, without barriers, while being subject to scheduling.
126 * 126 *
127 * CPU0 CPU1 CPU2 127 * CPU0 CPU1
128 * 128 *
129 * for (;;) { 129 * WRITE_ONCE(X, 1); WRITE_ONCE(Y, 1);
130 * if (READ_ONCE(X)) 130 * spin_lock(S); smp_mb();
131 * break; 131 * smp_mb__after_spinlock(); r1 = READ_ONCE(X);
132 * } 132 * r0 = READ_ONCE(Y);
133 * X=1 133 * spin_unlock(S);
134 * <sched-out>
135 * <sched-in>
136 * r = X;
137 * 134 *
138 * without transitivity it could be that CPU1 observes X!=0 breaks the loop, 135 * it is forbidden that CPU0 does not observe CPU1's store to Y (r0 = 0)
139 * we get migrated and CPU2 sees X==0. 136 * and CPU1 does not observe CPU0's store to X (r1 = 0); see the comments
137 * preceding the call to smp_mb__after_spinlock() in __schedule() and in
138 * try_to_wake_up().
139 *
140 * 2) Given the snippet:
141 *
142 * { X = 0; Y = 0; }
143 *
144 * CPU0 CPU1 CPU2
145 *
146 * spin_lock(S); spin_lock(S); r1 = READ_ONCE(Y);
147 * WRITE_ONCE(X, 1); smp_mb__after_spinlock(); smp_rmb();
148 * spin_unlock(S); r0 = READ_ONCE(X); r2 = READ_ONCE(X);
149 * WRITE_ONCE(Y, 1);
150 * spin_unlock(S);
151 *
152 * it is forbidden that CPU0's critical section executes before CPU1's
153 * critical section (r0 = 1), CPU2 observes CPU1's store to Y (r1 = 1)
154 * and CPU2 does not observe CPU0's store to X (r2 = 0); see the comments
155 * preceding the calls to smp_rmb() in try_to_wake_up() for similar
156 * snippets but "projected" onto two CPUs.
157 *
158 * Property (2) upgrades the lock to an RCsc lock.
140 * 159 *
141 * Since most load-store architectures implement ACQUIRE with an smp_mb() after 160 * Since most load-store architectures implement ACQUIRE with an smp_mb() after
142 * the LL/SC loop, they need no further barriers. Similarly all our TSO 161 * the LL/SC loop, they need no further barriers. Similarly all our TSO
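
As a compilable restatement of property (1) from the rewritten comment (a sketch only; s, x, y and the two thread functions are illustrative, mirroring the CPU0/CPU1 columns):

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(s);
static int x, y;

static int thread0(void)		/* the __schedule()/try_to_wake_up() side */
{
	int r0;

	WRITE_ONCE(x, 1);
	spin_lock(&s);
	smp_mb__after_spinlock();	/* full barrier between the lock and the load */
	r0 = READ_ONCE(y);
	spin_unlock(&s);
	return r0;
}

static int thread1(void)		/* plain store; smp_mb(); load */
{
	int r1;

	WRITE_ONCE(y, 1);
	smp_mb();
	r1 = READ_ONCE(x);
	return r1;
}

/* The outcome thread0() == 0 && thread1() == 0 is forbidden. */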
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 91494d7e8e41..3e72a291c401 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -195,6 +195,16 @@ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp)
195 return retval; 195 return retval;
196} 196}
197 197
198/* Used by tracing, cannot be traced and cannot invoke lockdep. */
199static inline notrace int
200srcu_read_lock_notrace(struct srcu_struct *sp) __acquires(sp)
201{
202 int retval;
203
204 retval = __srcu_read_lock(sp);
205 return retval;
206}
207
198/** 208/**
199 * srcu_read_unlock - unregister a old reader from an SRCU-protected structure. 209 * srcu_read_unlock - unregister a old reader from an SRCU-protected structure.
200 * @sp: srcu_struct in which to unregister the old reader. 210 * @sp: srcu_struct in which to unregister the old reader.
@@ -209,6 +219,13 @@ static inline void srcu_read_unlock(struct srcu_struct *sp, int idx)
209 __srcu_read_unlock(sp, idx); 219 __srcu_read_unlock(sp, idx);
210} 220}
211 221
222/* Used by tracing, cannot be traced and cannot call lockdep. */
223static inline notrace void
224srcu_read_unlock_notrace(struct srcu_struct *sp, int idx) __releases(sp)
225{
226 __srcu_read_unlock(sp, idx);
227}
228
212/** 229/**
213 * smp_mb__after_srcu_read_unlock - ensure full ordering after srcu_read_unlock 230 * smp_mb__after_srcu_read_unlock - ensure full ordering after srcu_read_unlock
214 * 231 *
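
A sketch of the intended caller of the two new helpers: code that may itself run from the tracing machinery, so it must take the SRCU read lock without lockdep and without being traced. my_srcu and my_trace_hook() are illustrative names.

#include <linux/srcu.h>

DEFINE_STATIC_SRCU(my_srcu);

static notrace void my_trace_hook(void)
{
	int idx;

	idx = srcu_read_lock_notrace(&my_srcu);	/* no lockdep, no tracing */
	/* ... read SRCU-protected data ... */
	srcu_read_unlock_notrace(&my_srcu, idx);
}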
diff --git a/include/linux/swait.h b/include/linux/swait.h
index bf8cb0dee23c..73e06e9986d4 100644
--- a/include/linux/swait.h
+++ b/include/linux/swait.h
@@ -16,7 +16,7 @@
16 * wait-queues, but the semantics are actually completely different, and 16 * wait-queues, but the semantics are actually completely different, and
17 * every single user we have ever had has been buggy (or pointless). 17 * every single user we have ever had has been buggy (or pointless).
18 * 18 *
19 * A "swake_up()" only wakes up _one_ waiter, which is not at all what 19 * A "swake_up_one()" only wakes up _one_ waiter, which is not at all what
20 * "wake_up()" does, and has led to problems. In other cases, it has 20 * "wake_up()" does, and has led to problems. In other cases, it has
21 * been fine, because there's only ever one waiter (kvm), but in that 21 * been fine, because there's only ever one waiter (kvm), but in that
22 * case the whole "simple" wait-queue is just pointless to begin with, 22 * case the whole "simple" wait-queue is just pointless to begin with,
@@ -38,8 +38,8 @@
38 * all wakeups are TASK_NORMAL in order to avoid O(n) lookups for the right 38 * all wakeups are TASK_NORMAL in order to avoid O(n) lookups for the right
39 * sleeper state. 39 * sleeper state.
40 * 40 *
41 * - the exclusive mode; because this requires preserving the list order 41 * - the !exclusive mode; because that leads to O(n) wakeups, everything is
42 * and this is hard. 42 * exclusive.
43 * 43 *
44 * - custom wake callback functions; because you cannot give any guarantees 44 * - custom wake callback functions; because you cannot give any guarantees
45 * about random code. This also allows swait to be used in RT, such that 45 * about random code. This also allows swait to be used in RT, such that
@@ -115,7 +115,7 @@ extern void __init_swait_queue_head(struct swait_queue_head *q, const char *name
115 * CPU0 - waker CPU1 - waiter 115 * CPU0 - waker CPU1 - waiter
116 * 116 *
117 * for (;;) { 117 * for (;;) {
118 * @cond = true; prepare_to_swait(&wq_head, &wait, state); 118 * @cond = true; prepare_to_swait_exclusive(&wq_head, &wait, state);
119 * smp_mb(); // smp_mb() from set_current_state() 119 * smp_mb(); // smp_mb() from set_current_state()
120 * if (swait_active(wq_head)) if (@cond) 120 * if (swait_active(wq_head)) if (@cond)
121 * wake_up(wq_head); break; 121 * wake_up(wq_head); break;
@@ -157,20 +157,20 @@ static inline bool swq_has_sleeper(struct swait_queue_head *wq)
157 return swait_active(wq); 157 return swait_active(wq);
158} 158}
159 159
160extern void swake_up(struct swait_queue_head *q); 160extern void swake_up_one(struct swait_queue_head *q);
161extern void swake_up_all(struct swait_queue_head *q); 161extern void swake_up_all(struct swait_queue_head *q);
162extern void swake_up_locked(struct swait_queue_head *q); 162extern void swake_up_locked(struct swait_queue_head *q);
163 163
164extern void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait); 164extern void prepare_to_swait_exclusive(struct swait_queue_head *q, struct swait_queue *wait, int state);
165extern void prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait, int state);
166extern long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queue *wait, int state); 165extern long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queue *wait, int state);
167 166
168extern void __finish_swait(struct swait_queue_head *q, struct swait_queue *wait); 167extern void __finish_swait(struct swait_queue_head *q, struct swait_queue *wait);
169extern void finish_swait(struct swait_queue_head *q, struct swait_queue *wait); 168extern void finish_swait(struct swait_queue_head *q, struct swait_queue *wait);
170 169
171/* as per ___wait_event() but for swait, therefore "exclusive == 0" */ 170/* as per ___wait_event() but for swait, therefore "exclusive == 1" */
172#define ___swait_event(wq, condition, state, ret, cmd) \ 171#define ___swait_event(wq, condition, state, ret, cmd) \
173({ \ 172({ \
173 __label__ __out; \
174 struct swait_queue __wait; \ 174 struct swait_queue __wait; \
175 long __ret = ret; \ 175 long __ret = ret; \
176 \ 176 \
@@ -183,20 +183,20 @@ extern void finish_swait(struct swait_queue_head *q, struct swait_queue *wait);
183 \ 183 \
184 if (___wait_is_interruptible(state) && __int) { \ 184 if (___wait_is_interruptible(state) && __int) { \
185 __ret = __int; \ 185 __ret = __int; \
186 break; \ 186 goto __out; \
187 } \ 187 } \
188 \ 188 \
189 cmd; \ 189 cmd; \
190 } \ 190 } \
191 finish_swait(&wq, &__wait); \ 191 finish_swait(&wq, &__wait); \
192 __ret; \ 192__out: __ret; \
193}) 193})
194 194
195#define __swait_event(wq, condition) \ 195#define __swait_event(wq, condition) \
196 (void)___swait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, \ 196 (void)___swait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, \
197 schedule()) 197 schedule())
198 198
199#define swait_event(wq, condition) \ 199#define swait_event_exclusive(wq, condition) \
200do { \ 200do { \
201 if (condition) \ 201 if (condition) \
202 break; \ 202 break; \
@@ -208,7 +208,7 @@ do { \
208 TASK_UNINTERRUPTIBLE, timeout, \ 208 TASK_UNINTERRUPTIBLE, timeout, \
209 __ret = schedule_timeout(__ret)) 209 __ret = schedule_timeout(__ret))
210 210
211#define swait_event_timeout(wq, condition, timeout) \ 211#define swait_event_timeout_exclusive(wq, condition, timeout) \
212({ \ 212({ \
213 long __ret = timeout; \ 213 long __ret = timeout; \
214 if (!___wait_cond_timeout(condition)) \ 214 if (!___wait_cond_timeout(condition)) \
@@ -220,7 +220,7 @@ do { \
220 ___swait_event(wq, condition, TASK_INTERRUPTIBLE, 0, \ 220 ___swait_event(wq, condition, TASK_INTERRUPTIBLE, 0, \
221 schedule()) 221 schedule())
222 222
223#define swait_event_interruptible(wq, condition) \ 223#define swait_event_interruptible_exclusive(wq, condition) \
224({ \ 224({ \
225 int __ret = 0; \ 225 int __ret = 0; \
226 if (!(condition)) \ 226 if (!(condition)) \
@@ -233,7 +233,7 @@ do { \
233 TASK_INTERRUPTIBLE, timeout, \ 233 TASK_INTERRUPTIBLE, timeout, \
234 __ret = schedule_timeout(__ret)) 234 __ret = schedule_timeout(__ret))
235 235
236#define swait_event_interruptible_timeout(wq, condition, timeout) \ 236#define swait_event_interruptible_timeout_exclusive(wq, condition, timeout)\
237({ \ 237({ \
238 long __ret = timeout; \ 238 long __ret = timeout; \
239 if (!___wait_cond_timeout(condition)) \ 239 if (!___wait_cond_timeout(condition)) \
@@ -246,7 +246,7 @@ do { \
246 (void)___swait_event(wq, condition, TASK_IDLE, 0, schedule()) 246 (void)___swait_event(wq, condition, TASK_IDLE, 0, schedule())
247 247
248/** 248/**
249 * swait_event_idle - wait without system load contribution 249 * swait_event_idle_exclusive - wait without system load contribution
250 * @wq: the waitqueue to wait on 250 * @wq: the waitqueue to wait on
251 * @condition: a C expression for the event to wait for 251 * @condition: a C expression for the event to wait for
252 * 252 *
@@ -257,7 +257,7 @@ do { \
257 * condition and doesn't want to contribute to system load. Signals are 257 * condition and doesn't want to contribute to system load. Signals are
258 * ignored. 258 * ignored.
259 */ 259 */
260#define swait_event_idle(wq, condition) \ 260#define swait_event_idle_exclusive(wq, condition) \
261do { \ 261do { \
262 if (condition) \ 262 if (condition) \
263 break; \ 263 break; \
@@ -270,7 +270,7 @@ do { \
270 __ret = schedule_timeout(__ret)) 270 __ret = schedule_timeout(__ret))
271 271
272/** 272/**
273 * swait_event_idle_timeout - wait up to timeout without load contribution 273 * swait_event_idle_timeout_exclusive - wait up to timeout without load contribution
274 * @wq: the waitqueue to wait on 274 * @wq: the waitqueue to wait on
275 * @condition: a C expression for the event to wait for 275 * @condition: a C expression for the event to wait for
276 * @timeout: timeout at which we'll give up in jiffies 276 * @timeout: timeout at which we'll give up in jiffies
@@ -288,7 +288,7 @@ do { \
288 * or the remaining jiffies (at least 1) if the @condition evaluated 288 * or the remaining jiffies (at least 1) if the @condition evaluated
289 * to %true before the @timeout elapsed. 289 * to %true before the @timeout elapsed.
290 */ 290 */
291#define swait_event_idle_timeout(wq, condition, timeout) \ 291#define swait_event_idle_timeout_exclusive(wq, condition, timeout) \
292({ \ 292({ \
293 long __ret = timeout; \ 293 long __ret = timeout; \
294 if (!___wait_cond_timeout(condition)) \ 294 if (!___wait_cond_timeout(condition)) \
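
A sketch of the single-waiter pattern the renamed API is meant for: one kthread blocks in swait_event_exclusive(), the producer wakes exactly one waiter with swake_up_one(). my_wq, my_cond and the two functions are illustrative names only.

#include <linux/swait.h>

static DECLARE_SWAIT_QUEUE_HEAD(my_wq);
static bool my_cond;

static int my_waiter(void *unused)
{
	/* all swait waiters are exclusive, hence the _exclusive spelling */
	swait_event_exclusive(my_wq, READ_ONCE(my_cond));
	return 0;
}

static void my_producer(void)
{
	WRITE_ONCE(my_cond, true);
	swake_up_one(&my_wq);		/* wakes at most one waiter */
}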
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 5c1a0933768e..ebb2f24027e8 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -506,9 +506,9 @@ asmlinkage long sys_sync_file_range(int fd, loff_t offset, loff_t nbytes,
506/* fs/timerfd.c */ 506/* fs/timerfd.c */
507asmlinkage long sys_timerfd_create(int clockid, int flags); 507asmlinkage long sys_timerfd_create(int clockid, int flags);
508asmlinkage long sys_timerfd_settime(int ufd, int flags, 508asmlinkage long sys_timerfd_settime(int ufd, int flags,
509 const struct itimerspec __user *utmr, 509 const struct __kernel_itimerspec __user *utmr,
510 struct itimerspec __user *otmr); 510 struct __kernel_itimerspec __user *otmr);
511asmlinkage long sys_timerfd_gettime(int ufd, struct itimerspec __user *otmr); 511asmlinkage long sys_timerfd_gettime(int ufd, struct __kernel_itimerspec __user *otmr);
512 512
513/* fs/utimes.c */ 513/* fs/utimes.c */
514asmlinkage long sys_utimensat(int dfd, const char __user *filename, 514asmlinkage long sys_utimensat(int dfd, const char __user *filename,
@@ -573,10 +573,10 @@ asmlinkage long sys_timer_create(clockid_t which_clock,
573 struct sigevent __user *timer_event_spec, 573 struct sigevent __user *timer_event_spec,
574 timer_t __user * created_timer_id); 574 timer_t __user * created_timer_id);
575asmlinkage long sys_timer_gettime(timer_t timer_id, 575asmlinkage long sys_timer_gettime(timer_t timer_id,
576 struct itimerspec __user *setting); 576 struct __kernel_itimerspec __user *setting);
577asmlinkage long sys_timer_getoverrun(timer_t timer_id); 577asmlinkage long sys_timer_getoverrun(timer_t timer_id);
578asmlinkage long sys_timer_settime(timer_t timer_id, int flags, 578asmlinkage long sys_timer_settime(timer_t timer_id, int flags,
579 const struct itimerspec __user *new_setting, 579 const struct __kernel_itimerspec __user *new_setting,
580 struct itimerspec __user *old_setting); 580 struct itimerspec __user *old_setting);
581asmlinkage long sys_timer_delete(timer_t timer_id); 581asmlinkage long sys_timer_delete(timer_t timer_id);
582asmlinkage long sys_clock_settime(clockid_t which_clock, 582asmlinkage long sys_clock_settime(clockid_t which_clock,
diff --git a/include/linux/time.h b/include/linux/time.h
index aed74463592d..27d83fd2ae61 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -14,9 +14,9 @@ int get_timespec64(struct timespec64 *ts,
14int put_timespec64(const struct timespec64 *ts, 14int put_timespec64(const struct timespec64 *ts,
15 struct __kernel_timespec __user *uts); 15 struct __kernel_timespec __user *uts);
16int get_itimerspec64(struct itimerspec64 *it, 16int get_itimerspec64(struct itimerspec64 *it,
17 const struct itimerspec __user *uit); 17 const struct __kernel_itimerspec __user *uit);
18int put_itimerspec64(const struct itimerspec64 *it, 18int put_itimerspec64(const struct itimerspec64 *it,
19 struct itimerspec __user *uit); 19 struct __kernel_itimerspec __user *uit);
20 20
21extern time64_t mktime64(const unsigned int year, const unsigned int mon, 21extern time64_t mktime64(const unsigned int year, const unsigned int mon,
22 const unsigned int day, const unsigned int hour, 22 const unsigned int day, const unsigned int hour,
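
A sketch of the conversion path these two prototypes serve, assuming a caller that round-trips a user-supplied __kernel_itimerspec through the kernel's 64-bit itimerspec64 (uits and my_copy_roundtrip() are illustrative):

#include <linux/errno.h>
#include <linux/time.h>

static long my_copy_roundtrip(struct __kernel_itimerspec __user *uits)
{
	struct itimerspec64 its;

	if (get_itimerspec64(&its, uits))	/* user layout is y2038-safe */
		return -EFAULT;
	/* ... operate on the 64-bit itimerspec64 in kernel space ... */
	return put_itimerspec64(&its, uits);
}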
diff --git a/include/linux/time64.h b/include/linux/time64.h
index 0a7b2f79cec7..05634afba0db 100644
--- a/include/linux/time64.h
+++ b/include/linux/time64.h
@@ -12,6 +12,7 @@ typedef __u64 timeu64_t;
12 */ 12 */
13#ifndef CONFIG_64BIT_TIME 13#ifndef CONFIG_64BIT_TIME
14#define __kernel_timespec timespec 14#define __kernel_timespec timespec
15#define __kernel_itimerspec itimerspec
15#endif 16#endif
16 17
17#include <uapi/linux/time.h> 18#include <uapi/linux/time.h>
diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index 86bc2026efce..e79861418fd7 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -177,7 +177,7 @@ static inline time64_t ktime_get_clocktai_seconds(void)
177extern bool timekeeping_rtc_skipsuspend(void); 177extern bool timekeeping_rtc_skipsuspend(void);
178extern bool timekeeping_rtc_skipresume(void); 178extern bool timekeeping_rtc_skipresume(void);
179 179
180extern void timekeeping_inject_sleeptime64(struct timespec64 *delta); 180extern void timekeeping_inject_sleeptime64(const struct timespec64 *delta);
181 181
182/* 182/*
183 * struct system_time_snapshot - simultaneous raw/real time capture with 183 * struct system_time_snapshot - simultaneous raw/real time capture with
@@ -243,7 +243,8 @@ extern void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot);
243extern int persistent_clock_is_local; 243extern int persistent_clock_is_local;
244 244
245extern void read_persistent_clock64(struct timespec64 *ts); 245extern void read_persistent_clock64(struct timespec64 *ts);
246extern void read_boot_clock64(struct timespec64 *ts); 246void read_persistent_clock_and_boot_offset(struct timespec64 *wall_clock,
247 struct timespec64 *boot_offset);
247extern int update_persistent_clock64(struct timespec64 now); 248extern int update_persistent_clock64(struct timespec64 now);
248 249
249/* 250/*
diff --git a/include/linux/torture.h b/include/linux/torture.h
index 66272862070b..61dfd93b6ee4 100644
--- a/include/linux/torture.h
+++ b/include/linux/torture.h
@@ -64,6 +64,8 @@ struct torture_random_state {
64 long trs_count; 64 long trs_count;
65}; 65};
66#define DEFINE_TORTURE_RANDOM(name) struct torture_random_state name = { 0, 0 } 66#define DEFINE_TORTURE_RANDOM(name) struct torture_random_state name = { 0, 0 }
67#define DEFINE_TORTURE_RANDOM_PERCPU(name) \
68 DEFINE_PER_CPU(struct torture_random_state, name)
67unsigned long torture_random(struct torture_random_state *trsp); 69unsigned long torture_random(struct torture_random_state *trsp);
68 70
69/* Task shuffler, which causes CPUs to occasionally go idle. */ 71/* Task shuffler, which causes CPUs to occasionally go idle. */
@@ -79,7 +81,7 @@ void stutter_wait(const char *title);
79int torture_stutter_init(int s); 81int torture_stutter_init(int s);
80 82
81/* Initialization and cleanup. */ 83/* Initialization and cleanup. */
82bool torture_init_begin(char *ttype, bool v); 84bool torture_init_begin(char *ttype, int v);
83void torture_init_end(void); 85void torture_init_end(void);
84bool torture_cleanup_begin(void); 86bool torture_cleanup_begin(void);
85void torture_cleanup_end(void); 87void torture_cleanup_end(void);
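
A sketch of the new per-CPU variant next to the existing DEFINE_TORTURE_RANDOM(): each CPU gets its own torture_random_state, so torture_random() can be called without sharing state across CPUs. my_trs and my_random() are illustrative names.

#include <linux/percpu.h>
#include <linux/torture.h>

static DEFINE_TORTURE_RANDOM_PERCPU(my_trs);

static unsigned long my_random(void)
{
	return torture_random(this_cpu_ptr(&my_trs));	/* per-CPU state */
}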
diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index 9324ac2d9ff2..43913ae79f64 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -64,7 +64,8 @@ struct vsock_sock {
64 struct list_head pending_links; 64 struct list_head pending_links;
65 struct list_head accept_queue; 65 struct list_head accept_queue;
66 bool rejected; 66 bool rejected;
67 struct delayed_work dwork; 67 struct delayed_work connect_work;
68 struct delayed_work pending_work;
68 struct delayed_work close_work; 69 struct delayed_work close_work;
69 bool close_work_scheduled; 70 bool close_work_scheduled;
70 u32 peer_shutdown; 71 u32 peer_shutdown;
@@ -77,7 +78,6 @@ struct vsock_sock {
77 78
78s64 vsock_stream_has_data(struct vsock_sock *vsk); 79s64 vsock_stream_has_data(struct vsock_sock *vsk);
79s64 vsock_stream_has_space(struct vsock_sock *vsk); 80s64 vsock_stream_has_space(struct vsock_sock *vsk);
80void vsock_pending_work(struct work_struct *work);
81struct sock *__vsock_create(struct net *net, 81struct sock *__vsock_create(struct net *net,
82 struct socket *sock, 82 struct socket *sock,
83 struct sock *parent, 83 struct sock *parent,
diff --git a/include/net/llc.h b/include/net/llc.h
index dc35f25eb679..890a87318014 100644
--- a/include/net/llc.h
+++ b/include/net/llc.h
@@ -116,6 +116,11 @@ static inline void llc_sap_hold(struct llc_sap *sap)
116 refcount_inc(&sap->refcnt); 116 refcount_inc(&sap->refcnt);
117} 117}
118 118
119static inline bool llc_sap_hold_safe(struct llc_sap *sap)
120{
121 return refcount_inc_not_zero(&sap->refcnt);
122}
123
119void llc_sap_close(struct llc_sap *sap); 124void llc_sap_close(struct llc_sap *sap);
120 125
121static inline void llc_sap_put(struct llc_sap *sap) 126static inline void llc_sap_put(struct llc_sap *sap)
diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index 5936aac357ab..a8d07feff6a0 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -52,6 +52,7 @@ TRACE_EVENT(rcu_utilization,
52 * "cpuqs": CPU passes through a quiescent state. 52 * "cpuqs": CPU passes through a quiescent state.
53 * "cpuonl": CPU comes online. 53 * "cpuonl": CPU comes online.
54 * "cpuofl": CPU goes offline. 54 * "cpuofl": CPU goes offline.
55 * "cpuofl-bgp": CPU goes offline while blocking a grace period.
55 * "reqwait": GP kthread sleeps waiting for grace-period request. 56 * "reqwait": GP kthread sleeps waiting for grace-period request.
56 * "reqwaitsig": GP kthread awakened by signal from reqwait state. 57 * "reqwaitsig": GP kthread awakened by signal from reqwait state.
57 * "fqswait": GP kthread waiting until time to force quiescent states. 58 * "fqswait": GP kthread waiting until time to force quiescent states.
@@ -63,24 +64,24 @@ TRACE_EVENT(rcu_utilization,
63 */ 64 */
64TRACE_EVENT(rcu_grace_period, 65TRACE_EVENT(rcu_grace_period,
65 66
66 TP_PROTO(const char *rcuname, unsigned long gpnum, const char *gpevent), 67 TP_PROTO(const char *rcuname, unsigned long gp_seq, const char *gpevent),
67 68
68 TP_ARGS(rcuname, gpnum, gpevent), 69 TP_ARGS(rcuname, gp_seq, gpevent),
69 70
70 TP_STRUCT__entry( 71 TP_STRUCT__entry(
71 __field(const char *, rcuname) 72 __field(const char *, rcuname)
72 __field(unsigned long, gpnum) 73 __field(unsigned long, gp_seq)
73 __field(const char *, gpevent) 74 __field(const char *, gpevent)
74 ), 75 ),
75 76
76 TP_fast_assign( 77 TP_fast_assign(
77 __entry->rcuname = rcuname; 78 __entry->rcuname = rcuname;
78 __entry->gpnum = gpnum; 79 __entry->gp_seq = gp_seq;
79 __entry->gpevent = gpevent; 80 __entry->gpevent = gpevent;
80 ), 81 ),
81 82
82 TP_printk("%s %lu %s", 83 TP_printk("%s %lu %s",
83 __entry->rcuname, __entry->gpnum, __entry->gpevent) 84 __entry->rcuname, __entry->gp_seq, __entry->gpevent)
84); 85);
85 86
86/* 87/*
@@ -90,8 +91,8 @@ TRACE_EVENT(rcu_grace_period,
90 * 91 *
91 * "Startleaf": Request a grace period based on leaf-node data. 92 * "Startleaf": Request a grace period based on leaf-node data.
92 * "Prestarted": Someone beat us to the request 93 * "Prestarted": Someone beat us to the request
93 * "Startedleaf": Leaf-node start proved sufficient. 94 * "Startedleaf": Leaf node marked for future GP.
94 * "Startedleafroot": Leaf-node start proved sufficient after checking root. 95 * "Startedleafroot": All nodes from leaf to root marked for future GP.
95 * "Startedroot": Requested a nocb grace period based on root-node data. 96 * "Startedroot": Requested a nocb grace period based on root-node data.
96 * "NoGPkthread": The RCU grace-period kthread has not yet started. 97 * "NoGPkthread": The RCU grace-period kthread has not yet started.
97 * "StartWait": Start waiting for the requested grace period. 98 * "StartWait": Start waiting for the requested grace period.
@@ -102,17 +103,16 @@ TRACE_EVENT(rcu_grace_period,
102 */ 103 */
103TRACE_EVENT(rcu_future_grace_period, 104TRACE_EVENT(rcu_future_grace_period,
104 105
105 TP_PROTO(const char *rcuname, unsigned long gpnum, unsigned long completed, 106 TP_PROTO(const char *rcuname, unsigned long gp_seq,
106 unsigned long c, u8 level, int grplo, int grphi, 107 unsigned long gp_seq_req, u8 level, int grplo, int grphi,
107 const char *gpevent), 108 const char *gpevent),
108 109
109 TP_ARGS(rcuname, gpnum, completed, c, level, grplo, grphi, gpevent), 110 TP_ARGS(rcuname, gp_seq, gp_seq_req, level, grplo, grphi, gpevent),
110 111
111 TP_STRUCT__entry( 112 TP_STRUCT__entry(
112 __field(const char *, rcuname) 113 __field(const char *, rcuname)
113 __field(unsigned long, gpnum) 114 __field(unsigned long, gp_seq)
114 __field(unsigned long, completed) 115 __field(unsigned long, gp_seq_req)
115 __field(unsigned long, c)
116 __field(u8, level) 116 __field(u8, level)
117 __field(int, grplo) 117 __field(int, grplo)
118 __field(int, grphi) 118 __field(int, grphi)
@@ -121,19 +121,17 @@ TRACE_EVENT(rcu_future_grace_period,
121 121
122 TP_fast_assign( 122 TP_fast_assign(
123 __entry->rcuname = rcuname; 123 __entry->rcuname = rcuname;
124 __entry->gpnum = gpnum; 124 __entry->gp_seq = gp_seq;
125 __entry->completed = completed; 125 __entry->gp_seq_req = gp_seq_req;
126 __entry->c = c;
127 __entry->level = level; 126 __entry->level = level;
128 __entry->grplo = grplo; 127 __entry->grplo = grplo;
129 __entry->grphi = grphi; 128 __entry->grphi = grphi;
130 __entry->gpevent = gpevent; 129 __entry->gpevent = gpevent;
131 ), 130 ),
132 131
133 TP_printk("%s %lu %lu %lu %u %d %d %s", 132 TP_printk("%s %lu %lu %u %d %d %s",
134 __entry->rcuname, __entry->gpnum, __entry->completed, 133 __entry->rcuname, __entry->gp_seq, __entry->gp_seq_req, __entry->level,
135 __entry->c, __entry->level, __entry->grplo, __entry->grphi, 134 __entry->grplo, __entry->grphi, __entry->gpevent)
136 __entry->gpevent)
137); 135);
138 136
139/* 137/*
@@ -145,14 +143,14 @@ TRACE_EVENT(rcu_future_grace_period,
145 */ 143 */
146TRACE_EVENT(rcu_grace_period_init, 144TRACE_EVENT(rcu_grace_period_init,
147 145
148 TP_PROTO(const char *rcuname, unsigned long gpnum, u8 level, 146 TP_PROTO(const char *rcuname, unsigned long gp_seq, u8 level,
149 int grplo, int grphi, unsigned long qsmask), 147 int grplo, int grphi, unsigned long qsmask),
150 148
151 TP_ARGS(rcuname, gpnum, level, grplo, grphi, qsmask), 149 TP_ARGS(rcuname, gp_seq, level, grplo, grphi, qsmask),
152 150
153 TP_STRUCT__entry( 151 TP_STRUCT__entry(
154 __field(const char *, rcuname) 152 __field(const char *, rcuname)
155 __field(unsigned long, gpnum) 153 __field(unsigned long, gp_seq)
156 __field(u8, level) 154 __field(u8, level)
157 __field(int, grplo) 155 __field(int, grplo)
158 __field(int, grphi) 156 __field(int, grphi)
@@ -161,7 +159,7 @@ TRACE_EVENT(rcu_grace_period_init,
161 159
162 TP_fast_assign( 160 TP_fast_assign(
163 __entry->rcuname = rcuname; 161 __entry->rcuname = rcuname;
164 __entry->gpnum = gpnum; 162 __entry->gp_seq = gp_seq;
165 __entry->level = level; 163 __entry->level = level;
166 __entry->grplo = grplo; 164 __entry->grplo = grplo;
167 __entry->grphi = grphi; 165 __entry->grphi = grphi;
@@ -169,7 +167,7 @@ TRACE_EVENT(rcu_grace_period_init,
169 ), 167 ),
170 168
171 TP_printk("%s %lu %u %d %d %lx", 169 TP_printk("%s %lu %u %d %d %lx",
172 __entry->rcuname, __entry->gpnum, __entry->level, 170 __entry->rcuname, __entry->gp_seq, __entry->level,
173 __entry->grplo, __entry->grphi, __entry->qsmask) 171 __entry->grplo, __entry->grphi, __entry->qsmask)
174); 172);
175 173
@@ -301,24 +299,24 @@ TRACE_EVENT(rcu_nocb_wake,
301 */ 299 */
302TRACE_EVENT(rcu_preempt_task, 300TRACE_EVENT(rcu_preempt_task,
303 301
304 TP_PROTO(const char *rcuname, int pid, unsigned long gpnum), 302 TP_PROTO(const char *rcuname, int pid, unsigned long gp_seq),
305 303
306 TP_ARGS(rcuname, pid, gpnum), 304 TP_ARGS(rcuname, pid, gp_seq),
307 305
308 TP_STRUCT__entry( 306 TP_STRUCT__entry(
309 __field(const char *, rcuname) 307 __field(const char *, rcuname)
310 __field(unsigned long, gpnum) 308 __field(unsigned long, gp_seq)
311 __field(int, pid) 309 __field(int, pid)
312 ), 310 ),
313 311
314 TP_fast_assign( 312 TP_fast_assign(
315 __entry->rcuname = rcuname; 313 __entry->rcuname = rcuname;
316 __entry->gpnum = gpnum; 314 __entry->gp_seq = gp_seq;
317 __entry->pid = pid; 315 __entry->pid = pid;
318 ), 316 ),
319 317
320 TP_printk("%s %lu %d", 318 TP_printk("%s %lu %d",
321 __entry->rcuname, __entry->gpnum, __entry->pid) 319 __entry->rcuname, __entry->gp_seq, __entry->pid)
322); 320);
323 321
324/* 322/*
@@ -328,23 +326,23 @@ TRACE_EVENT(rcu_preempt_task,
328 */ 326 */
329TRACE_EVENT(rcu_unlock_preempted_task, 327TRACE_EVENT(rcu_unlock_preempted_task,
330 328
331 TP_PROTO(const char *rcuname, unsigned long gpnum, int pid), 329 TP_PROTO(const char *rcuname, unsigned long gp_seq, int pid),
332 330
333 TP_ARGS(rcuname, gpnum, pid), 331 TP_ARGS(rcuname, gp_seq, pid),
334 332
335 TP_STRUCT__entry( 333 TP_STRUCT__entry(
336 __field(const char *, rcuname) 334 __field(const char *, rcuname)
337 __field(unsigned long, gpnum) 335 __field(unsigned long, gp_seq)
338 __field(int, pid) 336 __field(int, pid)
339 ), 337 ),
340 338
341 TP_fast_assign( 339 TP_fast_assign(
342 __entry->rcuname = rcuname; 340 __entry->rcuname = rcuname;
343 __entry->gpnum = gpnum; 341 __entry->gp_seq = gp_seq;
344 __entry->pid = pid; 342 __entry->pid = pid;
345 ), 343 ),
346 344
347 TP_printk("%s %lu %d", __entry->rcuname, __entry->gpnum, __entry->pid) 345 TP_printk("%s %lu %d", __entry->rcuname, __entry->gp_seq, __entry->pid)
348); 346);
349 347
350/* 348/*
@@ -357,15 +355,15 @@ TRACE_EVENT(rcu_unlock_preempted_task,
357 */ 355 */
358TRACE_EVENT(rcu_quiescent_state_report, 356TRACE_EVENT(rcu_quiescent_state_report,
359 357
360 TP_PROTO(const char *rcuname, unsigned long gpnum, 358 TP_PROTO(const char *rcuname, unsigned long gp_seq,
361 unsigned long mask, unsigned long qsmask, 359 unsigned long mask, unsigned long qsmask,
362 u8 level, int grplo, int grphi, int gp_tasks), 360 u8 level, int grplo, int grphi, int gp_tasks),
363 361
364 TP_ARGS(rcuname, gpnum, mask, qsmask, level, grplo, grphi, gp_tasks), 362 TP_ARGS(rcuname, gp_seq, mask, qsmask, level, grplo, grphi, gp_tasks),
365 363
366 TP_STRUCT__entry( 364 TP_STRUCT__entry(
367 __field(const char *, rcuname) 365 __field(const char *, rcuname)
368 __field(unsigned long, gpnum) 366 __field(unsigned long, gp_seq)
369 __field(unsigned long, mask) 367 __field(unsigned long, mask)
370 __field(unsigned long, qsmask) 368 __field(unsigned long, qsmask)
371 __field(u8, level) 369 __field(u8, level)
@@ -376,7 +374,7 @@ TRACE_EVENT(rcu_quiescent_state_report,
376 374
377 TP_fast_assign( 375 TP_fast_assign(
378 __entry->rcuname = rcuname; 376 __entry->rcuname = rcuname;
379 __entry->gpnum = gpnum; 377 __entry->gp_seq = gp_seq;
380 __entry->mask = mask; 378 __entry->mask = mask;
381 __entry->qsmask = qsmask; 379 __entry->qsmask = qsmask;
382 __entry->level = level; 380 __entry->level = level;
@@ -386,41 +384,41 @@ TRACE_EVENT(rcu_quiescent_state_report,
386 ), 384 ),
387 385
388 TP_printk("%s %lu %lx>%lx %u %d %d %u", 386 TP_printk("%s %lu %lx>%lx %u %d %d %u",
389 __entry->rcuname, __entry->gpnum, 387 __entry->rcuname, __entry->gp_seq,
390 __entry->mask, __entry->qsmask, __entry->level, 388 __entry->mask, __entry->qsmask, __entry->level,
391 __entry->grplo, __entry->grphi, __entry->gp_tasks) 389 __entry->grplo, __entry->grphi, __entry->gp_tasks)
392); 390);
393 391
394/* 392/*
395 * Tracepoint for quiescent states detected by force_quiescent_state(). 393 * Tracepoint for quiescent states detected by force_quiescent_state().
396 * These trace events include the type of RCU, the grace-period number that 394 * These trace events include the type of RCU, the grace-period number
397 * was blocked by the CPU, the CPU itself, and the type of quiescent state, 395 * that was blocked by the CPU, the CPU itself, and the type of quiescent
398 * which can be "dti" for dyntick-idle mode, "ofl" for CPU offline, "kick" 396 * state, which can be "dti" for dyntick-idle mode, "kick" when kicking
399 * when kicking a CPU that has been in dyntick-idle mode for too long, or 397 * a CPU that has been in dyntick-idle mode for too long, or "rqc" if the
400 * "rqc" if the CPU got a quiescent state via its rcu_qs_ctr. 398 * CPU got a quiescent state via its rcu_qs_ctr.
401 */ 399 */
402TRACE_EVENT(rcu_fqs, 400TRACE_EVENT(rcu_fqs,
403 401
404 TP_PROTO(const char *rcuname, unsigned long gpnum, int cpu, const char *qsevent), 402 TP_PROTO(const char *rcuname, unsigned long gp_seq, int cpu, const char *qsevent),
405 403
406 TP_ARGS(rcuname, gpnum, cpu, qsevent), 404 TP_ARGS(rcuname, gp_seq, cpu, qsevent),
407 405
408 TP_STRUCT__entry( 406 TP_STRUCT__entry(
409 __field(const char *, rcuname) 407 __field(const char *, rcuname)
410 __field(unsigned long, gpnum) 408 __field(unsigned long, gp_seq)
411 __field(int, cpu) 409 __field(int, cpu)
412 __field(const char *, qsevent) 410 __field(const char *, qsevent)
413 ), 411 ),
414 412
415 TP_fast_assign( 413 TP_fast_assign(
416 __entry->rcuname = rcuname; 414 __entry->rcuname = rcuname;
417 __entry->gpnum = gpnum; 415 __entry->gp_seq = gp_seq;
418 __entry->cpu = cpu; 416 __entry->cpu = cpu;
419 __entry->qsevent = qsevent; 417 __entry->qsevent = qsevent;
420 ), 418 ),
421 419
422 TP_printk("%s %lu %d %s", 420 TP_printk("%s %lu %d %s",
423 __entry->rcuname, __entry->gpnum, 421 __entry->rcuname, __entry->gp_seq,
424 __entry->cpu, __entry->qsevent) 422 __entry->cpu, __entry->qsevent)
425); 423);
426 424
@@ -753,23 +751,23 @@ TRACE_EVENT(rcu_barrier,
753 751
754#else /* #ifdef CONFIG_RCU_TRACE */ 752#else /* #ifdef CONFIG_RCU_TRACE */
755 753
756#define trace_rcu_grace_period(rcuname, gpnum, gpevent) do { } while (0) 754#define trace_rcu_grace_period(rcuname, gp_seq, gpevent) do { } while (0)
757#define trace_rcu_future_grace_period(rcuname, gpnum, completed, c, \ 755#define trace_rcu_future_grace_period(rcuname, gp_seq, gp_seq_req, \
758 level, grplo, grphi, event) \ 756 level, grplo, grphi, event) \
759 do { } while (0) 757 do { } while (0)
760#define trace_rcu_grace_period_init(rcuname, gpnum, level, grplo, grphi, \ 758#define trace_rcu_grace_period_init(rcuname, gp_seq, level, grplo, grphi, \
761 qsmask) do { } while (0) 759 qsmask) do { } while (0)
762#define trace_rcu_exp_grace_period(rcuname, gqseq, gpevent) \ 760#define trace_rcu_exp_grace_period(rcuname, gqseq, gpevent) \
763 do { } while (0) 761 do { } while (0)
764#define trace_rcu_exp_funnel_lock(rcuname, level, grplo, grphi, gpevent) \ 762#define trace_rcu_exp_funnel_lock(rcuname, level, grplo, grphi, gpevent) \
765 do { } while (0) 763 do { } while (0)
766#define trace_rcu_nocb_wake(rcuname, cpu, reason) do { } while (0) 764#define trace_rcu_nocb_wake(rcuname, cpu, reason) do { } while (0)
767#define trace_rcu_preempt_task(rcuname, pid, gpnum) do { } while (0) 765#define trace_rcu_preempt_task(rcuname, pid, gp_seq) do { } while (0)
768#define trace_rcu_unlock_preempted_task(rcuname, gpnum, pid) do { } while (0) 766#define trace_rcu_unlock_preempted_task(rcuname, gp_seq, pid) do { } while (0)
769#define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, \ 767#define trace_rcu_quiescent_state_report(rcuname, gp_seq, mask, qsmask, level, \
770 grplo, grphi, gp_tasks) do { } \ 768 grplo, grphi, gp_tasks) do { } \
771 while (0) 769 while (0)
772#define trace_rcu_fqs(rcuname, gpnum, cpu, qsevent) do { } while (0) 770#define trace_rcu_fqs(rcuname, gp_seq, cpu, qsevent) do { } while (0)
773#define trace_rcu_dyntick(polarity, oldnesting, newnesting, dyntick) do { } while (0) 771#define trace_rcu_dyntick(polarity, oldnesting, newnesting, dyntick) do { } while (0)
774#define trace_rcu_callback(rcuname, rhp, qlen_lazy, qlen) do { } while (0) 772#define trace_rcu_callback(rcuname, rhp, qlen_lazy, qlen) do { } while (0)
775#define trace_rcu_kfree_callback(rcuname, rhp, offset, qlen_lazy, qlen) \ 773#define trace_rcu_kfree_callback(rcuname, rhp, offset, qlen_lazy, qlen) \
diff --git a/include/uapi/linux/time.h b/include/uapi/linux/time.h
index fcf936656493..6b56a2208be7 100644
--- a/include/uapi/linux/time.h
+++ b/include/uapi/linux/time.h
@@ -49,6 +49,13 @@ struct __kernel_timespec {
49}; 49};
50#endif 50#endif
51 51
52#ifndef __kernel_itimerspec
53struct __kernel_itimerspec {
54 struct __kernel_timespec it_interval; /* timer period */
55 struct __kernel_timespec it_value; /* timer expiration */
56};
57#endif
58
52/* 59/*
53 * legacy timeval structure, only embedded in structures that 60 * legacy timeval structure, only embedded in structures that
54 * traditionally used 'timeval' to pass time intervals (not absolute 61 * traditionally used 'timeval' to pass time intervals (not absolute
diff --git a/init/main.c b/init/main.c
index 3b4ada11ed52..38c68b593d0d 100644
--- a/init/main.c
+++ b/init/main.c
@@ -79,7 +79,7 @@
79#include <linux/pti.h> 79#include <linux/pti.h>
80#include <linux/blkdev.h> 80#include <linux/blkdev.h>
81#include <linux/elevator.h> 81#include <linux/elevator.h>
82#include <linux/sched_clock.h> 82#include <linux/sched/clock.h>
83#include <linux/sched/task.h> 83#include <linux/sched/task.h>
84#include <linux/sched/task_stack.h> 84#include <linux/sched/task_stack.h>
85#include <linux/context_tracking.h> 85#include <linux/context_tracking.h>
@@ -561,8 +561,8 @@ asmlinkage __visible void __init start_kernel(void)
561 setup_command_line(command_line); 561 setup_command_line(command_line);
562 setup_nr_cpu_ids(); 562 setup_nr_cpu_ids();
563 setup_per_cpu_areas(); 563 setup_per_cpu_areas();
564 boot_cpu_state_init();
565 smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */ 564 smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */
565 boot_cpu_hotplug_init();
566 566
567 build_all_zonelists(NULL); 567 build_all_zonelists(NULL);
568 page_alloc_init(); 568 page_alloc_init();
@@ -642,7 +642,6 @@ asmlinkage __visible void __init start_kernel(void)
642 softirq_init(); 642 softirq_init();
643 timekeeping_init(); 643 timekeeping_init();
644 time_init(); 644 time_init();
645 sched_clock_postinit();
646 printk_safe_init(); 645 printk_safe_init();
647 perf_event_init(); 646 perf_event_init();
648 profile_init(); 647 profile_init();
@@ -697,6 +696,7 @@ asmlinkage __visible void __init start_kernel(void)
697 acpi_early_init(); 696 acpi_early_init();
698 if (late_time_init) 697 if (late_time_init)
699 late_time_init(); 698 late_time_init();
699 sched_clock_init();
700 calibrate_delay(); 700 calibrate_delay();
701 pid_idr_init(); 701 pid_idr_init();
702 anon_vma_init(); 702 anon_vma_init();
@@ -1065,6 +1065,13 @@ static int __ref kernel_init(void *unused)
1065 jump_label_invalidate_initmem(); 1065 jump_label_invalidate_initmem();
1066 free_initmem(); 1066 free_initmem();
1067 mark_readonly(); 1067 mark_readonly();
1068
1069 /*
1070 * Kernel mappings are now finalized - update the userspace page-table
1071 * to finalize PTI.
1072 */
1073 pti_finalize();
1074
1068 system_state = SYSTEM_RUNNING; 1075 system_state = SYSTEM_RUNNING;
1069 numa_default_policy(); 1076 numa_default_policy();
1070 1077
diff --git a/ipc/shm.c b/ipc/shm.c
index 051a3e1fb8df..fefa00d310fb 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -427,6 +427,17 @@ static int shm_split(struct vm_area_struct *vma, unsigned long addr)
427 return 0; 427 return 0;
428} 428}
429 429
430static unsigned long shm_pagesize(struct vm_area_struct *vma)
431{
432 struct file *file = vma->vm_file;
433 struct shm_file_data *sfd = shm_file_data(file);
434
435 if (sfd->vm_ops->pagesize)
436 return sfd->vm_ops->pagesize(vma);
437
438 return PAGE_SIZE;
439}
440
430#ifdef CONFIG_NUMA 441#ifdef CONFIG_NUMA
431static int shm_set_policy(struct vm_area_struct *vma, struct mempolicy *new) 442static int shm_set_policy(struct vm_area_struct *vma, struct mempolicy *new)
432{ 443{
@@ -554,6 +565,7 @@ static const struct vm_operations_struct shm_vm_ops = {
554 .close = shm_close, /* callback for when the vm-area is released */ 565 .close = shm_close, /* callback for when the vm-area is released */
555 .fault = shm_fault, 566 .fault = shm_fault,
556 .split = shm_split, 567 .split = shm_split,
568 .pagesize = shm_pagesize,
557#if defined(CONFIG_NUMA) 569#if defined(CONFIG_NUMA)
558 .set_policy = shm_set_policy, 570 .set_policy = shm_set_policy,
559 .get_policy = shm_get_policy, 571 .get_policy = shm_get_policy,
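
A sketch of the kind of ->pagesize() method shm_pagesize() defers to; hugetlbfs is the real provider, the version below is illustrative only.

#include <linux/hugetlb.h>
#include <linux/mm.h>

static unsigned long my_vm_pagesize(struct vm_area_struct *vma)
{
	/* hugetlb VMAs report their huge page size, everything else falls back */
	if (is_vm_hugetlb_page(vma))
		return huge_page_size(hstate_vma(vma));
	return PAGE_SIZE;
}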
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index ceb1c4596c51..80d672a11088 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1279,8 +1279,12 @@ static void show_special(struct audit_context *context, int *call_panic)
1279 break; 1279 break;
1280 case AUDIT_KERN_MODULE: 1280 case AUDIT_KERN_MODULE:
1281 audit_log_format(ab, "name="); 1281 audit_log_format(ab, "name=");
1282 audit_log_untrustedstring(ab, context->module.name); 1282 if (context->module.name) {
1283 kfree(context->module.name); 1283 audit_log_untrustedstring(ab, context->module.name);
1284 kfree(context->module.name);
1285 } else
1286 audit_log_format(ab, "(null)");
1287
1284 break; 1288 break;
1285 } 1289 }
1286 audit_log_end(ab); 1290 audit_log_end(ab);
@@ -2411,8 +2415,9 @@ void __audit_log_kern_module(char *name)
2411{ 2415{
2412 struct audit_context *context = audit_context(); 2416 struct audit_context *context = audit_context();
2413 2417
2414 context->module.name = kmalloc(strlen(name) + 1, GFP_KERNEL); 2418 context->module.name = kstrdup(name, GFP_KERNEL);
2415 strcpy(context->module.name, name); 2419 if (!context->module.name)
2420 audit_log_lost("out of memory in __audit_log_kern_module");
2416 context->type = AUDIT_KERN_MODULE; 2421 context->type = AUDIT_KERN_MODULE;
2417} 2422}
2418 2423
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 544e58f5f642..2aa55d030c77 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -378,7 +378,7 @@ static int array_map_check_btf(const struct bpf_map *map, const struct btf *btf,
378 return -EINVAL; 378 return -EINVAL;
379 379
380 value_type = btf_type_id_size(btf, &btf_value_id, &value_size); 380 value_type = btf_type_id_size(btf, &btf_value_id, &value_size);
381 if (!value_type || value_size > map->value_size) 381 if (!value_type || value_size != map->value_size)
382 return -EINVAL; 382 return -EINVAL;
383 383
384 return 0; 384 return 0;
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 9704934252b3..2590700237c1 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -1519,9 +1519,9 @@ static s32 btf_struct_check_meta(struct btf_verifier_env *env,
1519{ 1519{
1520 bool is_union = BTF_INFO_KIND(t->info) == BTF_KIND_UNION; 1520 bool is_union = BTF_INFO_KIND(t->info) == BTF_KIND_UNION;
1521 const struct btf_member *member; 1521 const struct btf_member *member;
1522 u32 meta_needed, last_offset;
1522 struct btf *btf = env->btf; 1523 struct btf *btf = env->btf;
1523 u32 struct_size = t->size; 1524 u32 struct_size = t->size;
1524 u32 meta_needed;
1525 u16 i; 1525 u16 i;
1526 1526
1527 meta_needed = btf_type_vlen(t) * sizeof(*member); 1527 meta_needed = btf_type_vlen(t) * sizeof(*member);
@@ -1534,6 +1534,7 @@ static s32 btf_struct_check_meta(struct btf_verifier_env *env,
1534 1534
1535 btf_verifier_log_type(env, t, NULL); 1535 btf_verifier_log_type(env, t, NULL);
1536 1536
1537 last_offset = 0;
1537 for_each_member(i, t, member) { 1538 for_each_member(i, t, member) {
1538 if (!btf_name_offset_valid(btf, member->name_off)) { 1539 if (!btf_name_offset_valid(btf, member->name_off)) {
1539 btf_verifier_log_member(env, t, member, 1540 btf_verifier_log_member(env, t, member,
@@ -1555,6 +1556,16 @@ static s32 btf_struct_check_meta(struct btf_verifier_env *env,
1555 return -EINVAL; 1556 return -EINVAL;
1556 } 1557 }
1557 1558
1559 /*
1560 * ">" instead of ">=" because the last member could be
1561 * "char a[0];"
1562 */
1563 if (last_offset > member->offset) {
1564 btf_verifier_log_member(env, t, member,
1565 "Invalid member bits_offset");
1566 return -EINVAL;
1567 }
1568
1558 if (BITS_ROUNDUP_BYTES(member->offset) > struct_size) { 1569 if (BITS_ROUNDUP_BYTES(member->offset) > struct_size) {
1559 btf_verifier_log_member(env, t, member, 1570 btf_verifier_log_member(env, t, member,
1560 "Member bits_offset exceeds its struct size"); 1571 "Member bits_offset exceeds its struct size");
@@ -1562,6 +1573,7 @@ static s32 btf_struct_check_meta(struct btf_verifier_env *env,
1562 } 1573 }
1563 1574
1564 btf_verifier_log_member(env, t, member, NULL); 1575 btf_verifier_log_member(env, t, member, NULL);
1576 last_offset = member->offset;
1565 } 1577 }
1566 1578
1567 return meta_needed; 1579 return meta_needed;
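
An illustrative layout for the new check (not from the patch): member bit offsets must be non-decreasing, and a trailing zero-sized member may share its offset with the previous member, which is why the comparison is ">" rather than ">=". BTF metadata claiming that a later member starts before an earlier one is now rejected with "Invalid member bits_offset".

struct ok_example {
	int  a;		/* bits_offset 0 */
	char tail[0];	/* bits_offset 32: may equal, but never precede, a's */
};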
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index e0918d180f08..46f5f29605d4 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -69,7 +69,7 @@ struct bpf_cpu_map {
69}; 69};
70 70
71static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu, 71static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
72 struct xdp_bulk_queue *bq); 72 struct xdp_bulk_queue *bq, bool in_napi_ctx);
73 73
74static u64 cpu_map_bitmap_size(const union bpf_attr *attr) 74static u64 cpu_map_bitmap_size(const union bpf_attr *attr)
75{ 75{
@@ -375,7 +375,7 @@ static void __cpu_map_entry_free(struct rcu_head *rcu)
375 struct xdp_bulk_queue *bq = per_cpu_ptr(rcpu->bulkq, cpu); 375 struct xdp_bulk_queue *bq = per_cpu_ptr(rcpu->bulkq, cpu);
376 376
377 /* No concurrent bq_enqueue can run at this point */ 377 /* No concurrent bq_enqueue can run at this point */
378 bq_flush_to_queue(rcpu, bq); 378 bq_flush_to_queue(rcpu, bq, false);
379 } 379 }
380 free_percpu(rcpu->bulkq); 380 free_percpu(rcpu->bulkq);
381 /* Cannot kthread_stop() here, last put free rcpu resources */ 381 /* Cannot kthread_stop() here, last put free rcpu resources */
@@ -558,7 +558,7 @@ const struct bpf_map_ops cpu_map_ops = {
558}; 558};
559 559
560static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu, 560static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
561 struct xdp_bulk_queue *bq) 561 struct xdp_bulk_queue *bq, bool in_napi_ctx)
562{ 562{
563 unsigned int processed = 0, drops = 0; 563 unsigned int processed = 0, drops = 0;
564 const int to_cpu = rcpu->cpu; 564 const int to_cpu = rcpu->cpu;
@@ -578,7 +578,10 @@ static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
578 err = __ptr_ring_produce(q, xdpf); 578 err = __ptr_ring_produce(q, xdpf);
579 if (err) { 579 if (err) {
580 drops++; 580 drops++;
581 xdp_return_frame_rx_napi(xdpf); 581 if (likely(in_napi_ctx))
582 xdp_return_frame_rx_napi(xdpf);
583 else
584 xdp_return_frame(xdpf);
582 } 585 }
583 processed++; 586 processed++;
584 } 587 }
@@ -598,7 +601,7 @@ static int bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf)
598 struct xdp_bulk_queue *bq = this_cpu_ptr(rcpu->bulkq); 601 struct xdp_bulk_queue *bq = this_cpu_ptr(rcpu->bulkq);
599 602
600 if (unlikely(bq->count == CPU_MAP_BULK_SIZE)) 603 if (unlikely(bq->count == CPU_MAP_BULK_SIZE))
601 bq_flush_to_queue(rcpu, bq); 604 bq_flush_to_queue(rcpu, bq, true);
602 605
603 /* Notice, xdp_buff/page MUST be queued here, long enough for 606 /* Notice, xdp_buff/page MUST be queued here, long enough for
604 * driver to code invoking us to finished, due to driver 607 * driver to code invoking us to finished, due to driver
@@ -661,7 +664,7 @@ void __cpu_map_flush(struct bpf_map *map)
661 664
662 /* Flush all frames in bulkq to real queue */ 665 /* Flush all frames in bulkq to real queue */
663 bq = this_cpu_ptr(rcpu->bulkq); 666 bq = this_cpu_ptr(rcpu->bulkq);
664 bq_flush_to_queue(rcpu, bq); 667 bq_flush_to_queue(rcpu, bq, true);
665 668
666 /* If already running, costs spin_lock_irqsave + smb_mb */ 669 /* If already running, costs spin_lock_irqsave + smb_mb */
667 wake_up_process(rcpu->kthread); 670 wake_up_process(rcpu->kthread);
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index d361fc1e3bf3..750d45edae79 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -217,7 +217,8 @@ void __dev_map_insert_ctx(struct bpf_map *map, u32 bit)
217} 217}
218 218
219static int bq_xmit_all(struct bpf_dtab_netdev *obj, 219static int bq_xmit_all(struct bpf_dtab_netdev *obj,
220 struct xdp_bulk_queue *bq, u32 flags) 220 struct xdp_bulk_queue *bq, u32 flags,
221 bool in_napi_ctx)
221{ 222{
222 struct net_device *dev = obj->dev; 223 struct net_device *dev = obj->dev;
223 int sent = 0, drops = 0, err = 0; 224 int sent = 0, drops = 0, err = 0;
@@ -254,7 +255,10 @@ error:
254 struct xdp_frame *xdpf = bq->q[i]; 255 struct xdp_frame *xdpf = bq->q[i];
255 256
256 /* RX path under NAPI protection, can return frames faster */ 257 /* RX path under NAPI protection, can return frames faster */
257 xdp_return_frame_rx_napi(xdpf); 258 if (likely(in_napi_ctx))
259 xdp_return_frame_rx_napi(xdpf);
260 else
261 xdp_return_frame(xdpf);
258 drops++; 262 drops++;
259 } 263 }
260 goto out; 264 goto out;
@@ -286,7 +290,7 @@ void __dev_map_flush(struct bpf_map *map)
286 __clear_bit(bit, bitmap); 290 __clear_bit(bit, bitmap);
287 291
288 bq = this_cpu_ptr(dev->bulkq); 292 bq = this_cpu_ptr(dev->bulkq);
289 bq_xmit_all(dev, bq, XDP_XMIT_FLUSH); 293 bq_xmit_all(dev, bq, XDP_XMIT_FLUSH, true);
290 } 294 }
291} 295}
292 296
@@ -316,7 +320,7 @@ static int bq_enqueue(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf,
316 struct xdp_bulk_queue *bq = this_cpu_ptr(obj->bulkq); 320 struct xdp_bulk_queue *bq = this_cpu_ptr(obj->bulkq);
317 321
318 if (unlikely(bq->count == DEV_MAP_BULK_SIZE)) 322 if (unlikely(bq->count == DEV_MAP_BULK_SIZE))
319 bq_xmit_all(obj, bq, 0); 323 bq_xmit_all(obj, bq, 0, true);
320 324
321 /* Ingress dev_rx will be the same for all xdp_frame's in 325 /* Ingress dev_rx will be the same for all xdp_frame's in
322 * bulk_queue, because bq stored per-CPU and must be flushed 326 * bulk_queue, because bq stored per-CPU and must be flushed
@@ -385,7 +389,7 @@ static void dev_map_flush_old(struct bpf_dtab_netdev *dev)
385 __clear_bit(dev->bit, bitmap); 389 __clear_bit(dev->bit, bitmap);
386 390
387 bq = per_cpu_ptr(dev->bulkq, cpu); 391 bq = per_cpu_ptr(dev->bulkq, cpu);
388 bq_xmit_all(dev, bq, XDP_XMIT_FLUSH); 392 bq_xmit_all(dev, bq, XDP_XMIT_FLUSH, false);
389 } 393 }
390 } 394 }
391} 395}
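
A sketch of the rule both the cpumap and devmap hunks implement: the NAPI-only fast-path return may be used from the enqueue/flush paths that run in NAPI context, while teardown paths must use the plain return. my_return_frame() and its arguments are illustrative.

#include <net/xdp.h>

static void my_return_frame(struct xdp_frame *xdpf, bool in_napi_ctx)
{
	if (in_napi_ctx)
		xdp_return_frame_rx_napi(xdpf);	/* NAPI/softirq context only */
	else
		xdp_return_frame(xdpf);		/* safe from any context */
}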
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 98fb7938beea..c4d75c52b4fc 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -1048,12 +1048,12 @@ static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
1048 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); 1048 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1049 1049
1050 while (msg_data_left(msg)) { 1050 while (msg_data_left(msg)) {
1051 struct sk_msg_buff *m; 1051 struct sk_msg_buff *m = NULL;
1052 bool enospc = false; 1052 bool enospc = false;
1053 int copy; 1053 int copy;
1054 1054
1055 if (sk->sk_err) { 1055 if (sk->sk_err) {
1056 err = sk->sk_err; 1056 err = -sk->sk_err;
1057 goto out_err; 1057 goto out_err;
1058 } 1058 }
1059 1059
@@ -1116,8 +1116,11 @@ wait_for_sndbuf:
1116 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 1116 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1117wait_for_memory: 1117wait_for_memory:
1118 err = sk_stream_wait_memory(sk, &timeo); 1118 err = sk_stream_wait_memory(sk, &timeo);
1119 if (err) 1119 if (err) {
1120 if (m && m != psock->cork)
1121 free_start_sg(sk, m);
1120 goto out_err; 1122 goto out_err;
1123 }
1121 } 1124 }
1122out_err: 1125out_err:
1123 if (err < 0) 1126 if (err < 0)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index a31a1ba0f8ea..b41c6cf2eb88 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -575,7 +575,7 @@ static struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map,
575{ 575{
576 int refold; 576 int refold;
577 577
578 refold = __atomic_add_unless(&map->refcnt, 1, 0); 578 refold = atomic_fetch_add_unless(&map->refcnt, 1, 0);
579 579
580 if (refold >= BPF_MAX_REFCNT) { 580 if (refold >= BPF_MAX_REFCNT) {
581 __bpf_map_put(map, false); 581 __bpf_map_put(map, false);
@@ -1144,7 +1144,7 @@ struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog)
1144{ 1144{
1145 int refold; 1145 int refold;
1146 1146
1147 refold = __atomic_add_unless(&prog->aux->refcnt, 1, 0); 1147 refold = atomic_fetch_add_unless(&prog->aux->refcnt, 1, 0);
1148 1148
1149 if (refold >= BPF_MAX_REFCNT) { 1149 if (refold >= BPF_MAX_REFCNT) {
1150 __bpf_prog_put(prog, false); 1150 __bpf_prog_put(prog, false);
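
A sketch of the pattern the two hunks above rely on, using the renamed atomic_fetch_add_unless(): take a reference only if the object is still live, and back the increment out when a configured ceiling is exceeded. my_get_not_zero(), MY_MAX_REFCNT and the error codes are illustrative, not BPF's exact values.

#include <linux/atomic.h>
#include <linux/errno.h>

#define MY_MAX_REFCNT 32768

static int my_get_not_zero(atomic_t *refcnt)
{
	int old = atomic_fetch_add_unless(refcnt, 1, 0);	/* no-op if already 0 */

	if (old == 0)
		return -ENOENT;		/* object is already being torn down */
	if (old >= MY_MAX_REFCNT) {
		atomic_dec(refcnt);	/* undo, mirroring the __bpf_*_put() path */
		return -EBUSY;
	}
	return 0;
}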
diff --git a/kernel/compat.c b/kernel/compat.c
index 702aa846ddac..8e40efc2928a 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -324,35 +324,6 @@ COMPAT_SYSCALL_DEFINE3(sched_getaffinity, compat_pid_t, pid, unsigned int, len,
324 return ret; 324 return ret;
325} 325}
326 326
327/* Todo: Delete these extern declarations when get/put_compat_itimerspec64()
328 * are moved to kernel/time/time.c .
329 */
330extern int __compat_get_timespec64(struct timespec64 *ts64,
331 const struct compat_timespec __user *cts);
332extern int __compat_put_timespec64(const struct timespec64 *ts64,
333 struct compat_timespec __user *cts);
334
335int get_compat_itimerspec64(struct itimerspec64 *its,
336 const struct compat_itimerspec __user *uits)
337{
338
339 if (__compat_get_timespec64(&its->it_interval, &uits->it_interval) ||
340 __compat_get_timespec64(&its->it_value, &uits->it_value))
341 return -EFAULT;
342 return 0;
343}
344EXPORT_SYMBOL_GPL(get_compat_itimerspec64);
345
346int put_compat_itimerspec64(const struct itimerspec64 *its,
347 struct compat_itimerspec __user *uits)
348{
349 if (__compat_put_timespec64(&its->it_interval, &uits->it_interval) ||
350 __compat_put_timespec64(&its->it_value, &uits->it_value))
351 return -EFAULT;
352 return 0;
353}
354EXPORT_SYMBOL_GPL(put_compat_itimerspec64);
355
356/* 327/*
357 * We currently only need the following fields from the sigevent 328 * We currently only need the following fields from the sigevent
358 * structure: sigev_value, sigev_signo, sig_notify and (sometimes 329 * structure: sigev_value, sigev_signo, sig_notify and (sometimes
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 0db8938fbb23..dd8634dde1ae 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1274,7 +1274,7 @@ static struct cpuhp_step cpuhp_hp_states[] = {
1274 * otherwise a RCU stall occurs. 1274 * otherwise a RCU stall occurs.
1275 */ 1275 */
1276 [CPUHP_TIMERS_PREPARE] = { 1276 [CPUHP_TIMERS_PREPARE] = {
1277 .name = "timers:dead", 1277 .name = "timers:prepare",
1278 .startup.single = timers_prepare_cpu, 1278 .startup.single = timers_prepare_cpu,
1279 .teardown.single = timers_dead_cpu, 1279 .teardown.single = timers_dead_cpu,
1280 }, 1280 },
@@ -1344,6 +1344,11 @@ static struct cpuhp_step cpuhp_hp_states[] = {
1344 .startup.single = perf_event_init_cpu, 1344 .startup.single = perf_event_init_cpu,
1345 .teardown.single = perf_event_exit_cpu, 1345 .teardown.single = perf_event_exit_cpu,
1346 }, 1346 },
1347 [CPUHP_AP_WATCHDOG_ONLINE] = {
1348 .name = "lockup_detector:online",
1349 .startup.single = lockup_detector_online_cpu,
1350 .teardown.single = lockup_detector_offline_cpu,
1351 },
1347 [CPUHP_AP_WORKQUEUE_ONLINE] = { 1352 [CPUHP_AP_WORKQUEUE_ONLINE] = {
1348 .name = "workqueue:online", 1353 .name = "workqueue:online",
1349 .startup.single = workqueue_online_cpu, 1354 .startup.single = workqueue_online_cpu,
@@ -2010,7 +2015,7 @@ void __init boot_cpu_init(void)
2010/* 2015/*
2011 * Must be called _AFTER_ setting up the per_cpu areas 2016 * Must be called _AFTER_ setting up the per_cpu areas
2012 */ 2017 */
2013void __init boot_cpu_state_init(void) 2018void __init boot_cpu_hotplug_init(void)
2014{ 2019{
2015 per_cpu_ptr(&cpuhp_state, smp_processor_id())->state = CPUHP_ONLINE; 2020 per_cpu_ptr(&cpuhp_state, smp_processor_id())->state = CPUHP_ONLINE;
2016} 2021}
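
The cpuhp_hp_states[] edits above rename the timers state, insert a lockup_detector:online state between the perf and workqueue entries, and rename boot_cpu_state_init() to boot_cpu_hotplug_init(). The table position matters because hotplug states are invoked top-down while a CPU is brought up and bottom-up while it goes down. A minimal userspace sketch of that ordering (the table and loop below are invented stand-ins, not the kernel state machine):

#include <stdio.h>

struct hp_step {
	const char *name;
};

/* Same relative order as the cpuhp_hp_states[] excerpt above. */
static const struct hp_step steps[] = {
	{ "perf:online" },
	{ "lockup_detector:online" },	/* newly inserted state */
	{ "workqueue:online" },
};

int main(void)
{
	int n = sizeof(steps) / sizeof(steps[0]);
	int i;

	/* Bring-up walks the table forward... */
	printf("cpu coming online:\n");
	for (i = 0; i < n; i++)
		printf("  startup  %s\n", steps[i].name);

	/* ...teardown walks it backward, undoing later states first. */
	printf("cpu going offline:\n");
	for (i = n - 1; i >= 0; i--)
		printf("  teardown %s\n", steps[i].name);
	return 0;
}
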
diff --git a/kernel/fork.c b/kernel/fork.c
index 1b27babc4c78..9d8d0e016fc6 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2276,6 +2276,8 @@ static void sighand_ctor(void *data)
2276 2276
2277void __init proc_caches_init(void) 2277void __init proc_caches_init(void)
2278{ 2278{
2279 unsigned int mm_size;
2280
2279 sighand_cachep = kmem_cache_create("sighand_cache", 2281 sighand_cachep = kmem_cache_create("sighand_cache",
2280 sizeof(struct sighand_struct), 0, 2282 sizeof(struct sighand_struct), 0,
2281 SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU| 2283 SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU|
@@ -2292,15 +2294,16 @@ void __init proc_caches_init(void)
2292 sizeof(struct fs_struct), 0, 2294 sizeof(struct fs_struct), 0,
2293 SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, 2295 SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT,
2294 NULL); 2296 NULL);
2297
2295 /* 2298 /*
2296 * FIXME! The "sizeof(struct mm_struct)" currently includes the 2299 * The mm_cpumask is located at the end of mm_struct, and is
2297 * whole struct cpumask for the OFFSTACK case. We could change 2300 * dynamically sized based on the maximum CPU number this system
2298 * this to *only* allocate as much of it as required by the 2301 * can have, taking hotplug into account (nr_cpu_ids).
2299 * maximum number of CPU's we can ever have. The cpumask_allocation
2300 * is at the end of the structure, exactly for that reason.
2301 */ 2302 */
2303 mm_size = sizeof(struct mm_struct) + cpumask_size();
2304
2302 mm_cachep = kmem_cache_create_usercopy("mm_struct", 2305 mm_cachep = kmem_cache_create_usercopy("mm_struct",
2303 sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN, 2306 mm_size, ARCH_MIN_MMSTRUCT_ALIGN,
2304 SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, 2307 SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT,
2305 offsetof(struct mm_struct, saved_auxv), 2308 offsetof(struct mm_struct, saved_auxv),
2306 sizeof_field(struct mm_struct, saved_auxv), 2309 sizeof_field(struct mm_struct, saved_auxv),
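
With the FIXME resolved, proc_caches_init() sizes the mm_struct slab as the fixed structure plus cpumask_size() bytes for the trailing mm_cpumask, so, as the new comment says, the allocation covers only as many CPU bits as the system can actually have (nr_cpu_ids). A standalone sketch of that trailing-bitmap sizing, with invented names and a hard-coded count standing in for nr_cpu_ids:

#include <stdio.h>
#include <stdlib.h>

#define BITS_PER_LONG	(8 * sizeof(unsigned long))

/* Fixed-size fields followed by a CPU bitmap sized only at runtime. */
struct fake_mm {
	int users;
	unsigned long cpu_bitmap[];	/* analogue of the mm_cpumask() storage */
};

static unsigned int nr_cpu_ids = 4;	/* stand-in for the boot-time value */

static size_t cpumask_bytes(void)
{
	return ((nr_cpu_ids + BITS_PER_LONG - 1) / BITS_PER_LONG) *
	       sizeof(unsigned long);
}

int main(void)
{
	size_t mm_size = sizeof(struct fake_mm) + cpumask_bytes();
	struct fake_mm *mm = calloc(1, mm_size);

	printf("each object needs %zu fixed + %zu bitmap bytes\n",
	       sizeof(struct fake_mm), cpumask_bytes());
	free(mm);
	return 0;
}
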
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
index c6766f326072..5f3e2baefca9 100644
--- a/kernel/irq/Kconfig
+++ b/kernel/irq/Kconfig
@@ -134,7 +134,6 @@ config GENERIC_IRQ_DEBUGFS
134endmenu 134endmenu
135 135
136config GENERIC_IRQ_MULTI_HANDLER 136config GENERIC_IRQ_MULTI_HANDLER
137 depends on !MULTI_IRQ_HANDLER
138 bool 137 bool
139 help 138 help
140 Allow to specify the low level IRQ handler at run time. 139 Allow to specify the low level IRQ handler at run time.
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index afc7f902d74a..578d0e5f1b5b 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -443,6 +443,7 @@ static void free_desc(unsigned int irq)
443 * We free the descriptor, masks and stat fields via RCU. That 443 * We free the descriptor, masks and stat fields via RCU. That
444 * allows demultiplex interrupts to do rcu based management of 444 * allows demultiplex interrupts to do rcu based management of
445 * the child interrupts. 445 * the child interrupts.
446 * This also allows us to use rcu in kstat_irqs_usr().
446 */ 447 */
447 call_rcu(&desc->rcu, delayed_free_desc); 448 call_rcu(&desc->rcu, delayed_free_desc);
448} 449}
@@ -928,17 +929,17 @@ unsigned int kstat_irqs(unsigned int irq)
928 * kstat_irqs_usr - Get the statistics for an interrupt 929 * kstat_irqs_usr - Get the statistics for an interrupt
929 * @irq: The interrupt number 930 * @irq: The interrupt number
930 * 931 *
931 * Returns the sum of interrupt counts on all cpus since boot for 932 * Returns the sum of interrupt counts on all cpus since boot for @irq.
932 * @irq. Contrary to kstat_irqs() this can be called from any 933 * Contrary to kstat_irqs() this can be called from any context.
933 * preemptible context. It's protected against concurrent removal of 934 * It uses rcu since a concurrent removal of an interrupt descriptor is
934 * an interrupt descriptor when sparse irqs are enabled. 935 * observing an rcu grace period before delayed_free_desc()/irq_kobj_release().
935 */ 936 */
936unsigned int kstat_irqs_usr(unsigned int irq) 937unsigned int kstat_irqs_usr(unsigned int irq)
937{ 938{
938 unsigned int sum; 939 unsigned int sum;
939 940
940 irq_lock_sparse(); 941 rcu_read_lock();
941 sum = kstat_irqs(irq); 942 sum = kstat_irqs(irq);
942 irq_unlock_sparse(); 943 rcu_read_unlock();
943 return sum; 944 return sum;
944} 945}
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index daeabd791d58..fb86146037a7 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -790,9 +790,19 @@ static irqreturn_t irq_forced_secondary_handler(int irq, void *dev_id)
790 790
791static int irq_wait_for_interrupt(struct irqaction *action) 791static int irq_wait_for_interrupt(struct irqaction *action)
792{ 792{
793 set_current_state(TASK_INTERRUPTIBLE); 793 for (;;) {
794 set_current_state(TASK_INTERRUPTIBLE);
794 795
795 while (!kthread_should_stop()) { 796 if (kthread_should_stop()) {
797 /* may need to run one last time */
798 if (test_and_clear_bit(IRQTF_RUNTHREAD,
799 &action->thread_flags)) {
800 __set_current_state(TASK_RUNNING);
801 return 0;
802 }
803 __set_current_state(TASK_RUNNING);
804 return -1;
805 }
796 806
797 if (test_and_clear_bit(IRQTF_RUNTHREAD, 807 if (test_and_clear_bit(IRQTF_RUNTHREAD,
798 &action->thread_flags)) { 808 &action->thread_flags)) {
@@ -800,10 +810,7 @@ static int irq_wait_for_interrupt(struct irqaction *action)
800 return 0; 810 return 0;
801 } 811 }
802 schedule(); 812 schedule();
803 set_current_state(TASK_INTERRUPTIBLE);
804 } 813 }
805 __set_current_state(TASK_RUNNING);
806 return -1;
807} 814}
808 815
809/* 816/*
@@ -1024,11 +1031,8 @@ static int irq_thread(void *data)
1024 /* 1031 /*
1025 * This is the regular exit path. __free_irq() is stopping the 1032 * This is the regular exit path. __free_irq() is stopping the
1026 * thread via kthread_stop() after calling 1033 * thread via kthread_stop() after calling
1027 * synchronize_irq(). So neither IRQTF_RUNTHREAD nor the 1034 * synchronize_hardirq(). So neither IRQTF_RUNTHREAD nor the
1028 * oneshot mask bit can be set. We cannot verify that as we 1035 * oneshot mask bit can be set.
1029 * cannot touch the oneshot mask at this point anymore as
1030 * __setup_irq() might have given out currents thread_mask
1031 * again.
1032 */ 1036 */
1033 task_work_cancel(current, irq_thread_dtor); 1037 task_work_cancel(current, irq_thread_dtor);
1034 return 0; 1038 return 0;
@@ -1068,6 +1072,13 @@ static int irq_setup_forced_threading(struct irqaction *new)
1068 if (new->flags & (IRQF_NO_THREAD | IRQF_PERCPU | IRQF_ONESHOT)) 1072 if (new->flags & (IRQF_NO_THREAD | IRQF_PERCPU | IRQF_ONESHOT))
1069 return 0; 1073 return 0;
1070 1074
1075 /*
1076 * No further action required for interrupts which are requested as
1077 * threaded interrupts already
1078 */
1079 if (new->handler == irq_default_primary_handler)
1080 return 0;
1081
1071 new->flags |= IRQF_ONESHOT; 1082 new->flags |= IRQF_ONESHOT;
1072 1083
1073 /* 1084 /*
@@ -1075,7 +1086,7 @@ static int irq_setup_forced_threading(struct irqaction *new)
1075 * thread handler. We force thread them as well by creating a 1086 * thread handler. We force thread them as well by creating a
1076 * secondary action. 1087 * secondary action.
1077 */ 1088 */
1078 if (new->handler != irq_default_primary_handler && new->thread_fn) { 1089 if (new->handler && new->thread_fn) {
1079 /* Allocate the secondary action */ 1090 /* Allocate the secondary action */
1080 new->secondary = kzalloc(sizeof(struct irqaction), GFP_KERNEL); 1091 new->secondary = kzalloc(sizeof(struct irqaction), GFP_KERNEL);
1081 if (!new->secondary) 1092 if (!new->secondary)
@@ -1244,8 +1255,10 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
1244 1255
1245 /* 1256 /*
1246 * Protects against a concurrent __free_irq() call which might wait 1257 * Protects against a concurrent __free_irq() call which might wait
1247 * for synchronize_irq() to complete without holding the optional 1258 * for synchronize_hardirq() to complete without holding the optional
1248 * chip bus lock and desc->lock. 1259 * chip bus lock and desc->lock. Also protects against handing out
1260 * a recycled oneshot thread_mask bit while it's still in use by
1261 * its previous owner.
1249 */ 1262 */
1250 mutex_lock(&desc->request_mutex); 1263 mutex_lock(&desc->request_mutex);
1251 1264
@@ -1564,9 +1577,6 @@ static struct irqaction *__free_irq(struct irq_desc *desc, void *dev_id)
1564 1577
1565 WARN(in_interrupt(), "Trying to free IRQ %d from IRQ context!\n", irq); 1578 WARN(in_interrupt(), "Trying to free IRQ %d from IRQ context!\n", irq);
1566 1579
1567 if (!desc)
1568 return NULL;
1569
1570 mutex_lock(&desc->request_mutex); 1580 mutex_lock(&desc->request_mutex);
1571 chip_bus_lock(desc); 1581 chip_bus_lock(desc);
1572 raw_spin_lock_irqsave(&desc->lock, flags); 1582 raw_spin_lock_irqsave(&desc->lock, flags);
@@ -1613,11 +1623,11 @@ static struct irqaction *__free_irq(struct irq_desc *desc, void *dev_id)
1613 /* 1623 /*
1614 * Drop bus_lock here so the changes which were done in the chip 1624 * Drop bus_lock here so the changes which were done in the chip
1615 * callbacks above are synced out to the irq chips which hang 1625 * callbacks above are synced out to the irq chips which hang
1616 * behind a slow bus (I2C, SPI) before calling synchronize_irq(). 1626 * behind a slow bus (I2C, SPI) before calling synchronize_hardirq().
1617 * 1627 *
1618 * Aside of that the bus_lock can also be taken from the threaded 1628 * Aside of that the bus_lock can also be taken from the threaded
1619 * handler in irq_finalize_oneshot() which results in a deadlock 1629 * handler in irq_finalize_oneshot() which results in a deadlock
1620 * because synchronize_irq() would wait forever for the thread to 1630 * because kthread_stop() would wait forever for the thread to
1621 * complete, which is blocked on the bus lock. 1631 * complete, which is blocked on the bus lock.
1622 * 1632 *
1623 * The still held desc->request_mutex() protects against a 1633 * The still held desc->request_mutex() protects against a
@@ -1629,7 +1639,7 @@ static struct irqaction *__free_irq(struct irq_desc *desc, void *dev_id)
1629 unregister_handler_proc(irq, action); 1639 unregister_handler_proc(irq, action);
1630 1640
1631 /* Make sure it's not being used on another CPU: */ 1641 /* Make sure it's not being used on another CPU: */
1632 synchronize_irq(irq); 1642 synchronize_hardirq(irq);
1633 1643
1634#ifdef CONFIG_DEBUG_SHIRQ 1644#ifdef CONFIG_DEBUG_SHIRQ
1635 /* 1645 /*
@@ -1638,7 +1648,7 @@ static struct irqaction *__free_irq(struct irq_desc *desc, void *dev_id)
1638 * is so by doing an extra call to the handler .... 1648 * is so by doing an extra call to the handler ....
1639 * 1649 *
1640 * ( We do this after actually deregistering it, to make sure that a 1650 * ( We do this after actually deregistering it, to make sure that a
1641 * 'real' IRQ doesn't run in * parallel with our fake. ) 1651 * 'real' IRQ doesn't run in parallel with our fake. )
1642 */ 1652 */
1643 if (action->flags & IRQF_SHARED) { 1653 if (action->flags & IRQF_SHARED) {
1644 local_irq_save(flags); 1654 local_irq_save(flags);
@@ -1647,6 +1657,12 @@ static struct irqaction *__free_irq(struct irq_desc *desc, void *dev_id)
1647 } 1657 }
1648#endif 1658#endif
1649 1659
1660 /*
1661 * The action has already been removed above, but the thread writes
1662 * its oneshot mask bit when it completes. Though request_mutex is
1663 * held across this which prevents __setup_irq() from handing out
1664 * the same bit to a newly requested action.
1665 */
1650 if (action->thread) { 1666 if (action->thread) {
1651 kthread_stop(action->thread); 1667 kthread_stop(action->thread);
1652 put_task_struct(action->thread); 1668 put_task_struct(action->thread);
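
The reworked irq_wait_for_interrupt() re-checks IRQTF_RUNTHREAD after kthread_should_stop() returns true, so a hard interrupt that raced with __free_irq() still gets one final threaded run instead of being dropped. The same "drain pending work before honouring the stop request" ordering, sketched with ordinary pthreads instead of kernel task states (everything below is an invented stand-in):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static bool should_stop;	/* analogue of kthread_should_stop() */
static bool run_thread;		/* analogue of IRQTF_RUNTHREAD */

static void *irq_thread(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	for (;;) {
		if (run_thread) {
			/* Pending work is handled even while stopping. */
			run_thread = false;
			pthread_mutex_unlock(&lock);
			printf("handled one interrupt\n");
			pthread_mutex_lock(&lock);
			continue;
		}
		if (should_stop)	/* exit only once nothing is pending */
			break;
		pthread_cond_wait(&cond, &lock);
	}
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, irq_thread, NULL);

	/* A last request arrives together with the stop, as in __free_irq(). */
	pthread_mutex_lock(&lock);
	run_thread = true;
	should_stop = true;
	pthread_cond_broadcast(&cond);
	pthread_mutex_unlock(&lock);

	pthread_join(t, NULL);
	return 0;
}
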
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 37eda10f5c36..da9addb8d655 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -475,22 +475,24 @@ int show_interrupts(struct seq_file *p, void *v)
475 seq_putc(p, '\n'); 475 seq_putc(p, '\n');
476 } 476 }
477 477
478 irq_lock_sparse(); 478 rcu_read_lock();
479 desc = irq_to_desc(i); 479 desc = irq_to_desc(i);
480 if (!desc) 480 if (!desc)
481 goto outsparse; 481 goto outsparse;
482 482
483 raw_spin_lock_irqsave(&desc->lock, flags); 483 if (desc->kstat_irqs)
484 for_each_online_cpu(j) 484 for_each_online_cpu(j)
485 any_count |= kstat_irqs_cpu(i, j); 485 any_count |= *per_cpu_ptr(desc->kstat_irqs, j);
486 action = desc->action; 486
487 if ((!action || irq_desc_is_chained(desc)) && !any_count) 487 if ((!desc->action || irq_desc_is_chained(desc)) && !any_count)
488 goto out; 488 goto outsparse;
489 489
490 seq_printf(p, "%*d: ", prec, i); 490 seq_printf(p, "%*d: ", prec, i);
491 for_each_online_cpu(j) 491 for_each_online_cpu(j)
492 seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); 492 seq_printf(p, "%10u ", desc->kstat_irqs ?
493 *per_cpu_ptr(desc->kstat_irqs, j) : 0);
493 494
495 raw_spin_lock_irqsave(&desc->lock, flags);
494 if (desc->irq_data.chip) { 496 if (desc->irq_data.chip) {
495 if (desc->irq_data.chip->irq_print_chip) 497 if (desc->irq_data.chip->irq_print_chip)
496 desc->irq_data.chip->irq_print_chip(&desc->irq_data, p); 498 desc->irq_data.chip->irq_print_chip(&desc->irq_data, p);
@@ -511,6 +513,7 @@ int show_interrupts(struct seq_file *p, void *v)
511 if (desc->name) 513 if (desc->name)
512 seq_printf(p, "-%-8s", desc->name); 514 seq_printf(p, "-%-8s", desc->name);
513 515
516 action = desc->action;
514 if (action) { 517 if (action) {
515 seq_printf(p, " %s", action->name); 518 seq_printf(p, " %s", action->name);
516 while ((action = action->next) != NULL) 519 while ((action = action->next) != NULL)
@@ -518,10 +521,9 @@ int show_interrupts(struct seq_file *p, void *v)
518 } 521 }
519 522
520 seq_putc(p, '\n'); 523 seq_putc(p, '\n');
521out:
522 raw_spin_unlock_irqrestore(&desc->lock, flags); 524 raw_spin_unlock_irqrestore(&desc->lock, flags);
523outsparse: 525outsparse:
524 irq_unlock_sparse(); 526 rcu_read_unlock();
525 return 0; 527 return 0;
526} 528}
527#endif 529#endif
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 486dedbd9af5..087d18d771b5 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -190,7 +190,7 @@ static void __kthread_parkme(struct kthread *self)
190 if (!test_bit(KTHREAD_SHOULD_PARK, &self->flags)) 190 if (!test_bit(KTHREAD_SHOULD_PARK, &self->flags))
191 break; 191 break;
192 192
193 complete_all(&self->parked); 193 complete(&self->parked);
194 schedule(); 194 schedule();
195 } 195 }
196 __set_current_state(TASK_RUNNING); 196 __set_current_state(TASK_RUNNING);
@@ -471,7 +471,6 @@ void kthread_unpark(struct task_struct *k)
471 if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags)) 471 if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags))
472 __kthread_bind(k, kthread->cpu, TASK_PARKED); 472 __kthread_bind(k, kthread->cpu, TASK_PARKED);
473 473
474 reinit_completion(&kthread->parked);
475 clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags); 474 clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
476 /* 475 /*
477 * __kthread_parkme() will either see !SHOULD_PARK or get the wakeup. 476 * __kthread_parkme() will either see !SHOULD_PARK or get the wakeup.
@@ -499,6 +498,9 @@ int kthread_park(struct task_struct *k)
499 if (WARN_ON(k->flags & PF_EXITING)) 498 if (WARN_ON(k->flags & PF_EXITING))
500 return -ENOSYS; 499 return -ENOSYS;
501 500
501 if (WARN_ON_ONCE(test_bit(KTHREAD_SHOULD_PARK, &kthread->flags)))
502 return -EBUSY;
503
502 set_bit(KTHREAD_SHOULD_PARK, &kthread->flags); 504 set_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
503 if (k != current) { 505 if (k != current) {
504 wake_up_process(k); 506 wake_up_process(k);
diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
index 8402b3349dca..57bef4fbfb31 100644
--- a/kernel/locking/locktorture.c
+++ b/kernel/locking/locktorture.c
@@ -21,6 +21,9 @@
21 * Davidlohr Bueso <dave@stgolabs.net> 21 * Davidlohr Bueso <dave@stgolabs.net>
22 * Based on kernel/rcu/torture.c. 22 * Based on kernel/rcu/torture.c.
23 */ 23 */
24
25#define pr_fmt(fmt) fmt
26
24#include <linux/kernel.h> 27#include <linux/kernel.h>
25#include <linux/module.h> 28#include <linux/module.h>
26#include <linux/kthread.h> 29#include <linux/kthread.h>
@@ -57,7 +60,7 @@ torture_param(int, shutdown_secs, 0, "Shutdown time (j), <= zero to disable.");
57torture_param(int, stat_interval, 60, 60torture_param(int, stat_interval, 60,
58 "Number of seconds between stats printk()s"); 61 "Number of seconds between stats printk()s");
59torture_param(int, stutter, 5, "Number of jiffies to run/halt test, 0=disable"); 62torture_param(int, stutter, 5, "Number of jiffies to run/halt test, 0=disable");
60torture_param(bool, verbose, true, 63torture_param(int, verbose, 1,
61 "Enable verbose debugging printk()s"); 64 "Enable verbose debugging printk()s");
62 65
63static char *torture_type = "spin_lock"; 66static char *torture_type = "spin_lock";
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 87331565e505..70178f6ffdc4 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -92,7 +92,7 @@ static void s2idle_enter(void)
92 /* Push all the CPUs into the idle loop. */ 92 /* Push all the CPUs into the idle loop. */
93 wake_up_all_idle_cpus(); 93 wake_up_all_idle_cpus();
94 /* Make the current CPU wait so it can enter the idle loop too. */ 94 /* Make the current CPU wait so it can enter the idle loop too. */
95 swait_event(s2idle_wait_head, 95 swait_event_exclusive(s2idle_wait_head,
96 s2idle_state == S2IDLE_STATE_WAKE); 96 s2idle_state == S2IDLE_STATE_WAKE);
97 97
98 cpuidle_pause(); 98 cpuidle_pause();
@@ -160,7 +160,7 @@ void s2idle_wake(void)
160 raw_spin_lock_irqsave(&s2idle_lock, flags); 160 raw_spin_lock_irqsave(&s2idle_lock, flags);
161 if (s2idle_state > S2IDLE_STATE_NONE) { 161 if (s2idle_state > S2IDLE_STATE_NONE) {
162 s2idle_state = S2IDLE_STATE_WAKE; 162 s2idle_state = S2IDLE_STATE_WAKE;
163 swake_up(&s2idle_wait_head); 163 swake_up_one(&s2idle_wait_head);
164 } 164 }
165 raw_spin_unlock_irqrestore(&s2idle_lock, flags); 165 raw_spin_unlock_irqrestore(&s2idle_lock, flags);
166} 166}
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 40cea6735c2d..4d04683c31b2 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -91,7 +91,17 @@ static inline void rcu_seq_end(unsigned long *sp)
91 WRITE_ONCE(*sp, rcu_seq_endval(sp)); 91 WRITE_ONCE(*sp, rcu_seq_endval(sp));
92} 92}
93 93
94/* Take a snapshot of the update side's sequence number. */ 94/*
95 * rcu_seq_snap - Take a snapshot of the update side's sequence number.
96 *
97 * This function returns the earliest value of the grace-period sequence number
98 * that will indicate that a full grace period has elapsed since the current
99 * time. Once the grace-period sequence number has reached this value, it will
100 * be safe to invoke all callbacks that have been registered prior to the
101 * current time. This value is the current grace-period number plus two to the
102 * power of the number of low-order bits reserved for state, then rounded up to
103 * the next value in which the state bits are all zero.
104 */
95static inline unsigned long rcu_seq_snap(unsigned long *sp) 105static inline unsigned long rcu_seq_snap(unsigned long *sp)
96{ 106{
97 unsigned long s; 107 unsigned long s;
@@ -108,6 +118,15 @@ static inline unsigned long rcu_seq_current(unsigned long *sp)
108} 118}
109 119
110/* 120/*
121 * Given a snapshot from rcu_seq_snap(), determine whether or not the
122 * corresponding update-side operation has started.
123 */
124static inline bool rcu_seq_started(unsigned long *sp, unsigned long s)
125{
126 return ULONG_CMP_LT((s - 1) & ~RCU_SEQ_STATE_MASK, READ_ONCE(*sp));
127}
128
129/*
111 * Given a snapshot from rcu_seq_snap(), determine whether or not a 130 * Given a snapshot from rcu_seq_snap(), determine whether or not a
112 * full update-side operation has occurred. 131 * full update-side operation has occurred.
113 */ 132 */
@@ -117,6 +136,45 @@ static inline bool rcu_seq_done(unsigned long *sp, unsigned long s)
117} 136}
118 137
119/* 138/*
139 * Has a grace period completed since the time the old gp_seq was collected?
140 */
141static inline bool rcu_seq_completed_gp(unsigned long old, unsigned long new)
142{
143 return ULONG_CMP_LT(old, new & ~RCU_SEQ_STATE_MASK);
144}
145
146/*
147 * Has a grace period started since the time the old gp_seq was collected?
148 */
149static inline bool rcu_seq_new_gp(unsigned long old, unsigned long new)
150{
151 return ULONG_CMP_LT((old + RCU_SEQ_STATE_MASK) & ~RCU_SEQ_STATE_MASK,
152 new);
153}
154
155/*
156 * Roughly how many full grace periods have elapsed between the collection
157 * of the two specified grace periods?
158 */
159static inline unsigned long rcu_seq_diff(unsigned long new, unsigned long old)
160{
161 unsigned long rnd_diff;
162
163 if (old == new)
164 return 0;
165 /*
166 * Compute the number of grace periods (still shifted up), plus
167 * one if either of new and old is not an exact grace period.
168 */
169 rnd_diff = (new & ~RCU_SEQ_STATE_MASK) -
170 ((old + RCU_SEQ_STATE_MASK) & ~RCU_SEQ_STATE_MASK) +
171 ((new & RCU_SEQ_STATE_MASK) || (old & RCU_SEQ_STATE_MASK));
172 if (ULONG_CMP_GE(RCU_SEQ_STATE_MASK, rnd_diff))
173 return 1; /* Definitely no grace period has elapsed. */
174 return ((rnd_diff - RCU_SEQ_STATE_MASK - 1) >> RCU_SEQ_CTR_SHIFT) + 2;
175}
176
177/*
120 * debug_rcu_head_queue()/debug_rcu_head_unqueue() are used internally 178 * debug_rcu_head_queue()/debug_rcu_head_unqueue() are used internally
121 * by call_rcu() and rcu callback execution, and are therefore not part of the 179 * by call_rcu() and rcu callback execution, and are therefore not part of the
122 * RCU API. Leaving in rcupdate.h because they are used by all RCU flavors. 180 * RCU API. Leaving in rcupdate.h because they are used by all RCU flavors.
@@ -276,6 +334,9 @@ static inline void rcu_init_levelspread(int *levelspread, const int *levelcnt)
276/* Is this rcu_node a leaf? */ 334/* Is this rcu_node a leaf? */
277#define rcu_is_leaf_node(rnp) ((rnp)->level == rcu_num_lvls - 1) 335#define rcu_is_leaf_node(rnp) ((rnp)->level == rcu_num_lvls - 1)
278 336
337/* Is this rcu_node the last leaf? */
338#define rcu_is_last_leaf_node(rsp, rnp) ((rnp) == &(rsp)->node[rcu_num_nodes - 1])
339
279/* 340/*
280 * Do a full breadth-first scan of the rcu_node structures for the 341 * Do a full breadth-first scan of the rcu_node structures for the
281 * specified rcu_state structure. 342 * specified rcu_state structure.
@@ -405,8 +466,7 @@ enum rcutorture_type {
405 466
406#if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU) 467#if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU)
407void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags, 468void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags,
408 unsigned long *gpnum, unsigned long *completed); 469 unsigned long *gp_seq);
409void rcutorture_record_test_transition(void);
410void rcutorture_record_progress(unsigned long vernum); 470void rcutorture_record_progress(unsigned long vernum);
411void do_trace_rcu_torture_read(const char *rcutorturename, 471void do_trace_rcu_torture_read(const char *rcutorturename,
412 struct rcu_head *rhp, 472 struct rcu_head *rhp,
@@ -415,15 +475,11 @@ void do_trace_rcu_torture_read(const char *rcutorturename,
415 unsigned long c); 475 unsigned long c);
416#else 476#else
417static inline void rcutorture_get_gp_data(enum rcutorture_type test_type, 477static inline void rcutorture_get_gp_data(enum rcutorture_type test_type,
418 int *flags, 478 int *flags, unsigned long *gp_seq)
419 unsigned long *gpnum,
420 unsigned long *completed)
421{ 479{
422 *flags = 0; 480 *flags = 0;
423 *gpnum = 0; 481 *gp_seq = 0;
424 *completed = 0;
425} 482}
426static inline void rcutorture_record_test_transition(void) { }
427static inline void rcutorture_record_progress(unsigned long vernum) { } 483static inline void rcutorture_record_progress(unsigned long vernum) { }
428#ifdef CONFIG_RCU_TRACE 484#ifdef CONFIG_RCU_TRACE
429void do_trace_rcu_torture_read(const char *rcutorturename, 485void do_trace_rcu_torture_read(const char *rcutorturename,
@@ -441,31 +497,26 @@ void do_trace_rcu_torture_read(const char *rcutorturename,
441 497
442static inline void srcutorture_get_gp_data(enum rcutorture_type test_type, 498static inline void srcutorture_get_gp_data(enum rcutorture_type test_type,
443 struct srcu_struct *sp, int *flags, 499 struct srcu_struct *sp, int *flags,
444 unsigned long *gpnum, 500 unsigned long *gp_seq)
445 unsigned long *completed)
446{ 501{
447 if (test_type != SRCU_FLAVOR) 502 if (test_type != SRCU_FLAVOR)
448 return; 503 return;
449 *flags = 0; 504 *flags = 0;
450 *completed = sp->srcu_idx; 505 *gp_seq = sp->srcu_idx;
451 *gpnum = *completed;
452} 506}
453 507
454#elif defined(CONFIG_TREE_SRCU) 508#elif defined(CONFIG_TREE_SRCU)
455 509
456void srcutorture_get_gp_data(enum rcutorture_type test_type, 510void srcutorture_get_gp_data(enum rcutorture_type test_type,
457 struct srcu_struct *sp, int *flags, 511 struct srcu_struct *sp, int *flags,
458 unsigned long *gpnum, unsigned long *completed); 512 unsigned long *gp_seq);
459 513
460#endif 514#endif
461 515
462#ifdef CONFIG_TINY_RCU 516#ifdef CONFIG_TINY_RCU
463static inline unsigned long rcu_batches_started(void) { return 0; } 517static inline unsigned long rcu_get_gp_seq(void) { return 0; }
464static inline unsigned long rcu_batches_started_bh(void) { return 0; } 518static inline unsigned long rcu_bh_get_gp_seq(void) { return 0; }
465static inline unsigned long rcu_batches_started_sched(void) { return 0; } 519static inline unsigned long rcu_sched_get_gp_seq(void) { return 0; }
466static inline unsigned long rcu_batches_completed(void) { return 0; }
467static inline unsigned long rcu_batches_completed_bh(void) { return 0; }
468static inline unsigned long rcu_batches_completed_sched(void) { return 0; }
469static inline unsigned long rcu_exp_batches_completed(void) { return 0; } 520static inline unsigned long rcu_exp_batches_completed(void) { return 0; }
470static inline unsigned long rcu_exp_batches_completed_sched(void) { return 0; } 521static inline unsigned long rcu_exp_batches_completed_sched(void) { return 0; }
471static inline unsigned long 522static inline unsigned long
@@ -474,19 +525,16 @@ static inline void rcu_force_quiescent_state(void) { }
474static inline void rcu_bh_force_quiescent_state(void) { } 525static inline void rcu_bh_force_quiescent_state(void) { }
475static inline void rcu_sched_force_quiescent_state(void) { } 526static inline void rcu_sched_force_quiescent_state(void) { }
476static inline void show_rcu_gp_kthreads(void) { } 527static inline void show_rcu_gp_kthreads(void) { }
528static inline int rcu_get_gp_kthreads_prio(void) { return 0; }
477#else /* #ifdef CONFIG_TINY_RCU */ 529#else /* #ifdef CONFIG_TINY_RCU */
478extern unsigned long rcutorture_testseq; 530unsigned long rcu_get_gp_seq(void);
479extern unsigned long rcutorture_vernum; 531unsigned long rcu_bh_get_gp_seq(void);
480unsigned long rcu_batches_started(void); 532unsigned long rcu_sched_get_gp_seq(void);
481unsigned long rcu_batches_started_bh(void);
482unsigned long rcu_batches_started_sched(void);
483unsigned long rcu_batches_completed(void);
484unsigned long rcu_batches_completed_bh(void);
485unsigned long rcu_batches_completed_sched(void);
486unsigned long rcu_exp_batches_completed(void); 533unsigned long rcu_exp_batches_completed(void);
487unsigned long rcu_exp_batches_completed_sched(void); 534unsigned long rcu_exp_batches_completed_sched(void);
488unsigned long srcu_batches_completed(struct srcu_struct *sp); 535unsigned long srcu_batches_completed(struct srcu_struct *sp);
489void show_rcu_gp_kthreads(void); 536void show_rcu_gp_kthreads(void);
537int rcu_get_gp_kthreads_prio(void);
490void rcu_force_quiescent_state(void); 538void rcu_force_quiescent_state(void);
491void rcu_bh_force_quiescent_state(void); 539void rcu_bh_force_quiescent_state(void);
492void rcu_sched_force_quiescent_state(void); 540void rcu_sched_force_quiescent_state(void);
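
The new rcu.h helpers all operate on the ->gp_seq counters that replace the old gpnum/completed pair: the low-order RCU_SEQ_CTR_SHIFT bits carry grace-period state and only the remaining bits count grace periods, which is why rcu_seq_diff() masks and rounds before comparing. A standalone transcription of that arithmetic for trying concrete numbers; it assumes tree RCU's two state bits, and ULONG_CMP_GE is reproduced locally for the sketch:

#include <limits.h>
#include <stdio.h>

#define RCU_SEQ_CTR_SHIFT	2
#define RCU_SEQ_STATE_MASK	((1UL << RCU_SEQ_CTR_SHIFT) - 1)
/* Wraparound-safe "a >= b" for free-running counters. */
#define ULONG_CMP_GE(a, b)	(ULONG_MAX / 2 >= (a) - (b))

/* Transcribed from the rcu_seq_diff() hunk above. */
static unsigned long seq_diff(unsigned long new, unsigned long old)
{
	unsigned long rnd_diff;

	if (old == new)
		return 0;
	/*
	 * Number of grace periods (still shifted up), plus one if either
	 * snapshot was taken with a grace period in progress.
	 */
	rnd_diff = (new & ~RCU_SEQ_STATE_MASK) -
		   ((old + RCU_SEQ_STATE_MASK) & ~RCU_SEQ_STATE_MASK) +
		   ((new & RCU_SEQ_STATE_MASK) || (old & RCU_SEQ_STATE_MASK));
	if (ULONG_CMP_GE(RCU_SEQ_STATE_MASK, rnd_diff))
		return 1;	/* Definitely no full grace period elapsed. */
	return ((rnd_diff - RCU_SEQ_STATE_MASK - 1) >> RCU_SEQ_CTR_SHIFT) + 2;
}

int main(void)
{
	unsigned long idle5 = 5UL << RCU_SEQ_CTR_SHIFT;	/* counter 5, idle */
	unsigned long busy5 = idle5 | 1;		/* counter 5, GP running */
	unsigned long idle9 = 9UL << RCU_SEQ_CTR_SHIFT;	/* counter 9, idle */

	/* The answers are intentionally rough; see the comment in the hunk above. */
	printf("seq_diff(idle9, idle5) = %lu\n", seq_diff(idle9, idle5));
	printf("seq_diff(idle9, busy5) = %lu\n", seq_diff(idle9, busy5));
	printf("seq_diff(busy5, idle5) = %lu\n", seq_diff(busy5, idle5));
	return 0;
}
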
diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c
index e232846516b3..34244523550e 100644
--- a/kernel/rcu/rcuperf.c
+++ b/kernel/rcu/rcuperf.c
@@ -19,6 +19,9 @@
19 * 19 *
20 * Authors: Paul E. McKenney <paulmck@us.ibm.com> 20 * Authors: Paul E. McKenney <paulmck@us.ibm.com>
21 */ 21 */
22
23#define pr_fmt(fmt) fmt
24
22#include <linux/types.h> 25#include <linux/types.h>
23#include <linux/kernel.h> 26#include <linux/kernel.h>
24#include <linux/init.h> 27#include <linux/init.h>
@@ -88,7 +91,7 @@ torture_param(int, nreaders, -1, "Number of RCU reader threads");
88torture_param(int, nwriters, -1, "Number of RCU updater threads"); 91torture_param(int, nwriters, -1, "Number of RCU updater threads");
89torture_param(bool, shutdown, !IS_ENABLED(MODULE), 92torture_param(bool, shutdown, !IS_ENABLED(MODULE),
90 "Shutdown at end of performance tests."); 93 "Shutdown at end of performance tests.");
91torture_param(bool, verbose, true, "Enable verbose debugging printk()s"); 94torture_param(int, verbose, 1, "Enable verbose debugging printk()s");
92torture_param(int, writer_holdoff, 0, "Holdoff (us) between GPs, zero to disable"); 95torture_param(int, writer_holdoff, 0, "Holdoff (us) between GPs, zero to disable");
93 96
94static char *perf_type = "rcu"; 97static char *perf_type = "rcu";
@@ -135,8 +138,8 @@ struct rcu_perf_ops {
135 void (*cleanup)(void); 138 void (*cleanup)(void);
136 int (*readlock)(void); 139 int (*readlock)(void);
137 void (*readunlock)(int idx); 140 void (*readunlock)(int idx);
138 unsigned long (*started)(void); 141 unsigned long (*get_gp_seq)(void);
139 unsigned long (*completed)(void); 142 unsigned long (*gp_diff)(unsigned long new, unsigned long old);
140 unsigned long (*exp_completed)(void); 143 unsigned long (*exp_completed)(void);
141 void (*async)(struct rcu_head *head, rcu_callback_t func); 144 void (*async)(struct rcu_head *head, rcu_callback_t func);
142 void (*gp_barrier)(void); 145 void (*gp_barrier)(void);
@@ -176,8 +179,8 @@ static struct rcu_perf_ops rcu_ops = {
176 .init = rcu_sync_perf_init, 179 .init = rcu_sync_perf_init,
177 .readlock = rcu_perf_read_lock, 180 .readlock = rcu_perf_read_lock,
178 .readunlock = rcu_perf_read_unlock, 181 .readunlock = rcu_perf_read_unlock,
179 .started = rcu_batches_started, 182 .get_gp_seq = rcu_get_gp_seq,
180 .completed = rcu_batches_completed, 183 .gp_diff = rcu_seq_diff,
181 .exp_completed = rcu_exp_batches_completed, 184 .exp_completed = rcu_exp_batches_completed,
182 .async = call_rcu, 185 .async = call_rcu,
183 .gp_barrier = rcu_barrier, 186 .gp_barrier = rcu_barrier,
@@ -206,8 +209,8 @@ static struct rcu_perf_ops rcu_bh_ops = {
206 .init = rcu_sync_perf_init, 209 .init = rcu_sync_perf_init,
207 .readlock = rcu_bh_perf_read_lock, 210 .readlock = rcu_bh_perf_read_lock,
208 .readunlock = rcu_bh_perf_read_unlock, 211 .readunlock = rcu_bh_perf_read_unlock,
209 .started = rcu_batches_started_bh, 212 .get_gp_seq = rcu_bh_get_gp_seq,
210 .completed = rcu_batches_completed_bh, 213 .gp_diff = rcu_seq_diff,
211 .exp_completed = rcu_exp_batches_completed_sched, 214 .exp_completed = rcu_exp_batches_completed_sched,
212 .async = call_rcu_bh, 215 .async = call_rcu_bh,
213 .gp_barrier = rcu_barrier_bh, 216 .gp_barrier = rcu_barrier_bh,
@@ -263,8 +266,8 @@ static struct rcu_perf_ops srcu_ops = {
263 .init = rcu_sync_perf_init, 266 .init = rcu_sync_perf_init,
264 .readlock = srcu_perf_read_lock, 267 .readlock = srcu_perf_read_lock,
265 .readunlock = srcu_perf_read_unlock, 268 .readunlock = srcu_perf_read_unlock,
266 .started = NULL, 269 .get_gp_seq = srcu_perf_completed,
267 .completed = srcu_perf_completed, 270 .gp_diff = rcu_seq_diff,
268 .exp_completed = srcu_perf_completed, 271 .exp_completed = srcu_perf_completed,
269 .async = srcu_call_rcu, 272 .async = srcu_call_rcu,
270 .gp_barrier = srcu_rcu_barrier, 273 .gp_barrier = srcu_rcu_barrier,
@@ -292,8 +295,8 @@ static struct rcu_perf_ops srcud_ops = {
292 .cleanup = srcu_sync_perf_cleanup, 295 .cleanup = srcu_sync_perf_cleanup,
293 .readlock = srcu_perf_read_lock, 296 .readlock = srcu_perf_read_lock,
294 .readunlock = srcu_perf_read_unlock, 297 .readunlock = srcu_perf_read_unlock,
295 .started = NULL, 298 .get_gp_seq = srcu_perf_completed,
296 .completed = srcu_perf_completed, 299 .gp_diff = rcu_seq_diff,
297 .exp_completed = srcu_perf_completed, 300 .exp_completed = srcu_perf_completed,
298 .async = srcu_call_rcu, 301 .async = srcu_call_rcu,
299 .gp_barrier = srcu_rcu_barrier, 302 .gp_barrier = srcu_rcu_barrier,
@@ -322,8 +325,8 @@ static struct rcu_perf_ops sched_ops = {
322 .init = rcu_sync_perf_init, 325 .init = rcu_sync_perf_init,
323 .readlock = sched_perf_read_lock, 326 .readlock = sched_perf_read_lock,
324 .readunlock = sched_perf_read_unlock, 327 .readunlock = sched_perf_read_unlock,
325 .started = rcu_batches_started_sched, 328 .get_gp_seq = rcu_sched_get_gp_seq,
326 .completed = rcu_batches_completed_sched, 329 .gp_diff = rcu_seq_diff,
327 .exp_completed = rcu_exp_batches_completed_sched, 330 .exp_completed = rcu_exp_batches_completed_sched,
328 .async = call_rcu_sched, 331 .async = call_rcu_sched,
329 .gp_barrier = rcu_barrier_sched, 332 .gp_barrier = rcu_barrier_sched,
@@ -350,8 +353,8 @@ static struct rcu_perf_ops tasks_ops = {
350 .init = rcu_sync_perf_init, 353 .init = rcu_sync_perf_init,
351 .readlock = tasks_perf_read_lock, 354 .readlock = tasks_perf_read_lock,
352 .readunlock = tasks_perf_read_unlock, 355 .readunlock = tasks_perf_read_unlock,
353 .started = rcu_no_completed, 356 .get_gp_seq = rcu_no_completed,
354 .completed = rcu_no_completed, 357 .gp_diff = rcu_seq_diff,
355 .async = call_rcu_tasks, 358 .async = call_rcu_tasks,
356 .gp_barrier = rcu_barrier_tasks, 359 .gp_barrier = rcu_barrier_tasks,
357 .sync = synchronize_rcu_tasks, 360 .sync = synchronize_rcu_tasks,
@@ -359,9 +362,11 @@ static struct rcu_perf_ops tasks_ops = {
359 .name = "tasks" 362 .name = "tasks"
360}; 363};
361 364
362static bool __maybe_unused torturing_tasks(void) 365static unsigned long rcuperf_seq_diff(unsigned long new, unsigned long old)
363{ 366{
364 return cur_ops == &tasks_ops; 367 if (!cur_ops->gp_diff)
368 return new - old;
369 return cur_ops->gp_diff(new, old);
365} 370}
366 371
367/* 372/*
@@ -444,8 +449,7 @@ rcu_perf_writer(void *arg)
444 b_rcu_perf_writer_started = 449 b_rcu_perf_writer_started =
445 cur_ops->exp_completed() / 2; 450 cur_ops->exp_completed() / 2;
446 } else { 451 } else {
447 b_rcu_perf_writer_started = 452 b_rcu_perf_writer_started = cur_ops->get_gp_seq();
448 cur_ops->completed();
449 } 453 }
450 } 454 }
451 455
@@ -502,7 +506,7 @@ retry:
502 cur_ops->exp_completed() / 2; 506 cur_ops->exp_completed() / 2;
503 } else { 507 } else {
504 b_rcu_perf_writer_finished = 508 b_rcu_perf_writer_finished =
505 cur_ops->completed(); 509 cur_ops->get_gp_seq();
506 } 510 }
507 if (shutdown) { 511 if (shutdown) {
508 smp_mb(); /* Assign before wake. */ 512 smp_mb(); /* Assign before wake. */
@@ -527,7 +531,7 @@ retry:
527 return 0; 531 return 0;
528} 532}
529 533
530static inline void 534static void
531rcu_perf_print_module_parms(struct rcu_perf_ops *cur_ops, const char *tag) 535rcu_perf_print_module_parms(struct rcu_perf_ops *cur_ops, const char *tag)
532{ 536{
533 pr_alert("%s" PERF_FLAG 537 pr_alert("%s" PERF_FLAG
@@ -582,8 +586,8 @@ rcu_perf_cleanup(void)
582 t_rcu_perf_writer_finished - 586 t_rcu_perf_writer_finished -
583 t_rcu_perf_writer_started, 587 t_rcu_perf_writer_started,
584 ngps, 588 ngps,
585 b_rcu_perf_writer_finished - 589 rcuperf_seq_diff(b_rcu_perf_writer_finished,
586 b_rcu_perf_writer_started); 590 b_rcu_perf_writer_started));
587 for (i = 0; i < nrealwriters; i++) { 591 for (i = 0; i < nrealwriters; i++) {
588 if (!writer_durations) 592 if (!writer_durations)
589 break; 593 break;
@@ -671,12 +675,11 @@ rcu_perf_init(void)
671 break; 675 break;
672 } 676 }
673 if (i == ARRAY_SIZE(perf_ops)) { 677 if (i == ARRAY_SIZE(perf_ops)) {
674 pr_alert("rcu-perf: invalid perf type: \"%s\"\n", 678 pr_alert("rcu-perf: invalid perf type: \"%s\"\n", perf_type);
675 perf_type);
676 pr_alert("rcu-perf types:"); 679 pr_alert("rcu-perf types:");
677 for (i = 0; i < ARRAY_SIZE(perf_ops); i++) 680 for (i = 0; i < ARRAY_SIZE(perf_ops); i++)
678 pr_alert(" %s", perf_ops[i]->name); 681 pr_cont(" %s", perf_ops[i]->name);
679 pr_alert("\n"); 682 pr_cont("\n");
680 firsterr = -EINVAL; 683 firsterr = -EINVAL;
681 goto unwind; 684 goto unwind;
682 } 685 }
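
The rcuperf ops tables above drop the separate ->started/->completed hooks for a single ->get_gp_seq plus an optional ->gp_diff, and rcuperf_seq_diff() falls back to plain subtraction when a flavor leaves ->gp_diff NULL. That optional-callback-with-default pattern, reduced to a standalone sketch (the names and the shifted stand-in for rcu_seq_diff() are invented):

#include <stdio.h>

struct perf_ops {
	const char *name;
	unsigned long (*gp_diff)(unsigned long new, unsigned long old);
};

/* Crude stand-in for rcu_seq_diff(): drop two state bits, then subtract. */
static unsigned long shifted_diff(unsigned long new, unsigned long old)
{
	return (new >> 2) - (old >> 2);
}

/* Optional callback with a plain-subtraction default, as in rcuperf_seq_diff(). */
static unsigned long perf_seq_diff(const struct perf_ops *ops,
				   unsigned long new, unsigned long old)
{
	if (!ops->gp_diff)
		return new - old;
	return ops->gp_diff(new, old);
}

int main(void)
{
	struct perf_ops tasks = { .name = "tasks" };
	struct perf_ops tree  = { .name = "rcu", .gp_diff = shifted_diff };

	printf("%s: %lu grace periods\n", tasks.name, perf_seq_diff(&tasks, 9, 5));
	printf("%s: %lu grace periods\n", tree.name, perf_seq_diff(&tree, 36, 20));
	return 0;
}
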
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 42fcb7f05fac..c596c6f1e457 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -22,6 +22,9 @@
22 * 22 *
23 * See also: Documentation/RCU/torture.txt 23 * See also: Documentation/RCU/torture.txt
24 */ 24 */
25
26#define pr_fmt(fmt) fmt
27
25#include <linux/types.h> 28#include <linux/types.h>
26#include <linux/kernel.h> 29#include <linux/kernel.h>
27#include <linux/init.h> 30#include <linux/init.h>
@@ -52,6 +55,7 @@
52#include <linux/torture.h> 55#include <linux/torture.h>
53#include <linux/vmalloc.h> 56#include <linux/vmalloc.h>
54#include <linux/sched/debug.h> 57#include <linux/sched/debug.h>
58#include <linux/sched/sysctl.h>
55 59
56#include "rcu.h" 60#include "rcu.h"
57 61
@@ -59,6 +63,19 @@ MODULE_LICENSE("GPL");
59MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and Josh Triplett <josh@joshtriplett.org>"); 63MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and Josh Triplett <josh@joshtriplett.org>");
60 64
61 65
66/* Bits for ->extendables field, extendables param, and related definitions. */
67#define RCUTORTURE_RDR_SHIFT 8 /* Put SRCU index in upper bits. */
68#define RCUTORTURE_RDR_MASK ((1 << RCUTORTURE_RDR_SHIFT) - 1)
69#define RCUTORTURE_RDR_BH 0x1 /* Extend readers by disabling bh. */
70#define RCUTORTURE_RDR_IRQ 0x2 /* ... disabling interrupts. */
71#define RCUTORTURE_RDR_PREEMPT 0x4 /* ... disabling preemption. */
72#define RCUTORTURE_RDR_RCU 0x8 /* ... entering another RCU reader. */
73#define RCUTORTURE_RDR_NBITS 4 /* Number of bits defined above. */
74#define RCUTORTURE_MAX_EXTEND (RCUTORTURE_RDR_BH | RCUTORTURE_RDR_IRQ | \
75 RCUTORTURE_RDR_PREEMPT)
76#define RCUTORTURE_RDR_MAX_LOOPS 0x7 /* Maximum reader extensions. */
77 /* Must be power of two minus one. */
78
62torture_param(int, cbflood_inter_holdoff, HZ, 79torture_param(int, cbflood_inter_holdoff, HZ,
63 "Holdoff between floods (jiffies)"); 80 "Holdoff between floods (jiffies)");
64torture_param(int, cbflood_intra_holdoff, 1, 81torture_param(int, cbflood_intra_holdoff, 1,
@@ -66,6 +83,8 @@ torture_param(int, cbflood_intra_holdoff, 1,
66torture_param(int, cbflood_n_burst, 3, "# bursts in flood, zero to disable"); 83torture_param(int, cbflood_n_burst, 3, "# bursts in flood, zero to disable");
67torture_param(int, cbflood_n_per_burst, 20000, 84torture_param(int, cbflood_n_per_burst, 20000,
68 "# callbacks per burst in flood"); 85 "# callbacks per burst in flood");
86torture_param(int, extendables, RCUTORTURE_MAX_EXTEND,
87 "Extend readers by disabling bh (1), irqs (2), or preempt (4)");
69torture_param(int, fqs_duration, 0, 88torture_param(int, fqs_duration, 0,
70 "Duration of fqs bursts (us), 0 to disable"); 89 "Duration of fqs bursts (us), 0 to disable");
71torture_param(int, fqs_holdoff, 0, "Holdoff time within fqs bursts (us)"); 90torture_param(int, fqs_holdoff, 0, "Holdoff time within fqs bursts (us)");
@@ -84,7 +103,7 @@ torture_param(int, object_debug, 0,
84 "Enable debug-object double call_rcu() testing"); 103 "Enable debug-object double call_rcu() testing");
85torture_param(int, onoff_holdoff, 0, "Time after boot before CPU hotplugs (s)"); 104torture_param(int, onoff_holdoff, 0, "Time after boot before CPU hotplugs (s)");
86torture_param(int, onoff_interval, 0, 105torture_param(int, onoff_interval, 0,
87 "Time between CPU hotplugs (s), 0=disable"); 106 "Time between CPU hotplugs (jiffies), 0=disable");
88torture_param(int, shuffle_interval, 3, "Number of seconds between shuffles"); 107torture_param(int, shuffle_interval, 3, "Number of seconds between shuffles");
89torture_param(int, shutdown_secs, 0, "Shutdown time (s), <= zero to disable."); 108torture_param(int, shutdown_secs, 0, "Shutdown time (s), <= zero to disable.");
90torture_param(int, stall_cpu, 0, "Stall duration (s), zero to disable."); 109torture_param(int, stall_cpu, 0, "Stall duration (s), zero to disable.");
@@ -101,7 +120,7 @@ torture_param(int, test_boost_interval, 7,
101 "Interval between boost tests, seconds."); 120 "Interval between boost tests, seconds.");
102torture_param(bool, test_no_idle_hz, true, 121torture_param(bool, test_no_idle_hz, true,
103 "Test support for tickless idle CPUs"); 122 "Test support for tickless idle CPUs");
104torture_param(bool, verbose, true, 123torture_param(int, verbose, 1,
105 "Enable verbose debugging printk()s"); 124 "Enable verbose debugging printk()s");
106 125
107static char *torture_type = "rcu"; 126static char *torture_type = "rcu";
@@ -148,9 +167,9 @@ static long n_rcu_torture_boost_ktrerror;
148static long n_rcu_torture_boost_rterror; 167static long n_rcu_torture_boost_rterror;
149static long n_rcu_torture_boost_failure; 168static long n_rcu_torture_boost_failure;
150static long n_rcu_torture_boosts; 169static long n_rcu_torture_boosts;
151static long n_rcu_torture_timers; 170static atomic_long_t n_rcu_torture_timers;
152static long n_barrier_attempts; 171static long n_barrier_attempts;
153static long n_barrier_successes; 172static long n_barrier_successes; /* did rcu_barrier test succeed? */
154static atomic_long_t n_cbfloods; 173static atomic_long_t n_cbfloods;
155static struct list_head rcu_torture_removed; 174static struct list_head rcu_torture_removed;
156 175
@@ -261,8 +280,8 @@ struct rcu_torture_ops {
261 int (*readlock)(void); 280 int (*readlock)(void);
262 void (*read_delay)(struct torture_random_state *rrsp); 281 void (*read_delay)(struct torture_random_state *rrsp);
263 void (*readunlock)(int idx); 282 void (*readunlock)(int idx);
264 unsigned long (*started)(void); 283 unsigned long (*get_gp_seq)(void);
265 unsigned long (*completed)(void); 284 unsigned long (*gp_diff)(unsigned long new, unsigned long old);
266 void (*deferred_free)(struct rcu_torture *p); 285 void (*deferred_free)(struct rcu_torture *p);
267 void (*sync)(void); 286 void (*sync)(void);
268 void (*exp_sync)(void); 287 void (*exp_sync)(void);
@@ -274,6 +293,8 @@ struct rcu_torture_ops {
274 void (*stats)(void); 293 void (*stats)(void);
275 int irq_capable; 294 int irq_capable;
276 int can_boost; 295 int can_boost;
296 int extendables;
297 int ext_irq_conflict;
277 const char *name; 298 const char *name;
278}; 299};
279 300
@@ -302,10 +323,10 @@ static void rcu_read_delay(struct torture_random_state *rrsp)
302 * force_quiescent_state. */ 323 * force_quiescent_state. */
303 324
304 if (!(torture_random(rrsp) % (nrealreaders * 2000 * longdelay_ms))) { 325 if (!(torture_random(rrsp) % (nrealreaders * 2000 * longdelay_ms))) {
305 started = cur_ops->completed(); 326 started = cur_ops->get_gp_seq();
306 ts = rcu_trace_clock_local(); 327 ts = rcu_trace_clock_local();
307 mdelay(longdelay_ms); 328 mdelay(longdelay_ms);
308 completed = cur_ops->completed(); 329 completed = cur_ops->get_gp_seq();
309 do_trace_rcu_torture_read(cur_ops->name, NULL, ts, 330 do_trace_rcu_torture_read(cur_ops->name, NULL, ts,
310 started, completed); 331 started, completed);
311 } 332 }
@@ -397,8 +418,8 @@ static struct rcu_torture_ops rcu_ops = {
397 .readlock = rcu_torture_read_lock, 418 .readlock = rcu_torture_read_lock,
398 .read_delay = rcu_read_delay, 419 .read_delay = rcu_read_delay,
399 .readunlock = rcu_torture_read_unlock, 420 .readunlock = rcu_torture_read_unlock,
400 .started = rcu_batches_started, 421 .get_gp_seq = rcu_get_gp_seq,
401 .completed = rcu_batches_completed, 422 .gp_diff = rcu_seq_diff,
402 .deferred_free = rcu_torture_deferred_free, 423 .deferred_free = rcu_torture_deferred_free,
403 .sync = synchronize_rcu, 424 .sync = synchronize_rcu,
404 .exp_sync = synchronize_rcu_expedited, 425 .exp_sync = synchronize_rcu_expedited,
@@ -439,8 +460,8 @@ static struct rcu_torture_ops rcu_bh_ops = {
439 .readlock = rcu_bh_torture_read_lock, 460 .readlock = rcu_bh_torture_read_lock,
440 .read_delay = rcu_read_delay, /* just reuse rcu's version. */ 461 .read_delay = rcu_read_delay, /* just reuse rcu's version. */
441 .readunlock = rcu_bh_torture_read_unlock, 462 .readunlock = rcu_bh_torture_read_unlock,
442 .started = rcu_batches_started_bh, 463 .get_gp_seq = rcu_bh_get_gp_seq,
443 .completed = rcu_batches_completed_bh, 464 .gp_diff = rcu_seq_diff,
444 .deferred_free = rcu_bh_torture_deferred_free, 465 .deferred_free = rcu_bh_torture_deferred_free,
445 .sync = synchronize_rcu_bh, 466 .sync = synchronize_rcu_bh,
446 .exp_sync = synchronize_rcu_bh_expedited, 467 .exp_sync = synchronize_rcu_bh_expedited,
@@ -449,6 +470,8 @@ static struct rcu_torture_ops rcu_bh_ops = {
449 .fqs = rcu_bh_force_quiescent_state, 470 .fqs = rcu_bh_force_quiescent_state,
450 .stats = NULL, 471 .stats = NULL,
451 .irq_capable = 1, 472 .irq_capable = 1,
473 .extendables = (RCUTORTURE_RDR_BH | RCUTORTURE_RDR_IRQ),
474 .ext_irq_conflict = RCUTORTURE_RDR_RCU,
452 .name = "rcu_bh" 475 .name = "rcu_bh"
453}; 476};
454 477
@@ -483,8 +506,7 @@ static struct rcu_torture_ops rcu_busted_ops = {
483 .readlock = rcu_torture_read_lock, 506 .readlock = rcu_torture_read_lock,
484 .read_delay = rcu_read_delay, /* just reuse rcu's version. */ 507 .read_delay = rcu_read_delay, /* just reuse rcu's version. */
485 .readunlock = rcu_torture_read_unlock, 508 .readunlock = rcu_torture_read_unlock,
486 .started = rcu_no_completed, 509 .get_gp_seq = rcu_no_completed,
487 .completed = rcu_no_completed,
488 .deferred_free = rcu_busted_torture_deferred_free, 510 .deferred_free = rcu_busted_torture_deferred_free,
489 .sync = synchronize_rcu_busted, 511 .sync = synchronize_rcu_busted,
490 .exp_sync = synchronize_rcu_busted, 512 .exp_sync = synchronize_rcu_busted,
@@ -572,8 +594,7 @@ static struct rcu_torture_ops srcu_ops = {
572 .readlock = srcu_torture_read_lock, 594 .readlock = srcu_torture_read_lock,
573 .read_delay = srcu_read_delay, 595 .read_delay = srcu_read_delay,
574 .readunlock = srcu_torture_read_unlock, 596 .readunlock = srcu_torture_read_unlock,
575 .started = NULL, 597 .get_gp_seq = srcu_torture_completed,
576 .completed = srcu_torture_completed,
577 .deferred_free = srcu_torture_deferred_free, 598 .deferred_free = srcu_torture_deferred_free,
578 .sync = srcu_torture_synchronize, 599 .sync = srcu_torture_synchronize,
579 .exp_sync = srcu_torture_synchronize_expedited, 600 .exp_sync = srcu_torture_synchronize_expedited,
@@ -610,8 +631,7 @@ static struct rcu_torture_ops srcud_ops = {
610 .readlock = srcu_torture_read_lock, 631 .readlock = srcu_torture_read_lock,
611 .read_delay = srcu_read_delay, 632 .read_delay = srcu_read_delay,
612 .readunlock = srcu_torture_read_unlock, 633 .readunlock = srcu_torture_read_unlock,
613 .started = NULL, 634 .get_gp_seq = srcu_torture_completed,
614 .completed = srcu_torture_completed,
615 .deferred_free = srcu_torture_deferred_free, 635 .deferred_free = srcu_torture_deferred_free,
616 .sync = srcu_torture_synchronize, 636 .sync = srcu_torture_synchronize,
617 .exp_sync = srcu_torture_synchronize_expedited, 637 .exp_sync = srcu_torture_synchronize_expedited,
@@ -622,6 +642,26 @@ static struct rcu_torture_ops srcud_ops = {
622 .name = "srcud" 642 .name = "srcud"
623}; 643};
624 644
645/* As above, but broken due to inappropriate reader extension. */
646static struct rcu_torture_ops busted_srcud_ops = {
647 .ttype = SRCU_FLAVOR,
648 .init = srcu_torture_init,
649 .cleanup = srcu_torture_cleanup,
650 .readlock = srcu_torture_read_lock,
651 .read_delay = rcu_read_delay,
652 .readunlock = srcu_torture_read_unlock,
653 .get_gp_seq = srcu_torture_completed,
654 .deferred_free = srcu_torture_deferred_free,
655 .sync = srcu_torture_synchronize,
656 .exp_sync = srcu_torture_synchronize_expedited,
657 .call = srcu_torture_call,
658 .cb_barrier = srcu_torture_barrier,
659 .stats = srcu_torture_stats,
660 .irq_capable = 1,
661 .extendables = RCUTORTURE_MAX_EXTEND,
662 .name = "busted_srcud"
663};
664
625/* 665/*
626 * Definitions for sched torture testing. 666 * Definitions for sched torture testing.
627 */ 667 */
@@ -648,8 +688,8 @@ static struct rcu_torture_ops sched_ops = {
648 .readlock = sched_torture_read_lock, 688 .readlock = sched_torture_read_lock,
649 .read_delay = rcu_read_delay, /* just reuse rcu's version. */ 689 .read_delay = rcu_read_delay, /* just reuse rcu's version. */
650 .readunlock = sched_torture_read_unlock, 690 .readunlock = sched_torture_read_unlock,
651 .started = rcu_batches_started_sched, 691 .get_gp_seq = rcu_sched_get_gp_seq,
652 .completed = rcu_batches_completed_sched, 692 .gp_diff = rcu_seq_diff,
653 .deferred_free = rcu_sched_torture_deferred_free, 693 .deferred_free = rcu_sched_torture_deferred_free,
654 .sync = synchronize_sched, 694 .sync = synchronize_sched,
655 .exp_sync = synchronize_sched_expedited, 695 .exp_sync = synchronize_sched_expedited,
@@ -660,6 +700,7 @@ static struct rcu_torture_ops sched_ops = {
660 .fqs = rcu_sched_force_quiescent_state, 700 .fqs = rcu_sched_force_quiescent_state,
661 .stats = NULL, 701 .stats = NULL,
662 .irq_capable = 1, 702 .irq_capable = 1,
703 .extendables = RCUTORTURE_MAX_EXTEND,
663 .name = "sched" 704 .name = "sched"
664}; 705};
665 706
@@ -687,8 +728,7 @@ static struct rcu_torture_ops tasks_ops = {
687 .readlock = tasks_torture_read_lock, 728 .readlock = tasks_torture_read_lock,
688 .read_delay = rcu_read_delay, /* just reuse rcu's version. */ 729 .read_delay = rcu_read_delay, /* just reuse rcu's version. */
689 .readunlock = tasks_torture_read_unlock, 730 .readunlock = tasks_torture_read_unlock,
690 .started = rcu_no_completed, 731 .get_gp_seq = rcu_no_completed,
691 .completed = rcu_no_completed,
692 .deferred_free = rcu_tasks_torture_deferred_free, 732 .deferred_free = rcu_tasks_torture_deferred_free,
693 .sync = synchronize_rcu_tasks, 733 .sync = synchronize_rcu_tasks,
694 .exp_sync = synchronize_rcu_tasks, 734 .exp_sync = synchronize_rcu_tasks,
@@ -700,6 +740,13 @@ static struct rcu_torture_ops tasks_ops = {
700 .name = "tasks" 740 .name = "tasks"
701}; 741};
702 742
743static unsigned long rcutorture_seq_diff(unsigned long new, unsigned long old)
744{
745 if (!cur_ops->gp_diff)
746 return new - old;
747 return cur_ops->gp_diff(new, old);
748}
749
703static bool __maybe_unused torturing_tasks(void) 750static bool __maybe_unused torturing_tasks(void)
704{ 751{
705 return cur_ops == &tasks_ops; 752 return cur_ops == &tasks_ops;
@@ -726,6 +773,44 @@ static void rcu_torture_boost_cb(struct rcu_head *head)
726 smp_store_release(&rbip->inflight, 0); 773 smp_store_release(&rbip->inflight, 0);
727} 774}
728 775
776static int old_rt_runtime = -1;
777
778static void rcu_torture_disable_rt_throttle(void)
779{
780 /*
781 * Disable RT throttling so that rcutorture's boost threads don't get
782 * throttled. Only possible if rcutorture is built-in otherwise the
783 * user should manually do this by setting the sched_rt_period_us and
784 * sched_rt_runtime sysctls.
785 */
786 if (!IS_BUILTIN(CONFIG_RCU_TORTURE_TEST) || old_rt_runtime != -1)
787 return;
788
789 old_rt_runtime = sysctl_sched_rt_runtime;
790 sysctl_sched_rt_runtime = -1;
791}
792
793static void rcu_torture_enable_rt_throttle(void)
794{
795 if (!IS_BUILTIN(CONFIG_RCU_TORTURE_TEST) || old_rt_runtime == -1)
796 return;
797
798 sysctl_sched_rt_runtime = old_rt_runtime;
799 old_rt_runtime = -1;
800}
801
802static bool rcu_torture_boost_failed(unsigned long start, unsigned long end)
803{
804 if (end - start > test_boost_duration * HZ - HZ / 2) {
805 VERBOSE_TOROUT_STRING("rcu_torture_boost boosting failed");
806 n_rcu_torture_boost_failure++;
807
808 return true; /* failed */
809 }
810
811 return false; /* passed */
812}
813
729static int rcu_torture_boost(void *arg) 814static int rcu_torture_boost(void *arg)
730{ 815{
731 unsigned long call_rcu_time; 816 unsigned long call_rcu_time;
@@ -746,6 +831,21 @@ static int rcu_torture_boost(void *arg)
746 init_rcu_head_on_stack(&rbi.rcu); 831 init_rcu_head_on_stack(&rbi.rcu);
747 /* Each pass through the following loop does one boost-test cycle. */ 832 /* Each pass through the following loop does one boost-test cycle. */
748 do { 833 do {
834 /* Track if the test failed already in this test interval? */
835 bool failed = false;
836
837 /* Increment n_rcu_torture_boosts once per boost-test */
838 while (!kthread_should_stop()) {
839 if (mutex_trylock(&boost_mutex)) {
840 n_rcu_torture_boosts++;
841 mutex_unlock(&boost_mutex);
842 break;
843 }
844 schedule_timeout_uninterruptible(1);
845 }
846 if (kthread_should_stop())
847 goto checkwait;
848
749 /* Wait for the next test interval. */ 849 /* Wait for the next test interval. */
750 oldstarttime = boost_starttime; 850 oldstarttime = boost_starttime;
751 while (ULONG_CMP_LT(jiffies, oldstarttime)) { 851 while (ULONG_CMP_LT(jiffies, oldstarttime)) {
@@ -764,11 +864,10 @@ static int rcu_torture_boost(void *arg)
764 /* RCU core before ->inflight = 1. */ 864 /* RCU core before ->inflight = 1. */
765 smp_store_release(&rbi.inflight, 1); 865 smp_store_release(&rbi.inflight, 1);
766 call_rcu(&rbi.rcu, rcu_torture_boost_cb); 866 call_rcu(&rbi.rcu, rcu_torture_boost_cb);
767 if (jiffies - call_rcu_time > 867 /* Check if the boost test failed */
768 test_boost_duration * HZ - HZ / 2) { 868 failed = failed ||
769 VERBOSE_TOROUT_STRING("rcu_torture_boost boosting failed"); 869 rcu_torture_boost_failed(call_rcu_time,
770 n_rcu_torture_boost_failure++; 870 jiffies);
771 }
772 call_rcu_time = jiffies; 871 call_rcu_time = jiffies;
773 } 872 }
774 stutter_wait("rcu_torture_boost"); 873 stutter_wait("rcu_torture_boost");
@@ -777,6 +876,14 @@ static int rcu_torture_boost(void *arg)
777 } 876 }
778 877
779 /* 878 /*
879 * If boost never happened, then inflight will always be 1, in
880 * this case the boost check would never happen in the above
881 * loop so do another one here.
882 */
883 if (!failed && smp_load_acquire(&rbi.inflight))
884 rcu_torture_boost_failed(call_rcu_time, jiffies);
885
886 /*
780 * Set the start time of the next test interval. 887 * Set the start time of the next test interval.
781 * Yes, this is vulnerable to long delays, but such 888 * Yes, this is vulnerable to long delays, but such
782 * delays simply cause a false negative for the next 889 * delays simply cause a false negative for the next
@@ -788,7 +895,6 @@ static int rcu_torture_boost(void *arg)
788 if (mutex_trylock(&boost_mutex)) { 895 if (mutex_trylock(&boost_mutex)) {
789 boost_starttime = jiffies + 896 boost_starttime = jiffies +
790 test_boost_interval * HZ; 897 test_boost_interval * HZ;
791 n_rcu_torture_boosts++;
792 mutex_unlock(&boost_mutex); 898 mutex_unlock(&boost_mutex);
793 break; 899 break;
794 } 900 }
@@ -1010,7 +1116,7 @@ rcu_torture_writer(void *arg)
1010 break; 1116 break;
1011 } 1117 }
1012 } 1118 }
1013 rcutorture_record_progress(++rcu_torture_current_version); 1119 rcu_torture_current_version++;
1014 /* Cycle through nesting levels of rcu_expedite_gp() calls. */ 1120 /* Cycle through nesting levels of rcu_expedite_gp() calls. */
1015 if (can_expedite && 1121 if (can_expedite &&
1016 !(torture_random(&rand) & 0xff & (!!expediting - 1))) { 1122 !(torture_random(&rand) & 0xff & (!!expediting - 1))) {
@@ -1084,27 +1190,133 @@ static void rcu_torture_timer_cb(struct rcu_head *rhp)
1084} 1190}
1085 1191
1086/* 1192/*
1087 * RCU torture reader from timer handler. Dereferences rcu_torture_current, 1193 * Do one extension of an RCU read-side critical section using the
1088 * incrementing the corresponding element of the pipeline array. The 1194 * current reader state in readstate (set to zero for initial entry
1089 * counter in the element should never be greater than 1, otherwise, the 1195 * to extended critical section), set the new state as specified by
1090 * RCU implementation is broken. 1196 * newstate (set to zero for final exit from extended critical section),
1197 * and random-number-generator state in trsp. If this is neither the
1198 * beginning nor the end of the critical section and if there was actually a
1199 * change, do a ->read_delay().
1091 */ 1200 */
1092static void rcu_torture_timer(struct timer_list *unused) 1201static void rcutorture_one_extend(int *readstate, int newstate,
1202 struct torture_random_state *trsp)
1203{
1204 int idxnew = -1;
1205 int idxold = *readstate;
1206 int statesnew = ~*readstate & newstate;
1207 int statesold = *readstate & ~newstate;
1208
1209 WARN_ON_ONCE(idxold < 0);
1210 WARN_ON_ONCE((idxold >> RCUTORTURE_RDR_SHIFT) > 1);
1211
1212 /* First, put new protection in place to avoid critical-section gap. */
1213 if (statesnew & RCUTORTURE_RDR_BH)
1214 local_bh_disable();
1215 if (statesnew & RCUTORTURE_RDR_IRQ)
1216 local_irq_disable();
1217 if (statesnew & RCUTORTURE_RDR_PREEMPT)
1218 preempt_disable();
1219 if (statesnew & RCUTORTURE_RDR_RCU)
1220 idxnew = cur_ops->readlock() << RCUTORTURE_RDR_SHIFT;
1221
1222 /* Next, remove old protection, irq first due to bh conflict. */
1223 if (statesold & RCUTORTURE_RDR_IRQ)
1224 local_irq_enable();
1225 if (statesold & RCUTORTURE_RDR_BH)
1226 local_bh_enable();
1227 if (statesold & RCUTORTURE_RDR_PREEMPT)
1228 preempt_enable();
1229 if (statesold & RCUTORTURE_RDR_RCU)
1230 cur_ops->readunlock(idxold >> RCUTORTURE_RDR_SHIFT);
1231
1232 /* Delay if neither beginning nor end and there was a change. */
1233 if ((statesnew || statesold) && *readstate && newstate)
1234 cur_ops->read_delay(trsp);
1235
1236 /* Update the reader state. */
1237 if (idxnew == -1)
1238 idxnew = idxold & ~RCUTORTURE_RDR_MASK;
1239 WARN_ON_ONCE(idxnew < 0);
1240 WARN_ON_ONCE((idxnew >> RCUTORTURE_RDR_SHIFT) > 1);
1241 *readstate = idxnew | newstate;
1242 WARN_ON_ONCE((*readstate >> RCUTORTURE_RDR_SHIFT) < 0);
1243 WARN_ON_ONCE((*readstate >> RCUTORTURE_RDR_SHIFT) > 1);
1244}
1245
1246/* Return the biggest extendables mask given current RCU and boot parameters. */
1247static int rcutorture_extend_mask_max(void)
1248{
1249 int mask;
1250
1251 WARN_ON_ONCE(extendables & ~RCUTORTURE_MAX_EXTEND);
1252 mask = extendables & RCUTORTURE_MAX_EXTEND & cur_ops->extendables;
1253 mask = mask | RCUTORTURE_RDR_RCU;
1254 return mask;
1255}
1256
1257/* Return a random protection state mask, but with at least one bit set. */
1258static int
1259rcutorture_extend_mask(int oldmask, struct torture_random_state *trsp)
1260{
1261 int mask = rcutorture_extend_mask_max();
1262 unsigned long randmask1 = torture_random(trsp) >> 8;
1263 unsigned long randmask2 = randmask1 >> 1;
1264
1265 WARN_ON_ONCE(mask >> RCUTORTURE_RDR_SHIFT);
1266 /* Half the time lots of bits, half the time only one bit. */
1267 if (randmask1 & 0x1)
1268 mask = mask & randmask2;
1269 else
1270 mask = mask & (1 << (randmask2 % RCUTORTURE_RDR_NBITS));
1271 if ((mask & RCUTORTURE_RDR_IRQ) &&
1272 !(mask & RCUTORTURE_RDR_BH) &&
1273 (oldmask & RCUTORTURE_RDR_BH))
1274 mask |= RCUTORTURE_RDR_BH; /* Can't enable bh w/irq disabled. */
1275 if ((mask & RCUTORTURE_RDR_IRQ) &&
1276 !(mask & cur_ops->ext_irq_conflict) &&
1277 (oldmask & cur_ops->ext_irq_conflict))
1278 mask |= cur_ops->ext_irq_conflict; /* Or if readers object. */
1279 return mask ?: RCUTORTURE_RDR_RCU;
1280}
1281
1282/*
1283 * Do a randomly selected number of extensions of an existing RCU read-side
1284 * critical section.
1285 */
1286static void rcutorture_loop_extend(int *readstate,
1287 struct torture_random_state *trsp)
1288{
1289 int i;
1290 int mask = rcutorture_extend_mask_max();
1291
1292 WARN_ON_ONCE(!*readstate); /* -Existing- RCU read-side critsect! */
1293 if (!((mask - 1) & mask))
1294 return; /* Current RCU flavor not extendable. */
1295 i = (torture_random(trsp) >> 3) & RCUTORTURE_RDR_MAX_LOOPS;
1296 while (i--) {
1297 mask = rcutorture_extend_mask(*readstate, trsp);
1298 rcutorture_one_extend(readstate, mask, trsp);
1299 }
1300}
1301
1302/*
1303 * Do one read-side critical section, returning false if there was
1304 * no data to read. Can be invoked both from process context and
1305 * from a timer handler.
1306 */
1307static bool rcu_torture_one_read(struct torture_random_state *trsp)
1093{ 1308{
1094 int idx;
1095 unsigned long started; 1309 unsigned long started;
1096 unsigned long completed; 1310 unsigned long completed;
1097 static DEFINE_TORTURE_RANDOM(rand); 1311 int newstate;
1098 static DEFINE_SPINLOCK(rand_lock);
1099 struct rcu_torture *p; 1312 struct rcu_torture *p;
1100 int pipe_count; 1313 int pipe_count;
1314 int readstate = 0;
1101 unsigned long long ts; 1315 unsigned long long ts;
1102 1316
1103 idx = cur_ops->readlock(); 1317 newstate = rcutorture_extend_mask(readstate, trsp);
1104 if (cur_ops->started) 1318 rcutorture_one_extend(&readstate, newstate, trsp);
1105 started = cur_ops->started(); 1319 started = cur_ops->get_gp_seq();
1106 else
1107 started = cur_ops->completed();
1108 ts = rcu_trace_clock_local(); 1320 ts = rcu_trace_clock_local();
1109 p = rcu_dereference_check(rcu_torture_current, 1321 p = rcu_dereference_check(rcu_torture_current,
1110 rcu_read_lock_bh_held() || 1322 rcu_read_lock_bh_held() ||
@@ -1112,39 +1324,50 @@ static void rcu_torture_timer(struct timer_list *unused)
1112 srcu_read_lock_held(srcu_ctlp) || 1324 srcu_read_lock_held(srcu_ctlp) ||
1113 torturing_tasks()); 1325 torturing_tasks());
1114 if (p == NULL) { 1326 if (p == NULL) {
1115 /* Leave because rcu_torture_writer is not yet underway */ 1327 /* Wait for rcu_torture_writer to get underway */
1116 cur_ops->readunlock(idx); 1328 rcutorture_one_extend(&readstate, 0, trsp);
1117 return; 1329 return false;
1118 } 1330 }
1119 if (p->rtort_mbtest == 0) 1331 if (p->rtort_mbtest == 0)
1120 atomic_inc(&n_rcu_torture_mberror); 1332 atomic_inc(&n_rcu_torture_mberror);
1121 spin_lock(&rand_lock); 1333 rcutorture_loop_extend(&readstate, trsp);
1122 cur_ops->read_delay(&rand);
1123 n_rcu_torture_timers++;
1124 spin_unlock(&rand_lock);
1125 preempt_disable(); 1334 preempt_disable();
1126 pipe_count = p->rtort_pipe_count; 1335 pipe_count = p->rtort_pipe_count;
1127 if (pipe_count > RCU_TORTURE_PIPE_LEN) { 1336 if (pipe_count > RCU_TORTURE_PIPE_LEN) {
1128 /* Should not happen, but... */ 1337 /* Should not happen, but... */
1129 pipe_count = RCU_TORTURE_PIPE_LEN; 1338 pipe_count = RCU_TORTURE_PIPE_LEN;
1130 } 1339 }
1131 completed = cur_ops->completed(); 1340 completed = cur_ops->get_gp_seq();
1132 if (pipe_count > 1) { 1341 if (pipe_count > 1) {
1133 do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu, ts, 1342 do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu,
1134 started, completed); 1343 ts, started, completed);
1135 rcu_ftrace_dump(DUMP_ALL); 1344 rcu_ftrace_dump(DUMP_ALL);
1136 } 1345 }
1137 __this_cpu_inc(rcu_torture_count[pipe_count]); 1346 __this_cpu_inc(rcu_torture_count[pipe_count]);
1138 completed = completed - started; 1347 completed = rcutorture_seq_diff(completed, started);
1139 if (cur_ops->started)
1140 completed++;
1141 if (completed > RCU_TORTURE_PIPE_LEN) { 1348 if (completed > RCU_TORTURE_PIPE_LEN) {
1142 /* Should not happen, but... */ 1349 /* Should not happen, but... */
1143 completed = RCU_TORTURE_PIPE_LEN; 1350 completed = RCU_TORTURE_PIPE_LEN;
1144 } 1351 }
1145 __this_cpu_inc(rcu_torture_batch[completed]); 1352 __this_cpu_inc(rcu_torture_batch[completed]);
1146 preempt_enable(); 1353 preempt_enable();
1147 cur_ops->readunlock(idx); 1354 rcutorture_one_extend(&readstate, 0, trsp);
1355 WARN_ON_ONCE(readstate & RCUTORTURE_RDR_MASK);
1356 return true;
1357}
1358
1359static DEFINE_TORTURE_RANDOM_PERCPU(rcu_torture_timer_rand);
1360
1361/*
1362 * RCU torture reader from timer handler. Dereferences rcu_torture_current,
1363 * incrementing the corresponding element of the pipeline array. The
1364 * counter in the element should never be greater than 1, otherwise, the
1365 * RCU implementation is broken.
1366 */
1367static void rcu_torture_timer(struct timer_list *unused)
1368{
1369 atomic_long_inc(&n_rcu_torture_timers);
1370 (void)rcu_torture_one_read(this_cpu_ptr(&rcu_torture_timer_rand));
1148 1371
1149 /* Test call_rcu() invocation from interrupt handler. */ 1372 /* Test call_rcu() invocation from interrupt handler. */
1150 if (cur_ops->call) { 1373 if (cur_ops->call) {
@@ -1164,14 +1387,8 @@ static void rcu_torture_timer(struct timer_list *unused)
1164static int 1387static int
1165rcu_torture_reader(void *arg) 1388rcu_torture_reader(void *arg)
1166{ 1389{
1167 unsigned long started;
1168 unsigned long completed;
1169 int idx;
1170 DEFINE_TORTURE_RANDOM(rand); 1390 DEFINE_TORTURE_RANDOM(rand);
1171 struct rcu_torture *p;
1172 int pipe_count;
1173 struct timer_list t; 1391 struct timer_list t;
1174 unsigned long long ts;
1175 1392
1176 VERBOSE_TOROUT_STRING("rcu_torture_reader task started"); 1393 VERBOSE_TOROUT_STRING("rcu_torture_reader task started");
1177 set_user_nice(current, MAX_NICE); 1394 set_user_nice(current, MAX_NICE);
@@ -1183,49 +1400,8 @@ rcu_torture_reader(void *arg)
1183 if (!timer_pending(&t)) 1400 if (!timer_pending(&t))
1184 mod_timer(&t, jiffies + 1); 1401 mod_timer(&t, jiffies + 1);
1185 } 1402 }
1186 idx = cur_ops->readlock(); 1403 if (!rcu_torture_one_read(&rand))
1187 if (cur_ops->started)
1188 started = cur_ops->started();
1189 else
1190 started = cur_ops->completed();
1191 ts = rcu_trace_clock_local();
1192 p = rcu_dereference_check(rcu_torture_current,
1193 rcu_read_lock_bh_held() ||
1194 rcu_read_lock_sched_held() ||
1195 srcu_read_lock_held(srcu_ctlp) ||
1196 torturing_tasks());
1197 if (p == NULL) {
1198 /* Wait for rcu_torture_writer to get underway */
1199 cur_ops->readunlock(idx);
1200 schedule_timeout_interruptible(HZ); 1404 schedule_timeout_interruptible(HZ);
1201 continue;
1202 }
1203 if (p->rtort_mbtest == 0)
1204 atomic_inc(&n_rcu_torture_mberror);
1205 cur_ops->read_delay(&rand);
1206 preempt_disable();
1207 pipe_count = p->rtort_pipe_count;
1208 if (pipe_count > RCU_TORTURE_PIPE_LEN) {
1209 /* Should not happen, but... */
1210 pipe_count = RCU_TORTURE_PIPE_LEN;
1211 }
1212 completed = cur_ops->completed();
1213 if (pipe_count > 1) {
1214 do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu,
1215 ts, started, completed);
1216 rcu_ftrace_dump(DUMP_ALL);
1217 }
1218 __this_cpu_inc(rcu_torture_count[pipe_count]);
1219 completed = completed - started;
1220 if (cur_ops->started)
1221 completed++;
1222 if (completed > RCU_TORTURE_PIPE_LEN) {
1223 /* Should not happen, but... */
1224 completed = RCU_TORTURE_PIPE_LEN;
1225 }
1226 __this_cpu_inc(rcu_torture_batch[completed]);
1227 preempt_enable();
1228 cur_ops->readunlock(idx);
1229 stutter_wait("rcu_torture_reader"); 1405 stutter_wait("rcu_torture_reader");
1230 } while (!torture_must_stop()); 1406 } while (!torture_must_stop());
1231 if (irqreader && cur_ops->irq_capable) { 1407 if (irqreader && cur_ops->irq_capable) {
@@ -1282,7 +1458,7 @@ rcu_torture_stats_print(void)
1282 pr_cont("rtbf: %ld rtb: %ld nt: %ld ", 1458 pr_cont("rtbf: %ld rtb: %ld nt: %ld ",
1283 n_rcu_torture_boost_failure, 1459 n_rcu_torture_boost_failure,
1284 n_rcu_torture_boosts, 1460 n_rcu_torture_boosts,
1285 n_rcu_torture_timers); 1461 atomic_long_read(&n_rcu_torture_timers));
1286 torture_onoff_stats(); 1462 torture_onoff_stats();
1287 pr_cont("barrier: %ld/%ld:%ld ", 1463 pr_cont("barrier: %ld/%ld:%ld ",
1288 n_barrier_successes, 1464 n_barrier_successes,
@@ -1324,18 +1500,16 @@ rcu_torture_stats_print(void)
1324 if (rtcv_snap == rcu_torture_current_version && 1500 if (rtcv_snap == rcu_torture_current_version &&
1325 rcu_torture_current != NULL) { 1501 rcu_torture_current != NULL) {
1326 int __maybe_unused flags = 0; 1502 int __maybe_unused flags = 0;
1327 unsigned long __maybe_unused gpnum = 0; 1503 unsigned long __maybe_unused gp_seq = 0;
1328 unsigned long __maybe_unused completed = 0;
1329 1504
1330 rcutorture_get_gp_data(cur_ops->ttype, 1505 rcutorture_get_gp_data(cur_ops->ttype,
1331 &flags, &gpnum, &completed); 1506 &flags, &gp_seq);
1332 srcutorture_get_gp_data(cur_ops->ttype, srcu_ctlp, 1507 srcutorture_get_gp_data(cur_ops->ttype, srcu_ctlp,
1333 &flags, &gpnum, &completed); 1508 &flags, &gp_seq);
1334 wtp = READ_ONCE(writer_task); 1509 wtp = READ_ONCE(writer_task);
1335 pr_alert("??? Writer stall state %s(%d) g%lu c%lu f%#x ->state %#lx cpu %d\n", 1510 pr_alert("??? Writer stall state %s(%d) g%lu f%#x ->state %#lx cpu %d\n",
1336 rcu_torture_writer_state_getname(), 1511 rcu_torture_writer_state_getname(),
1337 rcu_torture_writer_state, 1512 rcu_torture_writer_state, gp_seq, flags,
1338 gpnum, completed, flags,
1339 wtp == NULL ? ~0UL : wtp->state, 1513 wtp == NULL ? ~0UL : wtp->state,
1340 wtp == NULL ? -1 : (int)task_cpu(wtp)); 1514 wtp == NULL ? -1 : (int)task_cpu(wtp));
1341 if (!splatted && wtp) { 1515 if (!splatted && wtp) {
@@ -1365,7 +1539,7 @@ rcu_torture_stats(void *arg)
1365 return 0; 1539 return 0;
1366} 1540}
1367 1541
1368static inline void 1542static void
1369rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, const char *tag) 1543rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, const char *tag)
1370{ 1544{
1371 pr_alert("%s" TORTURE_FLAG 1545 pr_alert("%s" TORTURE_FLAG
@@ -1397,6 +1571,7 @@ static int rcutorture_booster_cleanup(unsigned int cpu)
1397 mutex_lock(&boost_mutex); 1571 mutex_lock(&boost_mutex);
1398 t = boost_tasks[cpu]; 1572 t = boost_tasks[cpu];
1399 boost_tasks[cpu] = NULL; 1573 boost_tasks[cpu] = NULL;
1574 rcu_torture_enable_rt_throttle();
1400 mutex_unlock(&boost_mutex); 1575 mutex_unlock(&boost_mutex);
1401 1576
1402 /* This must be outside of the mutex, otherwise deadlock! */ 1577 /* This must be outside of the mutex, otherwise deadlock! */
@@ -1413,6 +1588,7 @@ static int rcutorture_booster_init(unsigned int cpu)
1413 1588
1414 /* Don't allow time recalculation while creating a new task. */ 1589 /* Don't allow time recalculation while creating a new task. */
1415 mutex_lock(&boost_mutex); 1590 mutex_lock(&boost_mutex);
1591 rcu_torture_disable_rt_throttle();
1416 VERBOSE_TOROUT_STRING("Creating rcu_torture_boost task"); 1592 VERBOSE_TOROUT_STRING("Creating rcu_torture_boost task");
1417 boost_tasks[cpu] = kthread_create_on_node(rcu_torture_boost, NULL, 1593 boost_tasks[cpu] = kthread_create_on_node(rcu_torture_boost, NULL,
1418 cpu_to_node(cpu), 1594 cpu_to_node(cpu),
@@ -1446,7 +1622,7 @@ static int rcu_torture_stall(void *args)
1446 VERBOSE_TOROUT_STRING("rcu_torture_stall end holdoff"); 1622 VERBOSE_TOROUT_STRING("rcu_torture_stall end holdoff");
1447 } 1623 }
1448 if (!kthread_should_stop()) { 1624 if (!kthread_should_stop()) {
1449 stop_at = get_seconds() + stall_cpu; 1625 stop_at = ktime_get_seconds() + stall_cpu;
1450 /* RCU CPU stall is expected behavior in following code. */ 1626 /* RCU CPU stall is expected behavior in following code. */
1451 rcu_read_lock(); 1627 rcu_read_lock();
1452 if (stall_cpu_irqsoff) 1628 if (stall_cpu_irqsoff)
@@ -1455,7 +1631,8 @@ static int rcu_torture_stall(void *args)
1455 preempt_disable(); 1631 preempt_disable();
1456 pr_alert("rcu_torture_stall start on CPU %d.\n", 1632 pr_alert("rcu_torture_stall start on CPU %d.\n",
1457 smp_processor_id()); 1633 smp_processor_id());
1458 while (ULONG_CMP_LT(get_seconds(), stop_at)) 1634 while (ULONG_CMP_LT((unsigned long)ktime_get_seconds(),
1635 stop_at))
1459 continue; /* Induce RCU CPU stall warning. */ 1636 continue; /* Induce RCU CPU stall warning. */
1460 if (stall_cpu_irqsoff) 1637 if (stall_cpu_irqsoff)
1461 local_irq_enable(); 1638 local_irq_enable();
@@ -1546,8 +1723,9 @@ static int rcu_torture_barrier(void *arg)
1546 atomic_read(&barrier_cbs_invoked), 1723 atomic_read(&barrier_cbs_invoked),
1547 n_barrier_cbs); 1724 n_barrier_cbs);
1548 WARN_ON_ONCE(1); 1725 WARN_ON_ONCE(1);
1726 } else {
1727 n_barrier_successes++;
1549 } 1728 }
1550 n_barrier_successes++;
1551 schedule_timeout_interruptible(HZ / 10); 1729 schedule_timeout_interruptible(HZ / 10);
1552 } while (!torture_must_stop()); 1730 } while (!torture_must_stop());
1553 torture_kthread_stopping("rcu_torture_barrier"); 1731 torture_kthread_stopping("rcu_torture_barrier");
@@ -1610,17 +1788,39 @@ static void rcu_torture_barrier_cleanup(void)
1610 } 1788 }
1611} 1789}
1612 1790
1791static bool rcu_torture_can_boost(void)
1792{
1793 static int boost_warn_once;
1794 int prio;
1795
1796 if (!(test_boost == 1 && cur_ops->can_boost) && test_boost != 2)
1797 return false;
1798
1799 prio = rcu_get_gp_kthreads_prio();
1800 if (!prio)
1801 return false;
1802
1803 if (prio < 2) {
1804 if (boost_warn_once == 1)
1805 return false;
1806
1807 pr_alert("%s: WARN: RCU kthread priority too low to test boosting. Skipping RCU boost test. Try passing rcutree.kthread_prio > 1 on the kernel command line.\n", KBUILD_MODNAME);
1808 boost_warn_once = 1;
1809 return false;
1810 }
1811
1812 return true;
1813}
1814
1613static enum cpuhp_state rcutor_hp; 1815static enum cpuhp_state rcutor_hp;
1614 1816
1615static void 1817static void
1616rcu_torture_cleanup(void) 1818rcu_torture_cleanup(void)
1617{ 1819{
1618 int flags = 0; 1820 int flags = 0;
1619 unsigned long gpnum = 0; 1821 unsigned long gp_seq = 0;
1620 unsigned long completed = 0;
1621 int i; 1822 int i;
1622 1823
1623 rcutorture_record_test_transition();
1624 if (torture_cleanup_begin()) { 1824 if (torture_cleanup_begin()) {
1625 if (cur_ops->cb_barrier != NULL) 1825 if (cur_ops->cb_barrier != NULL)
1626 cur_ops->cb_barrier(); 1826 cur_ops->cb_barrier();
@@ -1648,17 +1848,15 @@ rcu_torture_cleanup(void)
1648 fakewriter_tasks = NULL; 1848 fakewriter_tasks = NULL;
1649 } 1849 }
1650 1850
1651 rcutorture_get_gp_data(cur_ops->ttype, &flags, &gpnum, &completed); 1851 rcutorture_get_gp_data(cur_ops->ttype, &flags, &gp_seq);
1652 srcutorture_get_gp_data(cur_ops->ttype, srcu_ctlp, 1852 srcutorture_get_gp_data(cur_ops->ttype, srcu_ctlp, &flags, &gp_seq);
1653 &flags, &gpnum, &completed); 1853 pr_alert("%s: End-test grace-period state: g%lu f%#x\n",
1654 pr_alert("%s: End-test grace-period state: g%lu c%lu f%#x\n", 1854 cur_ops->name, gp_seq, flags);
1655 cur_ops->name, gpnum, completed, flags);
1656 torture_stop_kthread(rcu_torture_stats, stats_task); 1855 torture_stop_kthread(rcu_torture_stats, stats_task);
1657 torture_stop_kthread(rcu_torture_fqs, fqs_task); 1856 torture_stop_kthread(rcu_torture_fqs, fqs_task);
1658 for (i = 0; i < ncbflooders; i++) 1857 for (i = 0; i < ncbflooders; i++)
1659 torture_stop_kthread(rcu_torture_cbflood, cbflood_task[i]); 1858 torture_stop_kthread(rcu_torture_cbflood, cbflood_task[i]);
1660 if ((test_boost == 1 && cur_ops->can_boost) || 1859 if (rcu_torture_can_boost())
1661 test_boost == 2)
1662 cpuhp_remove_state(rcutor_hp); 1860 cpuhp_remove_state(rcutor_hp);
1663 1861
1664 /* 1862 /*
@@ -1746,7 +1944,7 @@ rcu_torture_init(void)
1746 int firsterr = 0; 1944 int firsterr = 0;
1747 static struct rcu_torture_ops *torture_ops[] = { 1945 static struct rcu_torture_ops *torture_ops[] = {
1748 &rcu_ops, &rcu_bh_ops, &rcu_busted_ops, &srcu_ops, &srcud_ops, 1946 &rcu_ops, &rcu_bh_ops, &rcu_busted_ops, &srcu_ops, &srcud_ops,
1749 &sched_ops, &tasks_ops, 1947 &busted_srcud_ops, &sched_ops, &tasks_ops,
1750 }; 1948 };
1751 1949
1752 if (!torture_init_begin(torture_type, verbose)) 1950 if (!torture_init_begin(torture_type, verbose))
@@ -1763,8 +1961,8 @@ rcu_torture_init(void)
1763 torture_type); 1961 torture_type);
1764 pr_alert("rcu-torture types:"); 1962 pr_alert("rcu-torture types:");
1765 for (i = 0; i < ARRAY_SIZE(torture_ops); i++) 1963 for (i = 0; i < ARRAY_SIZE(torture_ops); i++)
1766 pr_alert(" %s", torture_ops[i]->name); 1964 pr_cont(" %s", torture_ops[i]->name);
1767 pr_alert("\n"); 1965 pr_cont("\n");
1768 firsterr = -EINVAL; 1966 firsterr = -EINVAL;
1769 goto unwind; 1967 goto unwind;
1770 } 1968 }
@@ -1882,8 +2080,7 @@ rcu_torture_init(void)
1882 test_boost_interval = 1; 2080 test_boost_interval = 1;
1883 if (test_boost_duration < 2) 2081 if (test_boost_duration < 2)
1884 test_boost_duration = 2; 2082 test_boost_duration = 2;
1885 if ((test_boost == 1 && cur_ops->can_boost) || 2083 if (rcu_torture_can_boost()) {
1886 test_boost == 2) {
1887 2084
1888 boost_starttime = jiffies + test_boost_interval * HZ; 2085 boost_starttime = jiffies + test_boost_interval * HZ;
1889 2086
@@ -1897,7 +2094,7 @@ rcu_torture_init(void)
1897 firsterr = torture_shutdown_init(shutdown_secs, rcu_torture_cleanup); 2094 firsterr = torture_shutdown_init(shutdown_secs, rcu_torture_cleanup);
1898 if (firsterr) 2095 if (firsterr)
1899 goto unwind; 2096 goto unwind;
1900 firsterr = torture_onoff_init(onoff_holdoff * HZ, onoff_interval * HZ); 2097 firsterr = torture_onoff_init(onoff_holdoff * HZ, onoff_interval);
1901 if (firsterr) 2098 if (firsterr)
1902 goto unwind; 2099 goto unwind;
1903 firsterr = rcu_torture_stall_init(); 2100 firsterr = rcu_torture_stall_init();
@@ -1926,7 +2123,6 @@ rcu_torture_init(void)
1926 goto unwind; 2123 goto unwind;
1927 } 2124 }
1928 } 2125 }
1929 rcutorture_record_test_transition();
1930 torture_init_end(); 2126 torture_init_end();
1931 return 0; 2127 return 0;
1932 2128
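
The rcu_torture_one_read()/rcutorture_one_extend() rework above treats each protection regime (bh, irqs, preemption, the RCU flavor itself) as a bit in the reader state and always layers the new protection in place before dropping the old one, so the extended critical section never has an unprotected gap. A minimal sketch of that overlap-preserving switch follows; the EX_* bits and switch_protection() are illustrative names only, and callers must avoid asking to drop bh while keeping irqs disabled, the same constraint rcutorture_extend_mask() enforces above.

/* Hedged sketch of the overlap-preserving protection switch modeled on
 * rcutorture_one_extend(); EX_* values and the helper are illustrative. */
#include <linux/interrupt.h>
#include <linux/irqflags.h>
#include <linux/preempt.h>

#define EX_BH		0x1
#define EX_IRQ		0x2
#define EX_PREEMPT	0x4

static void switch_protection(int *state, int newstate)
{
	int statesnew = ~*state & newstate;	/* bits to acquire */
	int statesold = *state & ~newstate;	/* bits to release */

	/* Acquire the new protection first so the two regimes overlap. */
	if (statesnew & EX_BH)
		local_bh_disable();
	if (statesnew & EX_IRQ)
		local_irq_disable();
	if (statesnew & EX_PREEMPT)
		preempt_disable();

	/* Only then drop the old protection, irqs before bh as in the
	 * patch; callers never drop bh while leaving irqs disabled. */
	if (statesold & EX_IRQ)
		local_irq_enable();
	if (statesold & EX_BH)
		local_bh_enable();
	if (statesold & EX_PREEMPT)
		preempt_enable();

	*state = newstate;
}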
diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c
index 622792abe41a..04fc2ed71af8 100644
--- a/kernel/rcu/srcutiny.c
+++ b/kernel/rcu/srcutiny.c
@@ -110,7 +110,7 @@ void __srcu_read_unlock(struct srcu_struct *sp, int idx)
110 110
111 WRITE_ONCE(sp->srcu_lock_nesting[idx], newval); 111 WRITE_ONCE(sp->srcu_lock_nesting[idx], newval);
112 if (!newval && READ_ONCE(sp->srcu_gp_waiting)) 112 if (!newval && READ_ONCE(sp->srcu_gp_waiting))
113 swake_up(&sp->srcu_wq); 113 swake_up_one(&sp->srcu_wq);
114} 114}
115EXPORT_SYMBOL_GPL(__srcu_read_unlock); 115EXPORT_SYMBOL_GPL(__srcu_read_unlock);
116 116
@@ -140,7 +140,7 @@ void srcu_drive_gp(struct work_struct *wp)
140 idx = sp->srcu_idx; 140 idx = sp->srcu_idx;
141 WRITE_ONCE(sp->srcu_idx, !sp->srcu_idx); 141 WRITE_ONCE(sp->srcu_idx, !sp->srcu_idx);
142 WRITE_ONCE(sp->srcu_gp_waiting, true); /* srcu_read_unlock() wakes! */ 142 WRITE_ONCE(sp->srcu_gp_waiting, true); /* srcu_read_unlock() wakes! */
143 swait_event(sp->srcu_wq, !READ_ONCE(sp->srcu_lock_nesting[idx])); 143 swait_event_exclusive(sp->srcu_wq, !READ_ONCE(sp->srcu_lock_nesting[idx]));
144 WRITE_ONCE(sp->srcu_gp_waiting, false); /* srcu_read_unlock() cheap. */ 144 WRITE_ONCE(sp->srcu_gp_waiting, false); /* srcu_read_unlock() cheap. */
145 145
146 /* Invoke the callbacks we removed above. */ 146 /* Invoke the callbacks we removed above. */
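
The srcutiny.c hunks above move the grace-period wait/wake handoff to the exclusive simple-wait-queue primitives (swait_event_exclusive()/swake_up_one()). A hedged sketch of that pairing, using the illustrative names demo_wq and done, looks like this:

/* Hedged sketch of the swait_event_exclusive()/swake_up_one() pairing
 * used above; demo_wq and done are illustrative names. */
#include <linux/compiler.h>
#include <linux/swait.h>

static DECLARE_SWAIT_QUEUE_HEAD(demo_wq);
static bool done;

static void demo_waiter(void)
{
	/* Sleep until done is set; exclusive waiters wake one at a time. */
	swait_event_exclusive(demo_wq, READ_ONCE(done));
}

static void demo_waker(void)
{
	WRITE_ONCE(done, true);
	swake_up_one(&demo_wq);		/* Wake at most one exclusive waiter. */
}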
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index b4123d7a2cec..6c9866a854b1 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -26,6 +26,8 @@
26 * 26 *
27 */ 27 */
28 28
29#define pr_fmt(fmt) "rcu: " fmt
30
29#include <linux/export.h> 31#include <linux/export.h>
30#include <linux/mutex.h> 32#include <linux/mutex.h>
31#include <linux/percpu.h> 33#include <linux/percpu.h>
@@ -390,7 +392,8 @@ void _cleanup_srcu_struct(struct srcu_struct *sp, bool quiesced)
390 } 392 }
391 if (WARN_ON(rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) != SRCU_STATE_IDLE) || 393 if (WARN_ON(rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) != SRCU_STATE_IDLE) ||
392 WARN_ON(srcu_readers_active(sp))) { 394 WARN_ON(srcu_readers_active(sp))) {
393 pr_info("%s: Active srcu_struct %p state: %d\n", __func__, sp, rcu_seq_state(READ_ONCE(sp->srcu_gp_seq))); 395 pr_info("%s: Active srcu_struct %p state: %d\n",
396 __func__, sp, rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)));
394 return; /* Caller forgot to stop doing call_srcu()? */ 397 return; /* Caller forgot to stop doing call_srcu()? */
395 } 398 }
396 free_percpu(sp->sda); 399 free_percpu(sp->sda);
@@ -641,6 +644,9 @@ static void srcu_funnel_exp_start(struct srcu_struct *sp, struct srcu_node *snp,
641 * period s. Losers must either ensure that their desired grace-period 644 * period s. Losers must either ensure that their desired grace-period
642 * number is recorded on at least their leaf srcu_node structure, or they 645 * number is recorded on at least their leaf srcu_node structure, or they
643 * must take steps to invoke their own callbacks. 646 * must take steps to invoke their own callbacks.
647 *
648 * Note that this function also does the work of srcu_funnel_exp_start(),
649 * in some cases by directly invoking it.
644 */ 650 */
645static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp, 651static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp,
646 unsigned long s, bool do_norm) 652 unsigned long s, bool do_norm)
@@ -823,17 +829,17 @@ static void srcu_leak_callback(struct rcu_head *rhp)
823 * more than one CPU, this means that when "func()" is invoked, each CPU 829 * more than one CPU, this means that when "func()" is invoked, each CPU
824 * is guaranteed to have executed a full memory barrier since the end of 830 * is guaranteed to have executed a full memory barrier since the end of
825 * its last corresponding SRCU read-side critical section whose beginning 831 * its last corresponding SRCU read-side critical section whose beginning
826 * preceded the call to call_rcu(). It also means that each CPU executing 832 * preceded the call to call_srcu(). It also means that each CPU executing
827 * an SRCU read-side critical section that continues beyond the start of 833 * an SRCU read-side critical section that continues beyond the start of
828 * "func()" must have executed a memory barrier after the call_rcu() 834 * "func()" must have executed a memory barrier after the call_srcu()
829 * but before the beginning of that SRCU read-side critical section. 835 * but before the beginning of that SRCU read-side critical section.
830 * Note that these guarantees include CPUs that are offline, idle, or 836 * Note that these guarantees include CPUs that are offline, idle, or
831 * executing in user mode, as well as CPUs that are executing in the kernel. 837 * executing in user mode, as well as CPUs that are executing in the kernel.
832 * 838 *
833 * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the 839 * Furthermore, if CPU A invoked call_srcu() and CPU B invoked the
834 * resulting SRCU callback function "func()", then both CPU A and CPU 840 * resulting SRCU callback function "func()", then both CPU A and CPU
835 * B are guaranteed to execute a full memory barrier during the time 841 * B are guaranteed to execute a full memory barrier during the time
836 * interval between the call to call_rcu() and the invocation of "func()". 842 * interval between the call to call_srcu() and the invocation of "func()".
837 * This guarantee applies even if CPU A and CPU B are the same CPU (but 843 * This guarantee applies even if CPU A and CPU B are the same CPU (but
838 * again only if the system has more than one CPU). 844 * again only if the system has more than one CPU).
839 * 845 *
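
The guarantee documented above is what lets an updater unpublish an object and hand its reclamation to call_srcu() without any further synchronization against readers. A hedged usage sketch follows; demo_srcu, demo_obj, and the helper names are illustrative and not part of this patch.

/* Hedged call_srcu() usage sketch; all demo_* names are illustrative. */
#include <linux/mutex.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/srcu.h>

struct demo_obj {
	int value;
	struct rcu_head rh;
};

DEFINE_STATIC_SRCU(demo_srcu);
static DEFINE_MUTEX(demo_lock);
static struct demo_obj __rcu *demo_ptr;

static void demo_free_cb(struct rcu_head *rhp)
{
	kfree(container_of(rhp, struct demo_obj, rh));
}

/* Updater: publish newp, then free the old object only after all
 * pre-existing SRCU readers of demo_srcu have completed. */
static void demo_update(struct demo_obj *newp)
{
	struct demo_obj *oldp;

	mutex_lock(&demo_lock);
	oldp = rcu_dereference_protected(demo_ptr,
					 lockdep_is_held(&demo_lock));
	rcu_assign_pointer(demo_ptr, newp);
	mutex_unlock(&demo_lock);
	if (oldp)
		call_srcu(&demo_srcu, &oldp->rh, demo_free_cb);
}

/* Reader: the object cannot be freed while this critical section runs. */
static int demo_read(void)
{
	int idx, v = -1;
	struct demo_obj *p;

	idx = srcu_read_lock(&demo_srcu);
	p = srcu_dereference(demo_ptr, &demo_srcu);
	if (p)
		v = p->value;
	srcu_read_unlock(&demo_srcu, idx);
	return v;
}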
@@ -1246,13 +1252,12 @@ static void process_srcu(struct work_struct *work)
1246 1252
1247void srcutorture_get_gp_data(enum rcutorture_type test_type, 1253void srcutorture_get_gp_data(enum rcutorture_type test_type,
1248 struct srcu_struct *sp, int *flags, 1254 struct srcu_struct *sp, int *flags,
1249 unsigned long *gpnum, unsigned long *completed) 1255 unsigned long *gp_seq)
1250{ 1256{
1251 if (test_type != SRCU_FLAVOR) 1257 if (test_type != SRCU_FLAVOR)
1252 return; 1258 return;
1253 *flags = 0; 1259 *flags = 0;
1254 *completed = rcu_seq_ctr(sp->srcu_gp_seq); 1260 *gp_seq = rcu_seq_current(&sp->srcu_gp_seq);
1255 *gpnum = rcu_seq_ctr(sp->srcu_gp_seq_needed);
1256} 1261}
1257EXPORT_SYMBOL_GPL(srcutorture_get_gp_data); 1262EXPORT_SYMBOL_GPL(srcutorture_get_gp_data);
1258 1263
@@ -1263,16 +1268,17 @@ void srcu_torture_stats_print(struct srcu_struct *sp, char *tt, char *tf)
1263 unsigned long s0 = 0, s1 = 0; 1268 unsigned long s0 = 0, s1 = 0;
1264 1269
1265 idx = sp->srcu_idx & 0x1; 1270 idx = sp->srcu_idx & 0x1;
1266 pr_alert("%s%s Tree SRCU per-CPU(idx=%d):", tt, tf, idx); 1271 pr_alert("%s%s Tree SRCU g%ld per-CPU(idx=%d):",
1272 tt, tf, rcu_seq_current(&sp->srcu_gp_seq), idx);
1267 for_each_possible_cpu(cpu) { 1273 for_each_possible_cpu(cpu) {
1268 unsigned long l0, l1; 1274 unsigned long l0, l1;
1269 unsigned long u0, u1; 1275 unsigned long u0, u1;
1270 long c0, c1; 1276 long c0, c1;
1271 struct srcu_data *counts; 1277 struct srcu_data *sdp;
1272 1278
1273 counts = per_cpu_ptr(sp->sda, cpu); 1279 sdp = per_cpu_ptr(sp->sda, cpu);
1274 u0 = counts->srcu_unlock_count[!idx]; 1280 u0 = sdp->srcu_unlock_count[!idx];
1275 u1 = counts->srcu_unlock_count[idx]; 1281 u1 = sdp->srcu_unlock_count[idx];
1276 1282
1277 /* 1283 /*
1278 * Make sure that a lock is always counted if the corresponding 1284 * Make sure that a lock is always counted if the corresponding
@@ -1280,12 +1286,13 @@ void srcu_torture_stats_print(struct srcu_struct *sp, char *tt, char *tf)
1280 */ 1286 */
1281 smp_rmb(); 1287 smp_rmb();
1282 1288
1283 l0 = counts->srcu_lock_count[!idx]; 1289 l0 = sdp->srcu_lock_count[!idx];
1284 l1 = counts->srcu_lock_count[idx]; 1290 l1 = sdp->srcu_lock_count[idx];
1285 1291
1286 c0 = l0 - u0; 1292 c0 = l0 - u0;
1287 c1 = l1 - u1; 1293 c1 = l1 - u1;
1288 pr_cont(" %d(%ld,%ld)", cpu, c0, c1); 1294 pr_cont(" %d(%ld,%ld %1p)",
1295 cpu, c0, c1, rcu_segcblist_head(&sdp->srcu_cblist));
1289 s0 += c0; 1296 s0 += c0;
1290 s1 += c1; 1297 s1 += c1;
1291 } 1298 }
diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
index a64eee0db39e..befc9321a89c 100644
--- a/kernel/rcu/tiny.c
+++ b/kernel/rcu/tiny.c
@@ -122,10 +122,8 @@ void rcu_check_callbacks(int user)
122{ 122{
123 if (user) 123 if (user)
124 rcu_sched_qs(); 124 rcu_sched_qs();
125 else if (!in_softirq()) 125 if (user || !in_softirq())
126 rcu_bh_qs(); 126 rcu_bh_qs();
127 if (user)
128 rcu_note_voluntary_context_switch(current);
129} 127}
130 128
131/* 129/*
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index aa7cade1b9f3..0b760c1369f7 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -27,6 +27,9 @@
27 * For detailed explanation of Read-Copy Update mechanism see - 27 * For detailed explanation of Read-Copy Update mechanism see -
28 * Documentation/RCU 28 * Documentation/RCU
29 */ 29 */
30
31#define pr_fmt(fmt) "rcu: " fmt
32
30#include <linux/types.h> 33#include <linux/types.h>
31#include <linux/kernel.h> 34#include <linux/kernel.h>
32#include <linux/init.h> 35#include <linux/init.h>
@@ -95,13 +98,13 @@ struct rcu_state sname##_state = { \
95 .rda = &sname##_data, \ 98 .rda = &sname##_data, \
96 .call = cr, \ 99 .call = cr, \
97 .gp_state = RCU_GP_IDLE, \ 100 .gp_state = RCU_GP_IDLE, \
98 .gpnum = 0UL - 300UL, \ 101 .gp_seq = (0UL - 300UL) << RCU_SEQ_CTR_SHIFT, \
99 .completed = 0UL - 300UL, \
100 .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \ 102 .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
101 .name = RCU_STATE_NAME(sname), \ 103 .name = RCU_STATE_NAME(sname), \
102 .abbr = sabbr, \ 104 .abbr = sabbr, \
103 .exp_mutex = __MUTEX_INITIALIZER(sname##_state.exp_mutex), \ 105 .exp_mutex = __MUTEX_INITIALIZER(sname##_state.exp_mutex), \
104 .exp_wake_mutex = __MUTEX_INITIALIZER(sname##_state.exp_wake_mutex), \ 106 .exp_wake_mutex = __MUTEX_INITIALIZER(sname##_state.exp_wake_mutex), \
107 .ofl_lock = __SPIN_LOCK_UNLOCKED(sname##_state.ofl_lock), \
105} 108}
106 109
107RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched); 110RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched);
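
The single ->gp_seq counter introduced here packs the grace-period state into its low-order bits and the completed-period count into the remaining bits, which is why the initializer shifts by RCU_SEQ_CTR_SHIFT and why rcu_gp_in_progress() below collapses to a single state test. A hedged illustration of the encoding follows; the demo_* names are stand-ins for the rcu_seq_*() helpers in kernel/rcu/rcu.h.

/* Hedged illustration of the gp_seq encoding; demo_* names mirror the
 * rcu_seq_*() helpers in kernel/rcu/rcu.h. */
#define DEMO_SEQ_CTR_SHIFT	2
#define DEMO_SEQ_STATE_MASK	((1 << DEMO_SEQ_CTR_SHIFT) - 1)

static inline unsigned long demo_seq_ctr(unsigned long s)
{
	return s >> DEMO_SEQ_CTR_SHIFT;		/* grace periods elapsed */
}

static inline int demo_seq_state(unsigned long s)
{
	return s & DEMO_SEQ_STATE_MASK;		/* nonzero: GP in progress */
}

/* The old "completed != gpnum" comparison becomes a state check: */
static inline bool demo_gp_in_progress(unsigned long gp_seq)
{
	return demo_seq_state(gp_seq) != 0;
}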
@@ -155,6 +158,9 @@ EXPORT_SYMBOL_GPL(rcu_scheduler_active);
155 */ 158 */
156static int rcu_scheduler_fully_active __read_mostly; 159static int rcu_scheduler_fully_active __read_mostly;
157 160
161static void
162rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
163 struct rcu_node *rnp, unsigned long gps, unsigned long flags);
158static void rcu_init_new_rnp(struct rcu_node *rnp_leaf); 164static void rcu_init_new_rnp(struct rcu_node *rnp_leaf);
159static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf); 165static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf);
160static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu); 166static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu);
@@ -177,6 +183,13 @@ module_param(gp_init_delay, int, 0444);
177static int gp_cleanup_delay; 183static int gp_cleanup_delay;
178module_param(gp_cleanup_delay, int, 0444); 184module_param(gp_cleanup_delay, int, 0444);
179 185
 186/* Retrieve RCU kthreads priority for rcutorture */
187int rcu_get_gp_kthreads_prio(void)
188{
189 return kthread_prio;
190}
191EXPORT_SYMBOL_GPL(rcu_get_gp_kthreads_prio);
192
180/* 193/*
181 * Number of grace periods between delays, normalized by the duration of 194 * Number of grace periods between delays, normalized by the duration of
182 * the delay. The longer the delay, the more the grace periods between 195 * the delay. The longer the delay, the more the grace periods between
@@ -189,18 +202,6 @@ module_param(gp_cleanup_delay, int, 0444);
189#define PER_RCU_NODE_PERIOD 3 /* Number of grace periods between delays. */ 202#define PER_RCU_NODE_PERIOD 3 /* Number of grace periods between delays. */
190 203
191/* 204/*
192 * Track the rcutorture test sequence number and the update version
193 * number within a given test. The rcutorture_testseq is incremented
194 * on every rcutorture module load and unload, so has an odd value
195 * when a test is running. The rcutorture_vernum is set to zero
196 * when rcutorture starts and is incremented on each rcutorture update.
197 * These variables enable correlating rcutorture output with the
198 * RCU tracing information.
199 */
200unsigned long rcutorture_testseq;
201unsigned long rcutorture_vernum;
202
203/*
204 * Compute the mask of online CPUs for the specified rcu_node structure. 205 * Compute the mask of online CPUs for the specified rcu_node structure.
205 * This will not be stable unless the rcu_node structure's ->lock is 206 * This will not be stable unless the rcu_node structure's ->lock is
206 * held, but the bit corresponding to the current CPU will be stable 207 * held, but the bit corresponding to the current CPU will be stable
@@ -218,7 +219,7 @@ unsigned long rcu_rnp_online_cpus(struct rcu_node *rnp)
218 */ 219 */
219static int rcu_gp_in_progress(struct rcu_state *rsp) 220static int rcu_gp_in_progress(struct rcu_state *rsp)
220{ 221{
221 return READ_ONCE(rsp->completed) != READ_ONCE(rsp->gpnum); 222 return rcu_seq_state(rcu_seq_current(&rsp->gp_seq));
222} 223}
223 224
224/* 225/*
@@ -233,7 +234,7 @@ void rcu_sched_qs(void)
233 if (!__this_cpu_read(rcu_sched_data.cpu_no_qs.s)) 234 if (!__this_cpu_read(rcu_sched_data.cpu_no_qs.s))
234 return; 235 return;
235 trace_rcu_grace_period(TPS("rcu_sched"), 236 trace_rcu_grace_period(TPS("rcu_sched"),
236 __this_cpu_read(rcu_sched_data.gpnum), 237 __this_cpu_read(rcu_sched_data.gp_seq),
237 TPS("cpuqs")); 238 TPS("cpuqs"));
238 __this_cpu_write(rcu_sched_data.cpu_no_qs.b.norm, false); 239 __this_cpu_write(rcu_sched_data.cpu_no_qs.b.norm, false);
239 if (!__this_cpu_read(rcu_sched_data.cpu_no_qs.b.exp)) 240 if (!__this_cpu_read(rcu_sched_data.cpu_no_qs.b.exp))
@@ -248,7 +249,7 @@ void rcu_bh_qs(void)
248 RCU_LOCKDEP_WARN(preemptible(), "rcu_bh_qs() invoked with preemption enabled!!!"); 249 RCU_LOCKDEP_WARN(preemptible(), "rcu_bh_qs() invoked with preemption enabled!!!");
249 if (__this_cpu_read(rcu_bh_data.cpu_no_qs.s)) { 250 if (__this_cpu_read(rcu_bh_data.cpu_no_qs.s)) {
250 trace_rcu_grace_period(TPS("rcu_bh"), 251 trace_rcu_grace_period(TPS("rcu_bh"),
251 __this_cpu_read(rcu_bh_data.gpnum), 252 __this_cpu_read(rcu_bh_data.gp_seq),
252 TPS("cpuqs")); 253 TPS("cpuqs"));
253 __this_cpu_write(rcu_bh_data.cpu_no_qs.b.norm, false); 254 __this_cpu_write(rcu_bh_data.cpu_no_qs.b.norm, false);
254 } 255 }
@@ -380,20 +381,6 @@ static bool rcu_dynticks_in_eqs_since(struct rcu_dynticks *rdtp, int snap)
380} 381}
381 382
382/* 383/*
383 * Do a double-increment of the ->dynticks counter to emulate a
384 * momentary idle-CPU quiescent state.
385 */
386static void rcu_dynticks_momentary_idle(void)
387{
388 struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
389 int special = atomic_add_return(2 * RCU_DYNTICK_CTRL_CTR,
390 &rdtp->dynticks);
391
392 /* It is illegal to call this from idle state. */
393 WARN_ON_ONCE(!(special & RCU_DYNTICK_CTRL_CTR));
394}
395
396/*
397 * Set the special (bottom) bit of the specified CPU so that it 384 * Set the special (bottom) bit of the specified CPU so that it
398 * will take special action (such as flushing its TLB) on the 385 * will take special action (such as flushing its TLB) on the
399 * next exit from an extended quiescent state. Returns true if 386 * next exit from an extended quiescent state. Returns true if
@@ -424,12 +411,17 @@ bool rcu_eqs_special_set(int cpu)
424 * 411 *
425 * We inform the RCU core by emulating a zero-duration dyntick-idle period. 412 * We inform the RCU core by emulating a zero-duration dyntick-idle period.
426 * 413 *
427 * The caller must have disabled interrupts. 414 * The caller must have disabled interrupts and must not be idle.
428 */ 415 */
429static void rcu_momentary_dyntick_idle(void) 416static void rcu_momentary_dyntick_idle(void)
430{ 417{
418 struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
419 int special;
420
431 raw_cpu_write(rcu_dynticks.rcu_need_heavy_qs, false); 421 raw_cpu_write(rcu_dynticks.rcu_need_heavy_qs, false);
432 rcu_dynticks_momentary_idle(); 422 special = atomic_add_return(2 * RCU_DYNTICK_CTRL_CTR, &rdtp->dynticks);
423 /* It is illegal to call this from idle state. */
424 WARN_ON_ONCE(!(special & RCU_DYNTICK_CTRL_CTR));
433} 425}
434 426
435/* 427/*
@@ -451,7 +443,7 @@ void rcu_note_context_switch(bool preempt)
451 rcu_momentary_dyntick_idle(); 443 rcu_momentary_dyntick_idle();
452 this_cpu_inc(rcu_dynticks.rcu_qs_ctr); 444 this_cpu_inc(rcu_dynticks.rcu_qs_ctr);
453 if (!preempt) 445 if (!preempt)
454 rcu_note_voluntary_context_switch_lite(current); 446 rcu_tasks_qs(current);
455out: 447out:
456 trace_rcu_utilization(TPS("End context switch")); 448 trace_rcu_utilization(TPS("End context switch"));
457 barrier(); /* Avoid RCU read-side critical sections leaking up. */ 449 barrier(); /* Avoid RCU read-side critical sections leaking up. */
@@ -513,8 +505,38 @@ static ulong jiffies_till_first_fqs = ULONG_MAX;
513static ulong jiffies_till_next_fqs = ULONG_MAX; 505static ulong jiffies_till_next_fqs = ULONG_MAX;
514static bool rcu_kick_kthreads; 506static bool rcu_kick_kthreads;
515 507
516module_param(jiffies_till_first_fqs, ulong, 0644); 508static int param_set_first_fqs_jiffies(const char *val, const struct kernel_param *kp)
517module_param(jiffies_till_next_fqs, ulong, 0644); 509{
510 ulong j;
511 int ret = kstrtoul(val, 0, &j);
512
513 if (!ret)
514 WRITE_ONCE(*(ulong *)kp->arg, (j > HZ) ? HZ : j);
515 return ret;
516}
517
518static int param_set_next_fqs_jiffies(const char *val, const struct kernel_param *kp)
519{
520 ulong j;
521 int ret = kstrtoul(val, 0, &j);
522
523 if (!ret)
524 WRITE_ONCE(*(ulong *)kp->arg, (j > HZ) ? HZ : (j ?: 1));
525 return ret;
526}
527
528static struct kernel_param_ops first_fqs_jiffies_ops = {
529 .set = param_set_first_fqs_jiffies,
530 .get = param_get_ulong,
531};
532
533static struct kernel_param_ops next_fqs_jiffies_ops = {
534 .set = param_set_next_fqs_jiffies,
535 .get = param_get_ulong,
536};
537
538module_param_cb(jiffies_till_first_fqs, &first_fqs_jiffies_ops, &jiffies_till_first_fqs, 0644);
539module_param_cb(jiffies_till_next_fqs, &next_fqs_jiffies_ops, &jiffies_till_next_fqs, 0644);
518module_param(rcu_kick_kthreads, bool, 0644); 540module_param(rcu_kick_kthreads, bool, 0644);
519 541
520/* 542/*
@@ -529,58 +551,31 @@ static void force_quiescent_state(struct rcu_state *rsp);
529static int rcu_pending(void); 551static int rcu_pending(void);
530 552
531/* 553/*
532 * Return the number of RCU batches started thus far for debug & stats. 554 * Return the number of RCU GPs completed thus far for debug & stats.
533 */ 555 */
534unsigned long rcu_batches_started(void) 556unsigned long rcu_get_gp_seq(void)
535{ 557{
536 return rcu_state_p->gpnum; 558 return READ_ONCE(rcu_state_p->gp_seq);
537} 559}
538EXPORT_SYMBOL_GPL(rcu_batches_started); 560EXPORT_SYMBOL_GPL(rcu_get_gp_seq);
539 561
540/* 562/*
541 * Return the number of RCU-sched batches started thus far for debug & stats. 563 * Return the number of RCU-sched GPs completed thus far for debug & stats.
542 */ 564 */
543unsigned long rcu_batches_started_sched(void) 565unsigned long rcu_sched_get_gp_seq(void)
544{ 566{
545 return rcu_sched_state.gpnum; 567 return READ_ONCE(rcu_sched_state.gp_seq);
546} 568}
547EXPORT_SYMBOL_GPL(rcu_batches_started_sched); 569EXPORT_SYMBOL_GPL(rcu_sched_get_gp_seq);
548 570
549/* 571/*
550 * Return the number of RCU BH batches started thus far for debug & stats. 572 * Return the number of RCU-bh GPs completed thus far for debug & stats.
551 */ 573 */
552unsigned long rcu_batches_started_bh(void) 574unsigned long rcu_bh_get_gp_seq(void)
553{ 575{
554 return rcu_bh_state.gpnum; 576 return READ_ONCE(rcu_bh_state.gp_seq);
555} 577}
556EXPORT_SYMBOL_GPL(rcu_batches_started_bh); 578EXPORT_SYMBOL_GPL(rcu_bh_get_gp_seq);
557
558/*
559 * Return the number of RCU batches completed thus far for debug & stats.
560 */
561unsigned long rcu_batches_completed(void)
562{
563 return rcu_state_p->completed;
564}
565EXPORT_SYMBOL_GPL(rcu_batches_completed);
566
567/*
568 * Return the number of RCU-sched batches completed thus far for debug & stats.
569 */
570unsigned long rcu_batches_completed_sched(void)
571{
572 return rcu_sched_state.completed;
573}
574EXPORT_SYMBOL_GPL(rcu_batches_completed_sched);
575
576/*
577 * Return the number of RCU BH batches completed thus far for debug & stats.
578 */
579unsigned long rcu_batches_completed_bh(void)
580{
581 return rcu_bh_state.completed;
582}
583EXPORT_SYMBOL_GPL(rcu_batches_completed_bh);
584 579
585/* 580/*
586 * Return the number of RCU expedited batches completed thus far for 581 * Return the number of RCU expedited batches completed thus far for
@@ -636,35 +631,42 @@ EXPORT_SYMBOL_GPL(rcu_sched_force_quiescent_state);
636 */ 631 */
637void show_rcu_gp_kthreads(void) 632void show_rcu_gp_kthreads(void)
638{ 633{
634 int cpu;
635 struct rcu_data *rdp;
636 struct rcu_node *rnp;
639 struct rcu_state *rsp; 637 struct rcu_state *rsp;
640 638
641 for_each_rcu_flavor(rsp) { 639 for_each_rcu_flavor(rsp) {
642 pr_info("%s: wait state: %d ->state: %#lx\n", 640 pr_info("%s: wait state: %d ->state: %#lx\n",
643 rsp->name, rsp->gp_state, rsp->gp_kthread->state); 641 rsp->name, rsp->gp_state, rsp->gp_kthread->state);
642 rcu_for_each_node_breadth_first(rsp, rnp) {
643 if (ULONG_CMP_GE(rsp->gp_seq, rnp->gp_seq_needed))
644 continue;
645 pr_info("\trcu_node %d:%d ->gp_seq %lu ->gp_seq_needed %lu\n",
646 rnp->grplo, rnp->grphi, rnp->gp_seq,
647 rnp->gp_seq_needed);
648 if (!rcu_is_leaf_node(rnp))
649 continue;
650 for_each_leaf_node_possible_cpu(rnp, cpu) {
651 rdp = per_cpu_ptr(rsp->rda, cpu);
652 if (rdp->gpwrap ||
653 ULONG_CMP_GE(rsp->gp_seq,
654 rdp->gp_seq_needed))
655 continue;
656 pr_info("\tcpu %d ->gp_seq_needed %lu\n",
657 cpu, rdp->gp_seq_needed);
658 }
659 }
644 /* sched_show_task(rsp->gp_kthread); */ 660 /* sched_show_task(rsp->gp_kthread); */
645 } 661 }
646} 662}
647EXPORT_SYMBOL_GPL(show_rcu_gp_kthreads); 663EXPORT_SYMBOL_GPL(show_rcu_gp_kthreads);
648 664
649/* 665/*
650 * Record the number of times rcutorture tests have been initiated and
651 * terminated. This information allows the debugfs tracing stats to be
652 * correlated to the rcutorture messages, even when the rcutorture module
653 * is being repeatedly loaded and unloaded. In other words, we cannot
654 * store this state in rcutorture itself.
655 */
656void rcutorture_record_test_transition(void)
657{
658 rcutorture_testseq++;
659 rcutorture_vernum = 0;
660}
661EXPORT_SYMBOL_GPL(rcutorture_record_test_transition);
662
663/*
664 * Send along grace-period-related data for rcutorture diagnostics. 666 * Send along grace-period-related data for rcutorture diagnostics.
665 */ 667 */
666void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags, 668void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags,
667 unsigned long *gpnum, unsigned long *completed) 669 unsigned long *gp_seq)
668{ 670{
669 struct rcu_state *rsp = NULL; 671 struct rcu_state *rsp = NULL;
670 672
@@ -684,23 +686,11 @@ void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags,
684 if (rsp == NULL) 686 if (rsp == NULL)
685 return; 687 return;
686 *flags = READ_ONCE(rsp->gp_flags); 688 *flags = READ_ONCE(rsp->gp_flags);
687 *gpnum = READ_ONCE(rsp->gpnum); 689 *gp_seq = rcu_seq_current(&rsp->gp_seq);
688 *completed = READ_ONCE(rsp->completed);
689} 690}
690EXPORT_SYMBOL_GPL(rcutorture_get_gp_data); 691EXPORT_SYMBOL_GPL(rcutorture_get_gp_data);
691 692
692/* 693/*
693 * Record the number of writer passes through the current rcutorture test.
694 * This is also used to correlate debugfs tracing stats with the rcutorture
695 * messages.
696 */
697void rcutorture_record_progress(unsigned long vernum)
698{
699 rcutorture_vernum++;
700}
701EXPORT_SYMBOL_GPL(rcutorture_record_progress);
702
703/*
704 * Return the root node of the specified rcu_state structure. 694 * Return the root node of the specified rcu_state structure.
705 */ 695 */
706static struct rcu_node *rcu_get_root(struct rcu_state *rsp) 696static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
@@ -1059,41 +1049,41 @@ void rcu_request_urgent_qs_task(struct task_struct *t)
1059#if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) 1049#if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU)
1060 1050
1061/* 1051/*
1062 * Is the current CPU online? Disable preemption to avoid false positives 1052 * Is the current CPU online as far as RCU is concerned?
1063 * that could otherwise happen due to the current CPU number being sampled,
1064 * this task being preempted, its old CPU being taken offline, resuming
1065 * on some other CPU, then determining that its old CPU is now offline.
1066 * It is OK to use RCU on an offline processor during initial boot, hence
1067 * the check for rcu_scheduler_fully_active. Note also that it is OK
1068 * for a CPU coming online to use RCU for one jiffy prior to marking itself
1069 * online in the cpu_online_mask. Similarly, it is OK for a CPU going
1070 * offline to continue to use RCU for one jiffy after marking itself
1071 * offline in the cpu_online_mask. This leniency is necessary given the
1072 * non-atomic nature of the online and offline processing, for example,
1073 * the fact that a CPU enters the scheduler after completing the teardown
1074 * of the CPU.
1075 * 1053 *
1076 * This is also why RCU internally marks CPUs online during in the 1054 * Disable preemption to avoid false positives that could otherwise
1077 * preparation phase and offline after the CPU has been taken down. 1055 * happen due to the current CPU number being sampled, this task being
1056 * preempted, its old CPU being taken offline, resuming on some other CPU,
1057 * then determining that its old CPU is now offline. Because there are
1058 * multiple flavors of RCU, and because this function can be called in the
1059 * midst of updating the flavors while a given CPU is coming online or
1060 * going offline, it is necessary to check all flavors. If any flavor
1061 * believes that the given CPU is online, it is considered to be online.
1078 * 1062 *
1079 * Disable checking if in an NMI handler because we cannot safely report 1063 * Disable checking if in an NMI handler because we cannot safely
1080 * errors from NMI handlers anyway. 1064 * report errors from NMI handlers anyway. In addition, it is OK to use
1065 * RCU on an offline processor during initial boot, hence the check for
1066 * rcu_scheduler_fully_active.
1081 */ 1067 */
1082bool rcu_lockdep_current_cpu_online(void) 1068bool rcu_lockdep_current_cpu_online(void)
1083{ 1069{
1084 struct rcu_data *rdp; 1070 struct rcu_data *rdp;
1085 struct rcu_node *rnp; 1071 struct rcu_node *rnp;
1086 bool ret; 1072 struct rcu_state *rsp;
1087 1073
1088 if (in_nmi()) 1074 if (in_nmi() || !rcu_scheduler_fully_active)
1089 return true; 1075 return true;
1090 preempt_disable(); 1076 preempt_disable();
1091 rdp = this_cpu_ptr(&rcu_sched_data); 1077 for_each_rcu_flavor(rsp) {
1092 rnp = rdp->mynode; 1078 rdp = this_cpu_ptr(rsp->rda);
1093 ret = (rdp->grpmask & rcu_rnp_online_cpus(rnp)) || 1079 rnp = rdp->mynode;
1094 !rcu_scheduler_fully_active; 1080 if (rdp->grpmask & rcu_rnp_online_cpus(rnp)) {
1081 preempt_enable();
1082 return true;
1083 }
1084 }
1095 preempt_enable(); 1085 preempt_enable();
1096 return ret; 1086 return false;
1097} 1087}
1098EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online); 1088EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online);
1099 1089
@@ -1115,17 +1105,18 @@ static int rcu_is_cpu_rrupt_from_idle(void)
1115/* 1105/*
1116 * We are reporting a quiescent state on behalf of some other CPU, so 1106 * We are reporting a quiescent state on behalf of some other CPU, so
1117 * it is our responsibility to check for and handle potential overflow 1107 * it is our responsibility to check for and handle potential overflow
1118 * of the rcu_node ->gpnum counter with respect to the rcu_data counters. 1108 * of the rcu_node ->gp_seq counter with respect to the rcu_data counters.
1119 * After all, the CPU might be in deep idle state, and thus executing no 1109 * After all, the CPU might be in deep idle state, and thus executing no
1120 * code whatsoever. 1110 * code whatsoever.
1121 */ 1111 */
1122static void rcu_gpnum_ovf(struct rcu_node *rnp, struct rcu_data *rdp) 1112static void rcu_gpnum_ovf(struct rcu_node *rnp, struct rcu_data *rdp)
1123{ 1113{
1124 raw_lockdep_assert_held_rcu_node(rnp); 1114 raw_lockdep_assert_held_rcu_node(rnp);
1125 if (ULONG_CMP_LT(READ_ONCE(rdp->gpnum) + ULONG_MAX / 4, rnp->gpnum)) 1115 if (ULONG_CMP_LT(rcu_seq_current(&rdp->gp_seq) + ULONG_MAX / 4,
1116 rnp->gp_seq))
1126 WRITE_ONCE(rdp->gpwrap, true); 1117 WRITE_ONCE(rdp->gpwrap, true);
1127 if (ULONG_CMP_LT(rdp->rcu_iw_gpnum + ULONG_MAX / 4, rnp->gpnum)) 1118 if (ULONG_CMP_LT(rdp->rcu_iw_gp_seq + ULONG_MAX / 4, rnp->gp_seq))
1128 rdp->rcu_iw_gpnum = rnp->gpnum + ULONG_MAX / 4; 1119 rdp->rcu_iw_gp_seq = rnp->gp_seq + ULONG_MAX / 4;
1129} 1120}
1130 1121
1131/* 1122/*
@@ -1137,7 +1128,7 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp)
1137{ 1128{
1138 rdp->dynticks_snap = rcu_dynticks_snap(rdp->dynticks); 1129 rdp->dynticks_snap = rcu_dynticks_snap(rdp->dynticks);
1139 if (rcu_dynticks_in_eqs(rdp->dynticks_snap)) { 1130 if (rcu_dynticks_in_eqs(rdp->dynticks_snap)) {
1140 trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti")); 1131 trace_rcu_fqs(rdp->rsp->name, rdp->gp_seq, rdp->cpu, TPS("dti"));
1141 rcu_gpnum_ovf(rdp->mynode, rdp); 1132 rcu_gpnum_ovf(rdp->mynode, rdp);
1142 return 1; 1133 return 1;
1143 } 1134 }
@@ -1159,7 +1150,7 @@ static void rcu_iw_handler(struct irq_work *iwp)
1159 rnp = rdp->mynode; 1150 rnp = rdp->mynode;
1160 raw_spin_lock_rcu_node(rnp); 1151 raw_spin_lock_rcu_node(rnp);
1161 if (!WARN_ON_ONCE(!rdp->rcu_iw_pending)) { 1152 if (!WARN_ON_ONCE(!rdp->rcu_iw_pending)) {
1162 rdp->rcu_iw_gpnum = rnp->gpnum; 1153 rdp->rcu_iw_gp_seq = rnp->gp_seq;
1163 rdp->rcu_iw_pending = false; 1154 rdp->rcu_iw_pending = false;
1164 } 1155 }
1165 raw_spin_unlock_rcu_node(rnp); 1156 raw_spin_unlock_rcu_node(rnp);
@@ -1187,7 +1178,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
1187 * of the current RCU grace period. 1178 * of the current RCU grace period.
1188 */ 1179 */
1189 if (rcu_dynticks_in_eqs_since(rdp->dynticks, rdp->dynticks_snap)) { 1180 if (rcu_dynticks_in_eqs_since(rdp->dynticks, rdp->dynticks_snap)) {
1190 trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti")); 1181 trace_rcu_fqs(rdp->rsp->name, rdp->gp_seq, rdp->cpu, TPS("dti"));
1191 rdp->dynticks_fqs++; 1182 rdp->dynticks_fqs++;
1192 rcu_gpnum_ovf(rnp, rdp); 1183 rcu_gpnum_ovf(rnp, rdp);
1193 return 1; 1184 return 1;
@@ -1203,8 +1194,8 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
1203 ruqp = per_cpu_ptr(&rcu_dynticks.rcu_urgent_qs, rdp->cpu); 1194 ruqp = per_cpu_ptr(&rcu_dynticks.rcu_urgent_qs, rdp->cpu);
1204 if (time_after(jiffies, rdp->rsp->gp_start + jtsq) && 1195 if (time_after(jiffies, rdp->rsp->gp_start + jtsq) &&
1205 READ_ONCE(rdp->rcu_qs_ctr_snap) != per_cpu(rcu_dynticks.rcu_qs_ctr, rdp->cpu) && 1196 READ_ONCE(rdp->rcu_qs_ctr_snap) != per_cpu(rcu_dynticks.rcu_qs_ctr, rdp->cpu) &&
1206 READ_ONCE(rdp->gpnum) == rnp->gpnum && !rdp->gpwrap) { 1197 rcu_seq_current(&rdp->gp_seq) == rnp->gp_seq && !rdp->gpwrap) {
1207 trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("rqc")); 1198 trace_rcu_fqs(rdp->rsp->name, rdp->gp_seq, rdp->cpu, TPS("rqc"));
1208 rcu_gpnum_ovf(rnp, rdp); 1199 rcu_gpnum_ovf(rnp, rdp);
1209 return 1; 1200 return 1;
1210 } else if (time_after(jiffies, rdp->rsp->gp_start + jtsq)) { 1201 } else if (time_after(jiffies, rdp->rsp->gp_start + jtsq)) {
@@ -1212,12 +1203,25 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
1212 smp_store_release(ruqp, true); 1203 smp_store_release(ruqp, true);
1213 } 1204 }
1214 1205
1215 /* Check for the CPU being offline. */ 1206 /* If waiting too long on an offline CPU, complain. */
1216 if (!(rdp->grpmask & rcu_rnp_online_cpus(rnp))) { 1207 if (!(rdp->grpmask & rcu_rnp_online_cpus(rnp)) &&
1217 trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("ofl")); 1208 time_after(jiffies, rdp->rsp->gp_start + HZ)) {
1218 rdp->offline_fqs++; 1209 bool onl;
1219 rcu_gpnum_ovf(rnp, rdp); 1210 struct rcu_node *rnp1;
1220 return 1; 1211
1212 WARN_ON(1); /* Offline CPUs are supposed to report QS! */
1213 pr_info("%s: grp: %d-%d level: %d ->gp_seq %ld ->completedqs %ld\n",
1214 __func__, rnp->grplo, rnp->grphi, rnp->level,
1215 (long)rnp->gp_seq, (long)rnp->completedqs);
1216 for (rnp1 = rnp; rnp1; rnp1 = rnp1->parent)
1217 pr_info("%s: %d:%d ->qsmask %#lx ->qsmaskinit %#lx ->qsmaskinitnext %#lx ->rcu_gp_init_mask %#lx\n",
1218 __func__, rnp1->grplo, rnp1->grphi, rnp1->qsmask, rnp1->qsmaskinit, rnp1->qsmaskinitnext, rnp1->rcu_gp_init_mask);
1219 onl = !!(rdp->grpmask & rcu_rnp_online_cpus(rnp));
1220 pr_info("%s %d: %c online: %ld(%d) offline: %ld(%d)\n",
1221 __func__, rdp->cpu, ".o"[onl],
1222 (long)rdp->rcu_onl_gp_seq, rdp->rcu_onl_gp_flags,
1223 (long)rdp->rcu_ofl_gp_seq, rdp->rcu_ofl_gp_flags);
1224 return 1; /* Break things loose after complaining. */
1221 } 1225 }
1222 1226
1223 /* 1227 /*
@@ -1256,11 +1260,11 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
1256 if (jiffies - rdp->rsp->gp_start > rcu_jiffies_till_stall_check() / 2) { 1260 if (jiffies - rdp->rsp->gp_start > rcu_jiffies_till_stall_check() / 2) {
1257 resched_cpu(rdp->cpu); 1261 resched_cpu(rdp->cpu);
1258 if (IS_ENABLED(CONFIG_IRQ_WORK) && 1262 if (IS_ENABLED(CONFIG_IRQ_WORK) &&
1259 !rdp->rcu_iw_pending && rdp->rcu_iw_gpnum != rnp->gpnum && 1263 !rdp->rcu_iw_pending && rdp->rcu_iw_gp_seq != rnp->gp_seq &&
1260 (rnp->ffmask & rdp->grpmask)) { 1264 (rnp->ffmask & rdp->grpmask)) {
1261 init_irq_work(&rdp->rcu_iw, rcu_iw_handler); 1265 init_irq_work(&rdp->rcu_iw, rcu_iw_handler);
1262 rdp->rcu_iw_pending = true; 1266 rdp->rcu_iw_pending = true;
1263 rdp->rcu_iw_gpnum = rnp->gpnum; 1267 rdp->rcu_iw_gp_seq = rnp->gp_seq;
1264 irq_work_queue_on(&rdp->rcu_iw, rdp->cpu); 1268 irq_work_queue_on(&rdp->rcu_iw, rdp->cpu);
1265 } 1269 }
1266 } 1270 }
@@ -1274,9 +1278,9 @@ static void record_gp_stall_check_time(struct rcu_state *rsp)
1274 unsigned long j1; 1278 unsigned long j1;
1275 1279
1276 rsp->gp_start = j; 1280 rsp->gp_start = j;
1277 smp_wmb(); /* Record start time before stall time. */
1278 j1 = rcu_jiffies_till_stall_check(); 1281 j1 = rcu_jiffies_till_stall_check();
1279 WRITE_ONCE(rsp->jiffies_stall, j + j1); 1282 /* Record ->gp_start before ->jiffies_stall. */
1283 smp_store_release(&rsp->jiffies_stall, j + j1); /* ^^^ */
1280 rsp->jiffies_resched = j + j1 / 2; 1284 rsp->jiffies_resched = j + j1 / 2;
1281 rsp->n_force_qs_gpstart = READ_ONCE(rsp->n_force_qs); 1285 rsp->n_force_qs_gpstart = READ_ONCE(rsp->n_force_qs);
1282} 1286}
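The smp_store_release() above publishes ->gp_start before ->jiffies_stall, and check_cpu_stall() reads them in the opposite order. A minimal user-space sketch of that release/acquire pairing, using C11 atomics as stand-ins for the kernel primitives (field names and values here are illustrative only, and the wrap-safe ULONG_CMP macros are omitted):

#include <stdatomic.h>
#include <stdio.h>

static unsigned long gp_start;          /* plain store, ordered by the release below */
static atomic_ulong jiffies_stall;

static void arm_stall_timeout(unsigned long now, unsigned long delay)
{
	gp_start = now;
	/* Release: gp_start is visible before the new jiffies_stall. */
	atomic_store_explicit(&jiffies_stall, now + delay,
			      memory_order_release);
}

static void check_stall(unsigned long now)
{
	/* Acquire pairs with the release in arm_stall_timeout(). */
	unsigned long js = atomic_load_explicit(&jiffies_stall,
						memory_order_acquire);
	unsigned long gps = gp_start;

	if (gps >= js || now < js)
		return;		/* Timeout not armed for this GP, or not yet expired. */
	printf("possible stall: started at %lu, limit was %lu\n", gps, js);
}

int main(void)
{
	arm_stall_timeout(1000, 30);
	check_stall(1031);
	return 0;
}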
@@ -1302,9 +1306,9 @@ static void rcu_check_gp_kthread_starvation(struct rcu_state *rsp)
1302 j = jiffies; 1306 j = jiffies;
1303 gpa = READ_ONCE(rsp->gp_activity); 1307 gpa = READ_ONCE(rsp->gp_activity);
1304 if (j - gpa > 2 * HZ) { 1308 if (j - gpa > 2 * HZ) {
1305 pr_err("%s kthread starved for %ld jiffies! g%lu c%lu f%#x %s(%d) ->state=%#lx ->cpu=%d\n", 1309 pr_err("%s kthread starved for %ld jiffies! g%ld f%#x %s(%d) ->state=%#lx ->cpu=%d\n",
1306 rsp->name, j - gpa, 1310 rsp->name, j - gpa,
1307 rsp->gpnum, rsp->completed, 1311 (long)rcu_seq_current(&rsp->gp_seq),
1308 rsp->gp_flags, 1312 rsp->gp_flags,
1309 gp_state_getname(rsp->gp_state), rsp->gp_state, 1313 gp_state_getname(rsp->gp_state), rsp->gp_state,
1310 rsp->gp_kthread ? rsp->gp_kthread->state : ~0, 1314 rsp->gp_kthread ? rsp->gp_kthread->state : ~0,
@@ -1359,16 +1363,15 @@ static void rcu_stall_kick_kthreads(struct rcu_state *rsp)
1359 } 1363 }
1360} 1364}
1361 1365
1362static inline void panic_on_rcu_stall(void) 1366static void panic_on_rcu_stall(void)
1363{ 1367{
1364 if (sysctl_panic_on_rcu_stall) 1368 if (sysctl_panic_on_rcu_stall)
1365 panic("RCU Stall\n"); 1369 panic("RCU Stall\n");
1366} 1370}
1367 1371
1368static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum) 1372static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gp_seq)
1369{ 1373{
1370 int cpu; 1374 int cpu;
1371 long delta;
1372 unsigned long flags; 1375 unsigned long flags;
1373 unsigned long gpa; 1376 unsigned long gpa;
1374 unsigned long j; 1377 unsigned long j;
@@ -1381,25 +1384,12 @@ static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum)
1381 if (rcu_cpu_stall_suppress) 1384 if (rcu_cpu_stall_suppress)
1382 return; 1385 return;
1383 1386
1384 /* Only let one CPU complain about others per time interval. */
1385
1386 raw_spin_lock_irqsave_rcu_node(rnp, flags);
1387 delta = jiffies - READ_ONCE(rsp->jiffies_stall);
1388 if (delta < RCU_STALL_RAT_DELAY || !rcu_gp_in_progress(rsp)) {
1389 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
1390 return;
1391 }
1392 WRITE_ONCE(rsp->jiffies_stall,
1393 jiffies + 3 * rcu_jiffies_till_stall_check() + 3);
1394 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
1395
1396 /* 1387 /*
1397 * OK, time to rat on our buddy... 1388 * OK, time to rat on our buddy...
1398 * See Documentation/RCU/stallwarn.txt for info on how to debug 1389 * See Documentation/RCU/stallwarn.txt for info on how to debug
1399 * RCU CPU stall warnings. 1390 * RCU CPU stall warnings.
1400 */ 1391 */
1401 pr_err("INFO: %s detected stalls on CPUs/tasks:", 1392 pr_err("INFO: %s detected stalls on CPUs/tasks:", rsp->name);
1402 rsp->name);
1403 print_cpu_stall_info_begin(); 1393 print_cpu_stall_info_begin();
1404 rcu_for_each_leaf_node(rsp, rnp) { 1394 rcu_for_each_leaf_node(rsp, rnp) {
1405 raw_spin_lock_irqsave_rcu_node(rnp, flags); 1395 raw_spin_lock_irqsave_rcu_node(rnp, flags);
@@ -1418,17 +1408,16 @@ static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum)
1418 for_each_possible_cpu(cpu) 1408 for_each_possible_cpu(cpu)
1419 totqlen += rcu_segcblist_n_cbs(&per_cpu_ptr(rsp->rda, 1409 totqlen += rcu_segcblist_n_cbs(&per_cpu_ptr(rsp->rda,
1420 cpu)->cblist); 1410 cpu)->cblist);
1421 pr_cont("(detected by %d, t=%ld jiffies, g=%ld, c=%ld, q=%lu)\n", 1411 pr_cont("(detected by %d, t=%ld jiffies, g=%ld, q=%lu)\n",
1422 smp_processor_id(), (long)(jiffies - rsp->gp_start), 1412 smp_processor_id(), (long)(jiffies - rsp->gp_start),
1423 (long)rsp->gpnum, (long)rsp->completed, totqlen); 1413 (long)rcu_seq_current(&rsp->gp_seq), totqlen);
1424 if (ndetected) { 1414 if (ndetected) {
1425 rcu_dump_cpu_stacks(rsp); 1415 rcu_dump_cpu_stacks(rsp);
1426 1416
1427 /* Complain about tasks blocking the grace period. */ 1417 /* Complain about tasks blocking the grace period. */
1428 rcu_print_detail_task_stall(rsp); 1418 rcu_print_detail_task_stall(rsp);
1429 } else { 1419 } else {
1430 if (READ_ONCE(rsp->gpnum) != gpnum || 1420 if (rcu_seq_current(&rsp->gp_seq) != gp_seq) {
1431 READ_ONCE(rsp->completed) == gpnum) {
1432 pr_err("INFO: Stall ended before state dump start\n"); 1421 pr_err("INFO: Stall ended before state dump start\n");
1433 } else { 1422 } else {
1434 j = jiffies; 1423 j = jiffies;
@@ -1441,6 +1430,10 @@ static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum)
1441 sched_show_task(current); 1430 sched_show_task(current);
1442 } 1431 }
1443 } 1432 }
1433 /* Rewrite if needed in case of slow consoles. */
1434 if (ULONG_CMP_GE(jiffies, READ_ONCE(rsp->jiffies_stall)))
1435 WRITE_ONCE(rsp->jiffies_stall,
1436 jiffies + 3 * rcu_jiffies_till_stall_check() + 3);
1444 1437
1445 rcu_check_gp_kthread_starvation(rsp); 1438 rcu_check_gp_kthread_starvation(rsp);
1446 1439
@@ -1476,15 +1469,16 @@ static void print_cpu_stall(struct rcu_state *rsp)
1476 for_each_possible_cpu(cpu) 1469 for_each_possible_cpu(cpu)
1477 totqlen += rcu_segcblist_n_cbs(&per_cpu_ptr(rsp->rda, 1470 totqlen += rcu_segcblist_n_cbs(&per_cpu_ptr(rsp->rda,
1478 cpu)->cblist); 1471 cpu)->cblist);
1479 pr_cont(" (t=%lu jiffies g=%ld c=%ld q=%lu)\n", 1472 pr_cont(" (t=%lu jiffies g=%ld q=%lu)\n",
1480 jiffies - rsp->gp_start, 1473 jiffies - rsp->gp_start,
1481 (long)rsp->gpnum, (long)rsp->completed, totqlen); 1474 (long)rcu_seq_current(&rsp->gp_seq), totqlen);
1482 1475
1483 rcu_check_gp_kthread_starvation(rsp); 1476 rcu_check_gp_kthread_starvation(rsp);
1484 1477
1485 rcu_dump_cpu_stacks(rsp); 1478 rcu_dump_cpu_stacks(rsp);
1486 1479
1487 raw_spin_lock_irqsave_rcu_node(rnp, flags); 1480 raw_spin_lock_irqsave_rcu_node(rnp, flags);
1481 /* Rewrite if needed in case of slow consoles. */
1488 if (ULONG_CMP_GE(jiffies, READ_ONCE(rsp->jiffies_stall))) 1482 if (ULONG_CMP_GE(jiffies, READ_ONCE(rsp->jiffies_stall)))
1489 WRITE_ONCE(rsp->jiffies_stall, 1483 WRITE_ONCE(rsp->jiffies_stall,
1490 jiffies + 3 * rcu_jiffies_till_stall_check() + 3); 1484 jiffies + 3 * rcu_jiffies_till_stall_check() + 3);
@@ -1504,10 +1498,11 @@ static void print_cpu_stall(struct rcu_state *rsp)
1504 1498
1505static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) 1499static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
1506{ 1500{
1507 unsigned long completed; 1501 unsigned long gs1;
1508 unsigned long gpnum; 1502 unsigned long gs2;
1509 unsigned long gps; 1503 unsigned long gps;
1510 unsigned long j; 1504 unsigned long j;
1505 unsigned long jn;
1511 unsigned long js; 1506 unsigned long js;
1512 struct rcu_node *rnp; 1507 struct rcu_node *rnp;
1513 1508
@@ -1520,43 +1515,46 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
1520 /* 1515 /*
1521 * Lots of memory barriers to reject false positives. 1516 * Lots of memory barriers to reject false positives.
1522 * 1517 *
1523 * The idea is to pick up rsp->gpnum, then rsp->jiffies_stall, 1518 * The idea is to pick up rsp->gp_seq, then rsp->jiffies_stall,
1524 * then rsp->gp_start, and finally rsp->completed. These values 1519 * then rsp->gp_start, and finally another copy of rsp->gp_seq.
1525 * are updated in the opposite order with memory barriers (or 1520 * These values are updated in the opposite order with memory
1526 * equivalent) during grace-period initialization and cleanup. 1521 * barriers (or equivalent) during grace-period initialization
1527 * Now, a false positive can occur if we get a new 1522 * and cleanup. Now, a false positive can occur if we get a new
1528 * rsp->gp_start and an old value of rsp->jiffies_stall. But given 1523 * value of rsp->gp_start and an old value of rsp->jiffies_stall.
1529 * the memory barriers, the only way that this can happen is if one 1524 * But given the memory barriers, the only way that this can happen
1530 * grace period ends and another starts between these two fetches. 1525 * is if one grace period ends and another starts between these
1531 * Detect this by comparing rsp->completed with the previous fetch 1526 * two fetches. This is detected by comparing the second fetch
1532 * from rsp->gpnum. 1527 * of rsp->gp_seq with the previous fetch from rsp->gp_seq.
1533 * 1528 *
1534 * Given this check, comparisons of jiffies, rsp->jiffies_stall, 1529 * Given this check, comparisons of jiffies, rsp->jiffies_stall,
1535 * and rsp->gp_start suffice to forestall false positives. 1530 * and rsp->gp_start suffice to forestall false positives.
1536 */ 1531 */
1537 gpnum = READ_ONCE(rsp->gpnum); 1532 gs1 = READ_ONCE(rsp->gp_seq);
1538 smp_rmb(); /* Pick up ->gpnum first... */ 1533 smp_rmb(); /* Pick up ->gp_seq first... */
1539 js = READ_ONCE(rsp->jiffies_stall); 1534 js = READ_ONCE(rsp->jiffies_stall);
1540 smp_rmb(); /* ...then ->jiffies_stall before the rest... */ 1535 smp_rmb(); /* ...then ->jiffies_stall before the rest... */
1541 gps = READ_ONCE(rsp->gp_start); 1536 gps = READ_ONCE(rsp->gp_start);
1542 smp_rmb(); /* ...and finally ->gp_start before ->completed. */ 1537 smp_rmb(); /* ...and finally ->gp_start before ->gp_seq again. */
1543 completed = READ_ONCE(rsp->completed); 1538 gs2 = READ_ONCE(rsp->gp_seq);
1544 if (ULONG_CMP_GE(completed, gpnum) || 1539 if (gs1 != gs2 ||
1545 ULONG_CMP_LT(j, js) || 1540 ULONG_CMP_LT(j, js) ||
1546 ULONG_CMP_GE(gps, js)) 1541 ULONG_CMP_GE(gps, js))
1547 return; /* No stall or GP completed since entering function. */ 1542 return; /* No stall or GP completed since entering function. */
1548 rnp = rdp->mynode; 1543 rnp = rdp->mynode;
1544 jn = jiffies + 3 * rcu_jiffies_till_stall_check() + 3;
1549 if (rcu_gp_in_progress(rsp) && 1545 if (rcu_gp_in_progress(rsp) &&
1550 (READ_ONCE(rnp->qsmask) & rdp->grpmask)) { 1546 (READ_ONCE(rnp->qsmask) & rdp->grpmask) &&
1547 cmpxchg(&rsp->jiffies_stall, js, jn) == js) {
1551 1548
1552 /* We haven't checked in, so go dump stack. */ 1549 /* We haven't checked in, so go dump stack. */
1553 print_cpu_stall(rsp); 1550 print_cpu_stall(rsp);
1554 1551
1555 } else if (rcu_gp_in_progress(rsp) && 1552 } else if (rcu_gp_in_progress(rsp) &&
1556 ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY)) { 1553 ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY) &&
1554 cmpxchg(&rsp->jiffies_stall, js, jn) == js) {
1557 1555
1558 /* They had a few time units to dump stack, so complain. */ 1556 /* They had a few time units to dump stack, so complain. */
1559 print_other_cpu_stall(rsp, gpnum); 1557 print_other_cpu_stall(rsp, gs2);
1560 } 1558 }
1561} 1559}
1562 1560
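check_cpu_stall() now brackets its reads of ->jiffies_stall and ->gp_start with two fetches of ->gp_seq and discards the sample when they differ. A stand-alone sketch of that snapshot-and-recheck pattern; sequentially consistent C11 loads are used for simplicity, which is stronger than the smp_rmb() pairing in the patch, and the struct is a made-up stand-in:

#include <stdatomic.h>

struct gp_state {
	atomic_ulong gp_seq;
	atomic_ulong jiffies_stall;
	atomic_ulong gp_start;
};

/*
 * Fill *js and *gps only if no grace period started or ended while we
 * were reading; otherwise report an inconsistent snapshot.
 */
static int sample_gp_state(struct gp_state *sp,
			   unsigned long *js, unsigned long *gps)
{
	unsigned long gs1, gs2;

	gs1  = atomic_load(&sp->gp_seq);
	*js  = atomic_load(&sp->jiffies_stall);
	*gps = atomic_load(&sp->gp_start);
	gs2  = atomic_load(&sp->gp_seq);
	if (gs1 != gs2)
		return -1;	/* A GP began or ended: throw the sample away. */
	return 0;
}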
@@ -1577,123 +1575,99 @@ void rcu_cpu_stall_reset(void)
1577 WRITE_ONCE(rsp->jiffies_stall, jiffies + ULONG_MAX / 2); 1575 WRITE_ONCE(rsp->jiffies_stall, jiffies + ULONG_MAX / 2);
1578} 1576}
1579 1577
1580/*
1581 * Determine the value that ->completed will have at the end of the
1582 * next subsequent grace period. This is used to tag callbacks so that
1583 * a CPU can invoke callbacks in a timely fashion even if that CPU has
1584 * been dyntick-idle for an extended period with callbacks under the
1585 * influence of RCU_FAST_NO_HZ.
1586 *
1587 * The caller must hold rnp->lock with interrupts disabled.
1588 */
1589static unsigned long rcu_cbs_completed(struct rcu_state *rsp,
1590 struct rcu_node *rnp)
1591{
1592 raw_lockdep_assert_held_rcu_node(rnp);
1593
1594 /*
1595 * If RCU is idle, we just wait for the next grace period.
1596 * But we can only be sure that RCU is idle if we are looking
1597 * at the root rcu_node structure -- otherwise, a new grace
1598 * period might have started, but just not yet gotten around
1599 * to initializing the current non-root rcu_node structure.
1600 */
1601 if (rcu_get_root(rsp) == rnp && rnp->gpnum == rnp->completed)
1602 return rnp->completed + 1;
1603
1604 /*
1605 * If the current rcu_node structure believes that RCU is
1606 * idle, and if the rcu_state structure does not yet reflect
1607 * the start of a new grace period, then the next grace period
1608 * will suffice. The memory barrier is needed to accurately
1609 * sample the rsp->gpnum, and pairs with the second lock
1610 * acquisition in rcu_gp_init(), which is augmented with
1611 * smp_mb__after_unlock_lock() for this purpose.
1612 */
1613 if (rnp->gpnum == rnp->completed) {
1614 smp_mb(); /* See above block comment. */
1615 if (READ_ONCE(rsp->gpnum) == rnp->completed)
1616 return rnp->completed + 1;
1617 }
1618
1619 /*
1620 * Otherwise, wait for a possible partial grace period and
1621 * then the subsequent full grace period.
1622 */
1623 return rnp->completed + 2;
1624}
1625
1626/* Trace-event wrapper function for trace_rcu_future_grace_period. */ 1578/* Trace-event wrapper function for trace_rcu_future_grace_period. */
1627static void trace_rcu_this_gp(struct rcu_node *rnp, struct rcu_data *rdp, 1579static void trace_rcu_this_gp(struct rcu_node *rnp, struct rcu_data *rdp,
1628 unsigned long c, const char *s) 1580 unsigned long gp_seq_req, const char *s)
1629{ 1581{
1630 trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum, 1582 trace_rcu_future_grace_period(rdp->rsp->name, rnp->gp_seq, gp_seq_req,
1631 rnp->completed, c, rnp->level, 1583 rnp->level, rnp->grplo, rnp->grphi, s);
1632 rnp->grplo, rnp->grphi, s);
1633} 1584}
1634 1585
1635/* 1586/*
1587 * rcu_start_this_gp - Request the start of a particular grace period
1588 * @rnp_start: The leaf node of the CPU from which to start.
1589 * @rdp: The rcu_data corresponding to the CPU from which to start.
1590 * @gp_seq_req: The gp_seq of the grace period to start.
1591 *
1636 * Start the specified grace period, as needed to handle newly arrived 1592 * Start the specified grace period, as needed to handle newly arrived
1637 * callbacks. The required future grace periods are recorded in each 1593 * callbacks. The required future grace periods are recorded in each
1638 * rcu_node structure's ->need_future_gp[] field. Returns true if there 1594 * rcu_node structure's ->gp_seq_needed field. Returns true if there
1639 * is reason to awaken the grace-period kthread. 1595 * is reason to awaken the grace-period kthread.
1640 * 1596 *
1641 * The caller must hold the specified rcu_node structure's ->lock, which 1597 * The caller must hold the specified rcu_node structure's ->lock, which
1642 * is why the caller is responsible for waking the grace-period kthread. 1598 * is why the caller is responsible for waking the grace-period kthread.
1599 *
1600 * Returns true if the GP thread needs to be awakened else false.
1643 */ 1601 */
1644static bool rcu_start_this_gp(struct rcu_node *rnp, struct rcu_data *rdp, 1602static bool rcu_start_this_gp(struct rcu_node *rnp_start, struct rcu_data *rdp,
1645 unsigned long c) 1603 unsigned long gp_seq_req)
1646{ 1604{
1647 bool ret = false; 1605 bool ret = false;
1648 struct rcu_state *rsp = rdp->rsp; 1606 struct rcu_state *rsp = rdp->rsp;
1649 struct rcu_node *rnp_root; 1607 struct rcu_node *rnp;
1650 1608
1651 /* 1609 /*
1652 * Use funnel locking to either acquire the root rcu_node 1610 * Use funnel locking to either acquire the root rcu_node
1653 * structure's lock or bail out if the need for this grace period 1611 * structure's lock or bail out if the need for this grace period
1654 * has already been recorded -- or has already started. If there 1612 * has already been recorded -- or if that grace period has in
1655 * is already a grace period in progress in a non-leaf node, no 1613 * fact already started. If there is already a grace period in
1656 * recording is needed because the end of the grace period will 1614 * progress in a non-leaf node, no recording is needed because the
1657 * scan the leaf rcu_node structures. Note that rnp->lock must 1615 * end of the grace period will scan the leaf rcu_node structures.
1658 * not be released. 1616 * Note that rnp_start->lock must not be released.
1659 */ 1617 */
1660 raw_lockdep_assert_held_rcu_node(rnp); 1618 raw_lockdep_assert_held_rcu_node(rnp_start);
1661 trace_rcu_this_gp(rnp, rdp, c, TPS("Startleaf")); 1619 trace_rcu_this_gp(rnp_start, rdp, gp_seq_req, TPS("Startleaf"));
1662 for (rnp_root = rnp; 1; rnp_root = rnp_root->parent) { 1620 for (rnp = rnp_start; 1; rnp = rnp->parent) {
1663 if (rnp_root != rnp) 1621 if (rnp != rnp_start)
1664 raw_spin_lock_rcu_node(rnp_root); 1622 raw_spin_lock_rcu_node(rnp);
1665 WARN_ON_ONCE(ULONG_CMP_LT(rnp_root->gpnum + 1623 if (ULONG_CMP_GE(rnp->gp_seq_needed, gp_seq_req) ||
1666 need_future_gp_mask(), c)); 1624 rcu_seq_started(&rnp->gp_seq, gp_seq_req) ||
1667 if (need_future_gp_element(rnp_root, c) || 1625 (rnp != rnp_start &&
1668 ULONG_CMP_GE(rnp_root->gpnum, c) || 1626 rcu_seq_state(rcu_seq_current(&rnp->gp_seq)))) {
1669 (rnp != rnp_root && 1627 trace_rcu_this_gp(rnp, rdp, gp_seq_req,
1670 rnp_root->gpnum != rnp_root->completed)) { 1628 TPS("Prestarted"));
1671 trace_rcu_this_gp(rnp_root, rdp, c, TPS("Prestarted"));
1672 goto unlock_out; 1629 goto unlock_out;
1673 } 1630 }
1674 need_future_gp_element(rnp_root, c) = true; 1631 rnp->gp_seq_needed = gp_seq_req;
1675 if (rnp_root != rnp && rnp_root->parent != NULL) 1632 if (rcu_seq_state(rcu_seq_current(&rnp->gp_seq))) {
1676 raw_spin_unlock_rcu_node(rnp_root); 1633 /*
1677 if (!rnp_root->parent) 1634 * We just marked the leaf or internal node, and a
1635 * grace period is in progress, which means that
1636 * rcu_gp_cleanup() will see the marking. Bail to
1637 * reduce contention.
1638 */
1639 trace_rcu_this_gp(rnp_start, rdp, gp_seq_req,
1640 TPS("Startedleaf"));
1641 goto unlock_out;
1642 }
1643 if (rnp != rnp_start && rnp->parent != NULL)
1644 raw_spin_unlock_rcu_node(rnp);
1645 if (!rnp->parent)
1678 break; /* At root, and perhaps also leaf. */ 1646 break; /* At root, and perhaps also leaf. */
1679 } 1647 }
1680 1648
1681 /* If GP already in progress, just leave, otherwise start one. */ 1649 /* If GP already in progress, just leave, otherwise start one. */
1682 if (rnp_root->gpnum != rnp_root->completed) { 1650 if (rcu_gp_in_progress(rsp)) {
1683 trace_rcu_this_gp(rnp_root, rdp, c, TPS("Startedleafroot")); 1651 trace_rcu_this_gp(rnp, rdp, gp_seq_req, TPS("Startedleafroot"));
1684 goto unlock_out; 1652 goto unlock_out;
1685 } 1653 }
1686 trace_rcu_this_gp(rnp_root, rdp, c, TPS("Startedroot")); 1654 trace_rcu_this_gp(rnp, rdp, gp_seq_req, TPS("Startedroot"));
1687 WRITE_ONCE(rsp->gp_flags, rsp->gp_flags | RCU_GP_FLAG_INIT); 1655 WRITE_ONCE(rsp->gp_flags, rsp->gp_flags | RCU_GP_FLAG_INIT);
1656 rsp->gp_req_activity = jiffies;
1688 if (!rsp->gp_kthread) { 1657 if (!rsp->gp_kthread) {
1689 trace_rcu_this_gp(rnp_root, rdp, c, TPS("NoGPkthread")); 1658 trace_rcu_this_gp(rnp, rdp, gp_seq_req, TPS("NoGPkthread"));
1690 goto unlock_out; 1659 goto unlock_out;
1691 } 1660 }
1692 trace_rcu_grace_period(rsp->name, READ_ONCE(rsp->gpnum), TPS("newreq")); 1661 trace_rcu_grace_period(rsp->name, READ_ONCE(rsp->gp_seq), TPS("newreq"));
1693 ret = true; /* Caller must wake GP kthread. */ 1662 ret = true; /* Caller must wake GP kthread. */
1694unlock_out: 1663unlock_out:
1695 if (rnp != rnp_root) 1664 /* Push furthest requested GP to leaf node and rcu_data structure. */
1696 raw_spin_unlock_rcu_node(rnp_root); 1665 if (ULONG_CMP_LT(gp_seq_req, rnp->gp_seq_needed)) {
1666 rnp_start->gp_seq_needed = rnp->gp_seq_needed;
1667 rdp->gp_seq_needed = rnp->gp_seq_needed;
1668 }
1669 if (rnp != rnp_start)
1670 raw_spin_unlock_rcu_node(rnp);
1697 return ret; 1671 return ret;
1698} 1672}
1699 1673
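rcu_start_this_gp() records the request with funnel locking: climb from the leaf toward the root, taking one node's lock at a time and stopping as soon as some ancestor already carries an equal-or-later request. A toy user-space model of that pattern, with pthread mutexes and hypothetical node fields standing in for the rcu_node structures; as in the kernel code, the caller is assumed to hold the starting leaf's lock, and the plain >= comparison stands in for the wrap-safe ULONG_CMP_GE():

#include <pthread.h>
#include <stdbool.h>
#include <stddef.h>

struct funnel_node {
	pthread_mutex_t lock;
	unsigned long gp_seq_needed;	/* furthest-future GP requested here */
	struct funnel_node *parent;	/* NULL at the root */
};

/* Record a request for grace period "req"; true means "wake the GP thread". */
static bool funnel_request(struct funnel_node *start, unsigned long req)
{
	struct funnel_node *np;
	bool need_wake = false;

	for (np = start; np; np = np->parent) {
		if (np != start)
			pthread_mutex_lock(&np->lock);
		if (np->gp_seq_needed >= req) {
			/* Already requested at this level or above: stop climbing. */
			if (np != start)
				pthread_mutex_unlock(&np->lock);
			return false;
		}
		np->gp_seq_needed = req;
		if (!np->parent) {
			/* Reached the root with the request recorded. */
			need_wake = true;
			if (np != start)
				pthread_mutex_unlock(&np->lock);
			break;
		}
		if (np != start)
			pthread_mutex_unlock(&np->lock);
	}
	return need_wake;
}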
@@ -1703,13 +1677,13 @@ unlock_out:
1703 */ 1677 */
1704static bool rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) 1678static bool rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
1705{ 1679{
1706 unsigned long c = rnp->completed;
1707 bool needmore; 1680 bool needmore;
1708 struct rcu_data *rdp = this_cpu_ptr(rsp->rda); 1681 struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
1709 1682
1710 need_future_gp_element(rnp, c) = false; 1683 needmore = ULONG_CMP_LT(rnp->gp_seq, rnp->gp_seq_needed);
1711 needmore = need_any_future_gp(rnp); 1684 if (!needmore)
1712 trace_rcu_this_gp(rnp, rdp, c, 1685 rnp->gp_seq_needed = rnp->gp_seq; /* Avoid counter wrap. */
1686 trace_rcu_this_gp(rnp, rdp, rnp->gp_seq,
1713 needmore ? TPS("CleanupMore") : TPS("Cleanup")); 1687 needmore ? TPS("CleanupMore") : TPS("Cleanup"));
1714 return needmore; 1688 return needmore;
1715} 1689}
@@ -1727,25 +1701,25 @@ static void rcu_gp_kthread_wake(struct rcu_state *rsp)
1727 !READ_ONCE(rsp->gp_flags) || 1701 !READ_ONCE(rsp->gp_flags) ||
1728 !rsp->gp_kthread) 1702 !rsp->gp_kthread)
1729 return; 1703 return;
1730 swake_up(&rsp->gp_wq); 1704 swake_up_one(&rsp->gp_wq);
1731} 1705}
1732 1706
1733/* 1707/*
1734 * If there is room, assign a ->completed number to any callbacks on 1708 * If there is room, assign a ->gp_seq number to any callbacks on this
1735 * this CPU that have not already been assigned. Also accelerate any 1709 * CPU that have not already been assigned. Also accelerate any callbacks
1736 * callbacks that were previously assigned a ->completed number that has 1710 * that were previously assigned a ->gp_seq number that has since proven
1737 * since proven to be too conservative, which can happen if callbacks get 1711 * to be too conservative, which can happen if callbacks get assigned a
1738 * assigned a ->completed number while RCU is idle, but with reference to 1712 * ->gp_seq number while RCU is idle, but with reference to a non-root
1739 * a non-root rcu_node structure. This function is idempotent, so it does 1713 * rcu_node structure. This function is idempotent, so it does not hurt
1740 * not hurt to call it repeatedly. Returns a flag saying that we should 1714 * to call it repeatedly. Returns a flag saying that we should awaken
1741 * awaken the RCU grace-period kthread. 1715 * the RCU grace-period kthread.
1742 * 1716 *
1743 * The caller must hold rnp->lock with interrupts disabled. 1717 * The caller must hold rnp->lock with interrupts disabled.
1744 */ 1718 */
1745static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp, 1719static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
1746 struct rcu_data *rdp) 1720 struct rcu_data *rdp)
1747{ 1721{
1748 unsigned long c; 1722 unsigned long gp_seq_req;
1749 bool ret = false; 1723 bool ret = false;
1750 1724
1751 raw_lockdep_assert_held_rcu_node(rnp); 1725 raw_lockdep_assert_held_rcu_node(rnp);
@@ -1764,22 +1738,50 @@ static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
1764 * accelerating callback invocation to an earlier grace-period 1738 * accelerating callback invocation to an earlier grace-period
1765 * number. 1739 * number.
1766 */ 1740 */
1767 c = rcu_cbs_completed(rsp, rnp); 1741 gp_seq_req = rcu_seq_snap(&rsp->gp_seq);
1768 if (rcu_segcblist_accelerate(&rdp->cblist, c)) 1742 if (rcu_segcblist_accelerate(&rdp->cblist, gp_seq_req))
1769 ret = rcu_start_this_gp(rnp, rdp, c); 1743 ret = rcu_start_this_gp(rnp, rdp, gp_seq_req);
1770 1744
1771 /* Trace depending on how much we were able to accelerate. */ 1745 /* Trace depending on how much we were able to accelerate. */
1772 if (rcu_segcblist_restempty(&rdp->cblist, RCU_WAIT_TAIL)) 1746 if (rcu_segcblist_restempty(&rdp->cblist, RCU_WAIT_TAIL))
1773 trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccWaitCB")); 1747 trace_rcu_grace_period(rsp->name, rdp->gp_seq, TPS("AccWaitCB"));
1774 else 1748 else
1775 trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccReadyCB")); 1749 trace_rcu_grace_period(rsp->name, rdp->gp_seq, TPS("AccReadyCB"));
1776 return ret; 1750 return ret;
1777} 1751}
1778 1752
1779/* 1753/*
1754 * Similar to rcu_accelerate_cbs(), but does not require that the leaf
1755 * rcu_node structure's ->lock be held. It consults the cached value
1756 * of ->gp_seq_needed in the rcu_data structure, and if that indicates
1757 * that a new grace-period request be made, invokes rcu_accelerate_cbs()
1758 * while holding the leaf rcu_node structure's ->lock.
1759 */
1760static void rcu_accelerate_cbs_unlocked(struct rcu_state *rsp,
1761 struct rcu_node *rnp,
1762 struct rcu_data *rdp)
1763{
1764 unsigned long c;
1765 bool needwake;
1766
1767 lockdep_assert_irqs_disabled();
1768 c = rcu_seq_snap(&rsp->gp_seq);
1769 if (!rdp->gpwrap && ULONG_CMP_GE(rdp->gp_seq_needed, c)) {
1770 /* Old request still live, so mark recent callbacks. */
1771 (void)rcu_segcblist_accelerate(&rdp->cblist, c);
1772 return;
1773 }
1774 raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
1775 needwake = rcu_accelerate_cbs(rsp, rnp, rdp);
1776 raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */
1777 if (needwake)
1778 rcu_gp_kthread_wake(rsp);
1779}
1780
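rcu_seq_snap() above yields the ->gp_seq value at which a full grace period beginning after the call is guaranteed to have completed. A rough stand-alone model of the sequence-counter encoding this conversion relies on; the real helpers live in kernel/rcu/rcu.h, so the constants and formulas below are assumptions for illustration (low bits carry grace-period state, upper bits count grace periods):

/* Assumed encoding: bottom SEQ_STATE_BITS hold state, the rest count GPs. */
#define SEQ_STATE_BITS	2
#define SEQ_STATE_MASK	((1UL << SEQ_STATE_BITS) - 1)

static unsigned long seq_ctr(unsigned long s)	{ return s >> SEQ_STATE_BITS; }
static unsigned long seq_state(unsigned long s)	{ return s & SEQ_STATE_MASK; }

/* Start a grace period: counter unchanged, state becomes nonzero. */
static void seq_start(unsigned long *sp)
{
	*sp += 1;
}

/* End a grace period: advance to the next counter value with state zero. */
static void seq_end(unsigned long *sp)
{
	*sp = (seq_ctr(*sp) + 1) << SEQ_STATE_BITS;
}

/*
 * Snapshot: the smallest sequence value at which a full grace period
 * starting after this call will have ended.  If RCU is idle, that is
 * the end of the next GP; if one is running, the end of the one after it.
 */
static unsigned long seq_snap(unsigned long s)
{
	return (s + 2 * SEQ_STATE_MASK + 1) & ~SEQ_STATE_MASK;
}

/* Has the grace period requested by snapshot "snap" completed? (wrap-safe) */
static int seq_done(unsigned long cur, unsigned long snap)
{
	return (long)(cur - snap) >= 0;
}

In this model, tagging a callback with the snapshot value is what rcu_segcblist_accelerate() does above: the callback becomes invocable once seq_done() holds for the node's current ->gp_seq.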
1781/*
1780 * Move any callbacks whose grace period has completed to the 1782 * Move any callbacks whose grace period has completed to the
1781 * RCU_DONE_TAIL sublist, then compact the remaining sublists and 1783 * RCU_DONE_TAIL sublist, then compact the remaining sublists and
1782 * assign ->completed numbers to any callbacks in the RCU_NEXT_TAIL 1784 * assign ->gp_seq numbers to any callbacks in the RCU_NEXT_TAIL
1783 * sublist. This function is idempotent, so it does not hurt to 1785 * sublist. This function is idempotent, so it does not hurt to
1784 * invoke it repeatedly. As long as it is not invoked -too- often... 1786 * invoke it repeatedly. As long as it is not invoked -too- often...
1785 * Returns true if the RCU grace-period kthread needs to be awakened. 1787 * Returns true if the RCU grace-period kthread needs to be awakened.
@@ -1796,10 +1798,10 @@ static bool rcu_advance_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
1796 return false; 1798 return false;
1797 1799
1798 /* 1800 /*
1799 * Find all callbacks whose ->completed numbers indicate that they 1801 * Find all callbacks whose ->gp_seq numbers indicate that they
1800 * are ready to invoke, and put them into the RCU_DONE_TAIL sublist. 1802 * are ready to invoke, and put them into the RCU_DONE_TAIL sublist.
1801 */ 1803 */
1802 rcu_segcblist_advance(&rdp->cblist, rnp->completed); 1804 rcu_segcblist_advance(&rdp->cblist, rnp->gp_seq);
1803 1805
1804 /* Classify any remaining callbacks. */ 1806 /* Classify any remaining callbacks. */
1805 return rcu_accelerate_cbs(rsp, rnp, rdp); 1807 return rcu_accelerate_cbs(rsp, rnp, rdp);
@@ -1819,39 +1821,38 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp,
1819 1821
1820 raw_lockdep_assert_held_rcu_node(rnp); 1822 raw_lockdep_assert_held_rcu_node(rnp);
1821 1823
1822 /* Handle the ends of any preceding grace periods first. */ 1824 if (rdp->gp_seq == rnp->gp_seq)
1823 if (rdp->completed == rnp->completed && 1825 return false; /* Nothing to do. */
1824 !unlikely(READ_ONCE(rdp->gpwrap))) {
1825
1826 /* No grace period end, so just accelerate recent callbacks. */
1827 ret = rcu_accelerate_cbs(rsp, rnp, rdp);
1828 1826
1827 /* Handle the ends of any preceding grace periods first. */
1828 if (rcu_seq_completed_gp(rdp->gp_seq, rnp->gp_seq) ||
1829 unlikely(READ_ONCE(rdp->gpwrap))) {
1830 ret = rcu_advance_cbs(rsp, rnp, rdp); /* Advance callbacks. */
1831 trace_rcu_grace_period(rsp->name, rdp->gp_seq, TPS("cpuend"));
1829 } else { 1832 } else {
1830 1833 ret = rcu_accelerate_cbs(rsp, rnp, rdp); /* Recent callbacks. */
1831 /* Advance callbacks. */
1832 ret = rcu_advance_cbs(rsp, rnp, rdp);
1833
1834 /* Remember that we saw this grace-period completion. */
1835 rdp->completed = rnp->completed;
1836 trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuend"));
1837 } 1834 }
1838 1835
1839 if (rdp->gpnum != rnp->gpnum || unlikely(READ_ONCE(rdp->gpwrap))) { 1836 /* Now handle the beginnings of any new-to-this-CPU grace periods. */
1837 if (rcu_seq_new_gp(rdp->gp_seq, rnp->gp_seq) ||
1838 unlikely(READ_ONCE(rdp->gpwrap))) {
1840 /* 1839 /*
1841 * If the current grace period is waiting for this CPU, 1840 * If the current grace period is waiting for this CPU,
1842 * set up to detect a quiescent state, otherwise don't 1841 * set up to detect a quiescent state, otherwise don't
1843 * go looking for one. 1842 * go looking for one.
1844 */ 1843 */
1845 rdp->gpnum = rnp->gpnum; 1844 trace_rcu_grace_period(rsp->name, rnp->gp_seq, TPS("cpustart"));
1846 trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpustart"));
1847 need_gp = !!(rnp->qsmask & rdp->grpmask); 1845 need_gp = !!(rnp->qsmask & rdp->grpmask);
1848 rdp->cpu_no_qs.b.norm = need_gp; 1846 rdp->cpu_no_qs.b.norm = need_gp;
1849 rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_dynticks.rcu_qs_ctr); 1847 rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_dynticks.rcu_qs_ctr);
1850 rdp->core_needs_qs = need_gp; 1848 rdp->core_needs_qs = need_gp;
1851 zero_cpu_stall_ticks(rdp); 1849 zero_cpu_stall_ticks(rdp);
1852 WRITE_ONCE(rdp->gpwrap, false);
1853 rcu_gpnum_ovf(rnp, rdp);
1854 } 1850 }
1851 rdp->gp_seq = rnp->gp_seq; /* Remember new grace-period state. */
1852 if (ULONG_CMP_GE(rnp->gp_seq_needed, rdp->gp_seq_needed) || rdp->gpwrap)
1853 rdp->gp_seq_needed = rnp->gp_seq_needed;
1854 WRITE_ONCE(rdp->gpwrap, false);
1855 rcu_gpnum_ovf(rnp, rdp);
1855 return ret; 1856 return ret;
1856} 1857}
1857 1858
@@ -1863,8 +1864,7 @@ static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp)
1863 1864
1864 local_irq_save(flags); 1865 local_irq_save(flags);
1865 rnp = rdp->mynode; 1866 rnp = rdp->mynode;
1866 if ((rdp->gpnum == READ_ONCE(rnp->gpnum) && 1867 if ((rdp->gp_seq == rcu_seq_current(&rnp->gp_seq) &&
1867 rdp->completed == READ_ONCE(rnp->completed) &&
1868 !unlikely(READ_ONCE(rdp->gpwrap))) || /* w/out lock. */ 1868 !unlikely(READ_ONCE(rdp->gpwrap))) || /* w/out lock. */
1869 !raw_spin_trylock_rcu_node(rnp)) { /* irqs already off, so later. */ 1869 !raw_spin_trylock_rcu_node(rnp)) { /* irqs already off, so later. */
1870 local_irq_restore(flags); 1870 local_irq_restore(flags);
@@ -1879,7 +1879,8 @@ static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp)
1879static void rcu_gp_slow(struct rcu_state *rsp, int delay) 1879static void rcu_gp_slow(struct rcu_state *rsp, int delay)
1880{ 1880{
1881 if (delay > 0 && 1881 if (delay > 0 &&
1882 !(rsp->gpnum % (rcu_num_nodes * PER_RCU_NODE_PERIOD * delay))) 1882 !(rcu_seq_ctr(rsp->gp_seq) %
1883 (rcu_num_nodes * PER_RCU_NODE_PERIOD * delay)))
1883 schedule_timeout_uninterruptible(delay); 1884 schedule_timeout_uninterruptible(delay);
1884} 1885}
1885 1886
@@ -1888,7 +1889,9 @@ static void rcu_gp_slow(struct rcu_state *rsp, int delay)
1888 */ 1889 */
1889static bool rcu_gp_init(struct rcu_state *rsp) 1890static bool rcu_gp_init(struct rcu_state *rsp)
1890{ 1891{
1892 unsigned long flags;
1891 unsigned long oldmask; 1893 unsigned long oldmask;
1894 unsigned long mask;
1892 struct rcu_data *rdp; 1895 struct rcu_data *rdp;
1893 struct rcu_node *rnp = rcu_get_root(rsp); 1896 struct rcu_node *rnp = rcu_get_root(rsp);
1894 1897
@@ -1912,9 +1915,9 @@ static bool rcu_gp_init(struct rcu_state *rsp)
1912 1915
1913 /* Advance to a new grace period and initialize state. */ 1916 /* Advance to a new grace period and initialize state. */
1914 record_gp_stall_check_time(rsp); 1917 record_gp_stall_check_time(rsp);
1915 /* Record GP times before starting GP, hence smp_store_release(). */ 1918 /* Record GP times before starting GP, hence rcu_seq_start(). */
1916 smp_store_release(&rsp->gpnum, rsp->gpnum + 1); 1919 rcu_seq_start(&rsp->gp_seq);
1917 trace_rcu_grace_period(rsp->name, rsp->gpnum, TPS("start")); 1920 trace_rcu_grace_period(rsp->name, rsp->gp_seq, TPS("start"));
1918 raw_spin_unlock_irq_rcu_node(rnp); 1921 raw_spin_unlock_irq_rcu_node(rnp);
1919 1922
1920 /* 1923 /*
@@ -1923,13 +1926,15 @@ static bool rcu_gp_init(struct rcu_state *rsp)
1923 * for subsequent online CPUs, and that quiescent-state forcing 1926 * for subsequent online CPUs, and that quiescent-state forcing
1924 * will handle subsequent offline CPUs. 1927 * will handle subsequent offline CPUs.
1925 */ 1928 */
1929 rsp->gp_state = RCU_GP_ONOFF;
1926 rcu_for_each_leaf_node(rsp, rnp) { 1930 rcu_for_each_leaf_node(rsp, rnp) {
1927 rcu_gp_slow(rsp, gp_preinit_delay); 1931 spin_lock(&rsp->ofl_lock);
1928 raw_spin_lock_irq_rcu_node(rnp); 1932 raw_spin_lock_irq_rcu_node(rnp);
1929 if (rnp->qsmaskinit == rnp->qsmaskinitnext && 1933 if (rnp->qsmaskinit == rnp->qsmaskinitnext &&
1930 !rnp->wait_blkd_tasks) { 1934 !rnp->wait_blkd_tasks) {
1931 /* Nothing to do on this leaf rcu_node structure. */ 1935 /* Nothing to do on this leaf rcu_node structure. */
1932 raw_spin_unlock_irq_rcu_node(rnp); 1936 raw_spin_unlock_irq_rcu_node(rnp);
1937 spin_unlock(&rsp->ofl_lock);
1933 continue; 1938 continue;
1934 } 1939 }
1935 1940
@@ -1939,12 +1944,14 @@ static bool rcu_gp_init(struct rcu_state *rsp)
1939 1944
1940 /* If zero-ness of ->qsmaskinit changed, propagate up tree. */ 1945 /* If zero-ness of ->qsmaskinit changed, propagate up tree. */
1941 if (!oldmask != !rnp->qsmaskinit) { 1946 if (!oldmask != !rnp->qsmaskinit) {
1942 if (!oldmask) /* First online CPU for this rcu_node. */ 1947 if (!oldmask) { /* First online CPU for rcu_node. */
1943 rcu_init_new_rnp(rnp); 1948 if (!rnp->wait_blkd_tasks) /* Ever offline? */
1944 else if (rcu_preempt_has_tasks(rnp)) /* blocked tasks */ 1949 rcu_init_new_rnp(rnp);
1945 rnp->wait_blkd_tasks = true; 1950 } else if (rcu_preempt_has_tasks(rnp)) {
1946 else /* Last offline CPU and can propagate. */ 1951 rnp->wait_blkd_tasks = true; /* blocked tasks */
1952 } else { /* Last offline CPU and can propagate. */
1947 rcu_cleanup_dead_rnp(rnp); 1953 rcu_cleanup_dead_rnp(rnp);
1954 }
1948 } 1955 }
1949 1956
1950 /* 1957 /*
@@ -1953,18 +1960,19 @@ static bool rcu_gp_init(struct rcu_state *rsp)
1953 * still offline, propagate up the rcu_node tree and 1960 * still offline, propagate up the rcu_node tree and
1954 * clear ->wait_blkd_tasks. Otherwise, if one of this 1961 * clear ->wait_blkd_tasks. Otherwise, if one of this
1955 * rcu_node structure's CPUs has since come back online, 1962 * rcu_node structure's CPUs has since come back online,
1956 * simply clear ->wait_blkd_tasks (but rcu_cleanup_dead_rnp() 1963 * simply clear ->wait_blkd_tasks.
1957 * checks for this, so just call it unconditionally).
1958 */ 1964 */
1959 if (rnp->wait_blkd_tasks && 1965 if (rnp->wait_blkd_tasks &&
1960 (!rcu_preempt_has_tasks(rnp) || 1966 (!rcu_preempt_has_tasks(rnp) || rnp->qsmaskinit)) {
1961 rnp->qsmaskinit)) {
1962 rnp->wait_blkd_tasks = false; 1967 rnp->wait_blkd_tasks = false;
1963 rcu_cleanup_dead_rnp(rnp); 1968 if (!rnp->qsmaskinit)
1969 rcu_cleanup_dead_rnp(rnp);
1964 } 1970 }
1965 1971
1966 raw_spin_unlock_irq_rcu_node(rnp); 1972 raw_spin_unlock_irq_rcu_node(rnp);
1973 spin_unlock(&rsp->ofl_lock);
1967 } 1974 }
1975 rcu_gp_slow(rsp, gp_preinit_delay); /* Races with CPU hotplug. */
1968 1976
1969 /* 1977 /*
1970 * Set the quiescent-state-needed bits in all the rcu_node 1978 * Set the quiescent-state-needed bits in all the rcu_node
@@ -1978,22 +1986,27 @@ static bool rcu_gp_init(struct rcu_state *rsp)
1978 * The grace period cannot complete until the initialization 1986 * The grace period cannot complete until the initialization
1979 * process finishes, because this kthread handles both. 1987 * process finishes, because this kthread handles both.
1980 */ 1988 */
1989 rsp->gp_state = RCU_GP_INIT;
1981 rcu_for_each_node_breadth_first(rsp, rnp) { 1990 rcu_for_each_node_breadth_first(rsp, rnp) {
1982 rcu_gp_slow(rsp, gp_init_delay); 1991 rcu_gp_slow(rsp, gp_init_delay);
1983 raw_spin_lock_irq_rcu_node(rnp); 1992 raw_spin_lock_irqsave_rcu_node(rnp, flags);
1984 rdp = this_cpu_ptr(rsp->rda); 1993 rdp = this_cpu_ptr(rsp->rda);
1985 rcu_preempt_check_blocked_tasks(rnp); 1994 rcu_preempt_check_blocked_tasks(rsp, rnp);
1986 rnp->qsmask = rnp->qsmaskinit; 1995 rnp->qsmask = rnp->qsmaskinit;
1987 WRITE_ONCE(rnp->gpnum, rsp->gpnum); 1996 WRITE_ONCE(rnp->gp_seq, rsp->gp_seq);
1988 if (WARN_ON_ONCE(rnp->completed != rsp->completed))
1989 WRITE_ONCE(rnp->completed, rsp->completed);
1990 if (rnp == rdp->mynode) 1997 if (rnp == rdp->mynode)
1991 (void)__note_gp_changes(rsp, rnp, rdp); 1998 (void)__note_gp_changes(rsp, rnp, rdp);
1992 rcu_preempt_boost_start_gp(rnp); 1999 rcu_preempt_boost_start_gp(rnp);
1993 trace_rcu_grace_period_init(rsp->name, rnp->gpnum, 2000 trace_rcu_grace_period_init(rsp->name, rnp->gp_seq,
1994 rnp->level, rnp->grplo, 2001 rnp->level, rnp->grplo,
1995 rnp->grphi, rnp->qsmask); 2002 rnp->grphi, rnp->qsmask);
1996 raw_spin_unlock_irq_rcu_node(rnp); 2003 /* Quiescent states for tasks on any now-offline CPUs. */
2004 mask = rnp->qsmask & ~rnp->qsmaskinitnext;
2005 rnp->rcu_gp_init_mask = mask;
2006 if ((mask || rnp->wait_blkd_tasks) && rcu_is_leaf_node(rnp))
2007 rcu_report_qs_rnp(mask, rsp, rnp, rnp->gp_seq, flags);
2008 else
2009 raw_spin_unlock_irq_rcu_node(rnp);
1997 cond_resched_tasks_rcu_qs(); 2010 cond_resched_tasks_rcu_qs();
1998 WRITE_ONCE(rsp->gp_activity, jiffies); 2011 WRITE_ONCE(rsp->gp_activity, jiffies);
1999 } 2012 }
@@ -2002,7 +2015,7 @@ static bool rcu_gp_init(struct rcu_state *rsp)
2002} 2015}
2003 2016
2004/* 2017/*
2005 * Helper function for swait_event_idle() wakeup at force-quiescent-state 2018 * Helper function for swait_event_idle_exclusive() wakeup at force-quiescent-state
2006 * time. 2019 * time.
2007 */ 2020 */
2008static bool rcu_gp_fqs_check_wake(struct rcu_state *rsp, int *gfp) 2021static bool rcu_gp_fqs_check_wake(struct rcu_state *rsp, int *gfp)
@@ -2053,6 +2066,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
2053{ 2066{
2054 unsigned long gp_duration; 2067 unsigned long gp_duration;
2055 bool needgp = false; 2068 bool needgp = false;
2069 unsigned long new_gp_seq;
2056 struct rcu_data *rdp; 2070 struct rcu_data *rdp;
2057 struct rcu_node *rnp = rcu_get_root(rsp); 2071 struct rcu_node *rnp = rcu_get_root(rsp);
2058 struct swait_queue_head *sq; 2072 struct swait_queue_head *sq;
@@ -2074,19 +2088,22 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
2074 raw_spin_unlock_irq_rcu_node(rnp); 2088 raw_spin_unlock_irq_rcu_node(rnp);
2075 2089
2076 /* 2090 /*
2077 * Propagate new ->completed value to rcu_node structures so 2091 * Propagate new ->gp_seq value to rcu_node structures so that
2078 * that other CPUs don't have to wait until the start of the next 2092 * other CPUs don't have to wait until the start of the next grace
2079 * grace period to process their callbacks. This also avoids 2093 * period to process their callbacks. This also avoids some nasty
2080 * some nasty RCU grace-period initialization races by forcing 2094 * RCU grace-period initialization races by forcing the end of
2081 * the end of the current grace period to be completely recorded in 2095 * the current grace period to be completely recorded in all of
2082 * all of the rcu_node structures before the beginning of the next 2096 * the rcu_node structures before the beginning of the next grace
2083 * grace period is recorded in any of the rcu_node structures. 2097 * period is recorded in any of the rcu_node structures.
2084 */ 2098 */
2099 new_gp_seq = rsp->gp_seq;
2100 rcu_seq_end(&new_gp_seq);
2085 rcu_for_each_node_breadth_first(rsp, rnp) { 2101 rcu_for_each_node_breadth_first(rsp, rnp) {
2086 raw_spin_lock_irq_rcu_node(rnp); 2102 raw_spin_lock_irq_rcu_node(rnp);
2087 WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp)); 2103 if (WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp)))
2104 dump_blkd_tasks(rsp, rnp, 10);
2088 WARN_ON_ONCE(rnp->qsmask); 2105 WARN_ON_ONCE(rnp->qsmask);
2089 WRITE_ONCE(rnp->completed, rsp->gpnum); 2106 WRITE_ONCE(rnp->gp_seq, new_gp_seq);
2090 rdp = this_cpu_ptr(rsp->rda); 2107 rdp = this_cpu_ptr(rsp->rda);
2091 if (rnp == rdp->mynode) 2108 if (rnp == rdp->mynode)
2092 needgp = __note_gp_changes(rsp, rnp, rdp) || needgp; 2109 needgp = __note_gp_changes(rsp, rnp, rdp) || needgp;
@@ -2100,26 +2117,28 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
2100 rcu_gp_slow(rsp, gp_cleanup_delay); 2117 rcu_gp_slow(rsp, gp_cleanup_delay);
2101 } 2118 }
2102 rnp = rcu_get_root(rsp); 2119 rnp = rcu_get_root(rsp);
2103 raw_spin_lock_irq_rcu_node(rnp); /* Order GP before ->completed update. */ 2120 raw_spin_lock_irq_rcu_node(rnp); /* GP before rsp->gp_seq update. */
2104 2121
2105 /* Declare grace period done. */ 2122 /* Declare grace period done. */
2106 WRITE_ONCE(rsp->completed, rsp->gpnum); 2123 rcu_seq_end(&rsp->gp_seq);
2107 trace_rcu_grace_period(rsp->name, rsp->completed, TPS("end")); 2124 trace_rcu_grace_period(rsp->name, rsp->gp_seq, TPS("end"));
2108 rsp->gp_state = RCU_GP_IDLE; 2125 rsp->gp_state = RCU_GP_IDLE;
2109 /* Check for GP requests since above loop. */ 2126 /* Check for GP requests since above loop. */
2110 rdp = this_cpu_ptr(rsp->rda); 2127 rdp = this_cpu_ptr(rsp->rda);
2111 if (need_any_future_gp(rnp)) { 2128 if (!needgp && ULONG_CMP_LT(rnp->gp_seq, rnp->gp_seq_needed)) {
2112 trace_rcu_this_gp(rnp, rdp, rsp->completed - 1, 2129 trace_rcu_this_gp(rnp, rdp, rnp->gp_seq_needed,
2113 TPS("CleanupMore")); 2130 TPS("CleanupMore"));
2114 needgp = true; 2131 needgp = true;
2115 } 2132 }
2116 /* Advance CBs to reduce false positives below. */ 2133 /* Advance CBs to reduce false positives below. */
2117 if (!rcu_accelerate_cbs(rsp, rnp, rdp) && needgp) { 2134 if (!rcu_accelerate_cbs(rsp, rnp, rdp) && needgp) {
2118 WRITE_ONCE(rsp->gp_flags, RCU_GP_FLAG_INIT); 2135 WRITE_ONCE(rsp->gp_flags, RCU_GP_FLAG_INIT);
2119 trace_rcu_grace_period(rsp->name, READ_ONCE(rsp->gpnum), 2136 rsp->gp_req_activity = jiffies;
2137 trace_rcu_grace_period(rsp->name, READ_ONCE(rsp->gp_seq),
2120 TPS("newreq")); 2138 TPS("newreq"));
2139 } else {
2140 WRITE_ONCE(rsp->gp_flags, rsp->gp_flags & RCU_GP_FLAG_INIT);
2121 } 2141 }
2122 WRITE_ONCE(rsp->gp_flags, rsp->gp_flags & RCU_GP_FLAG_INIT);
2123 raw_spin_unlock_irq_rcu_node(rnp); 2142 raw_spin_unlock_irq_rcu_node(rnp);
2124} 2143}
2125 2144
@@ -2141,10 +2160,10 @@ static int __noreturn rcu_gp_kthread(void *arg)
2141 /* Handle grace-period start. */ 2160 /* Handle grace-period start. */
2142 for (;;) { 2161 for (;;) {
2143 trace_rcu_grace_period(rsp->name, 2162 trace_rcu_grace_period(rsp->name,
2144 READ_ONCE(rsp->gpnum), 2163 READ_ONCE(rsp->gp_seq),
2145 TPS("reqwait")); 2164 TPS("reqwait"));
2146 rsp->gp_state = RCU_GP_WAIT_GPS; 2165 rsp->gp_state = RCU_GP_WAIT_GPS;
2147 swait_event_idle(rsp->gp_wq, READ_ONCE(rsp->gp_flags) & 2166 swait_event_idle_exclusive(rsp->gp_wq, READ_ONCE(rsp->gp_flags) &
2148 RCU_GP_FLAG_INIT); 2167 RCU_GP_FLAG_INIT);
2149 rsp->gp_state = RCU_GP_DONE_GPS; 2168 rsp->gp_state = RCU_GP_DONE_GPS;
2150 /* Locking provides needed memory barrier. */ 2169 /* Locking provides needed memory barrier. */
@@ -2154,17 +2173,13 @@ static int __noreturn rcu_gp_kthread(void *arg)
2154 WRITE_ONCE(rsp->gp_activity, jiffies); 2173 WRITE_ONCE(rsp->gp_activity, jiffies);
2155 WARN_ON(signal_pending(current)); 2174 WARN_ON(signal_pending(current));
2156 trace_rcu_grace_period(rsp->name, 2175 trace_rcu_grace_period(rsp->name,
2157 READ_ONCE(rsp->gpnum), 2176 READ_ONCE(rsp->gp_seq),
2158 TPS("reqwaitsig")); 2177 TPS("reqwaitsig"));
2159 } 2178 }
2160 2179
2161 /* Handle quiescent-state forcing. */ 2180 /* Handle quiescent-state forcing. */
2162 first_gp_fqs = true; 2181 first_gp_fqs = true;
2163 j = jiffies_till_first_fqs; 2182 j = jiffies_till_first_fqs;
2164 if (j > HZ) {
2165 j = HZ;
2166 jiffies_till_first_fqs = HZ;
2167 }
2168 ret = 0; 2183 ret = 0;
2169 for (;;) { 2184 for (;;) {
2170 if (!ret) { 2185 if (!ret) {
@@ -2173,10 +2188,10 @@ static int __noreturn rcu_gp_kthread(void *arg)
2173 jiffies + 3 * j); 2188 jiffies + 3 * j);
2174 } 2189 }
2175 trace_rcu_grace_period(rsp->name, 2190 trace_rcu_grace_period(rsp->name,
2176 READ_ONCE(rsp->gpnum), 2191 READ_ONCE(rsp->gp_seq),
2177 TPS("fqswait")); 2192 TPS("fqswait"));
2178 rsp->gp_state = RCU_GP_WAIT_FQS; 2193 rsp->gp_state = RCU_GP_WAIT_FQS;
2179 ret = swait_event_idle_timeout(rsp->gp_wq, 2194 ret = swait_event_idle_timeout_exclusive(rsp->gp_wq,
2180 rcu_gp_fqs_check_wake(rsp, &gf), j); 2195 rcu_gp_fqs_check_wake(rsp, &gf), j);
2181 rsp->gp_state = RCU_GP_DOING_FQS; 2196 rsp->gp_state = RCU_GP_DOING_FQS;
2182 /* Locking provides needed memory barriers. */ 2197 /* Locking provides needed memory barriers. */
@@ -2188,31 +2203,24 @@ static int __noreturn rcu_gp_kthread(void *arg)
2188 if (ULONG_CMP_GE(jiffies, rsp->jiffies_force_qs) || 2203 if (ULONG_CMP_GE(jiffies, rsp->jiffies_force_qs) ||
2189 (gf & RCU_GP_FLAG_FQS)) { 2204 (gf & RCU_GP_FLAG_FQS)) {
2190 trace_rcu_grace_period(rsp->name, 2205 trace_rcu_grace_period(rsp->name,
2191 READ_ONCE(rsp->gpnum), 2206 READ_ONCE(rsp->gp_seq),
2192 TPS("fqsstart")); 2207 TPS("fqsstart"));
2193 rcu_gp_fqs(rsp, first_gp_fqs); 2208 rcu_gp_fqs(rsp, first_gp_fqs);
2194 first_gp_fqs = false; 2209 first_gp_fqs = false;
2195 trace_rcu_grace_period(rsp->name, 2210 trace_rcu_grace_period(rsp->name,
2196 READ_ONCE(rsp->gpnum), 2211 READ_ONCE(rsp->gp_seq),
2197 TPS("fqsend")); 2212 TPS("fqsend"));
2198 cond_resched_tasks_rcu_qs(); 2213 cond_resched_tasks_rcu_qs();
2199 WRITE_ONCE(rsp->gp_activity, jiffies); 2214 WRITE_ONCE(rsp->gp_activity, jiffies);
2200 ret = 0; /* Force full wait till next FQS. */ 2215 ret = 0; /* Force full wait till next FQS. */
2201 j = jiffies_till_next_fqs; 2216 j = jiffies_till_next_fqs;
2202 if (j > HZ) {
2203 j = HZ;
2204 jiffies_till_next_fqs = HZ;
2205 } else if (j < 1) {
2206 j = 1;
2207 jiffies_till_next_fqs = 1;
2208 }
2209 } else { 2217 } else {
2210 /* Deal with stray signal. */ 2218 /* Deal with stray signal. */
2211 cond_resched_tasks_rcu_qs(); 2219 cond_resched_tasks_rcu_qs();
2212 WRITE_ONCE(rsp->gp_activity, jiffies); 2220 WRITE_ONCE(rsp->gp_activity, jiffies);
2213 WARN_ON(signal_pending(current)); 2221 WARN_ON(signal_pending(current));
2214 trace_rcu_grace_period(rsp->name, 2222 trace_rcu_grace_period(rsp->name,
2215 READ_ONCE(rsp->gpnum), 2223 READ_ONCE(rsp->gp_seq),
2216 TPS("fqswaitsig")); 2224 TPS("fqswaitsig"));
2217 ret = 1; /* Keep old FQS timing. */ 2225 ret = 1; /* Keep old FQS timing. */
2218 j = jiffies; 2226 j = jiffies;
@@ -2256,8 +2264,12 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
2256 * must be represented by the same rcu_node structure (which need not be a 2264 * must be represented by the same rcu_node structure (which need not be a
2257 * leaf rcu_node structure, though it often will be). The gps parameter 2265 * leaf rcu_node structure, though it often will be). The gps parameter
2258 * is the grace-period snapshot, which means that the quiescent states 2266 * is the grace-period snapshot, which means that the quiescent states
2259 * are valid only if rnp->gpnum is equal to gps. That structure's lock 2267 * are valid only if rnp->gp_seq is equal to gps. That structure's lock
2260 * must be held upon entry, and it is released before return. 2268 * must be held upon entry, and it is released before return.
2269 *
2270 * As a special case, if mask is zero, the bit-already-cleared check is
2271 * disabled. This allows propagating quiescent state due to resumed tasks
2272 * during grace-period initialization.
2261 */ 2273 */
2262static void 2274static void
2263rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp, 2275rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
@@ -2271,7 +2283,7 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
2271 2283
2272 /* Walk up the rcu_node hierarchy. */ 2284 /* Walk up the rcu_node hierarchy. */
2273 for (;;) { 2285 for (;;) {
2274 if (!(rnp->qsmask & mask) || rnp->gpnum != gps) { 2286 if ((!(rnp->qsmask & mask) && mask) || rnp->gp_seq != gps) {
2275 2287
2276 /* 2288 /*
2277 * Our bit has already been cleared, or the 2289 * Our bit has already been cleared, or the
@@ -2284,7 +2296,7 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
2284 WARN_ON_ONCE(!rcu_is_leaf_node(rnp) && 2296 WARN_ON_ONCE(!rcu_is_leaf_node(rnp) &&
2285 rcu_preempt_blocked_readers_cgp(rnp)); 2297 rcu_preempt_blocked_readers_cgp(rnp));
2286 rnp->qsmask &= ~mask; 2298 rnp->qsmask &= ~mask;
2287 trace_rcu_quiescent_state_report(rsp->name, rnp->gpnum, 2299 trace_rcu_quiescent_state_report(rsp->name, rnp->gp_seq,
2288 mask, rnp->qsmask, rnp->level, 2300 mask, rnp->qsmask, rnp->level,
2289 rnp->grplo, rnp->grphi, 2301 rnp->grplo, rnp->grphi,
2290 !!rnp->gp_tasks); 2302 !!rnp->gp_tasks);
@@ -2294,6 +2306,7 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
2294 raw_spin_unlock_irqrestore_rcu_node(rnp, flags); 2306 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
2295 return; 2307 return;
2296 } 2308 }
2309 rnp->completedqs = rnp->gp_seq;
2297 mask = rnp->grpmask; 2310 mask = rnp->grpmask;
2298 if (rnp->parent == NULL) { 2311 if (rnp->parent == NULL) {
2299 2312
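A compact model of the upward walk performed by rcu_report_qs_rnp(): clear the reporting mask at the current level and, whenever that level's ->qsmask empties, repeat one level up using the node's own bit in its parent. Locking, the ->gp_seq check, and tracing are omitted; the structure is a stand-in for rcu_node:

#include <stddef.h>

struct toy_rnp {
	unsigned long qsmask;		/* children/CPUs still owing a QS */
	unsigned long grpmask;		/* this node's bit in its parent's qsmask */
	struct toy_rnp *parent;		/* NULL at the root */
};

/* Returns 1 when the root's mask empties, i.e. the grace period may end. */
static int toy_report_qs(struct toy_rnp *rnp, unsigned long mask)
{
	for (;;) {
		rnp->qsmask &= ~mask;
		if (rnp->qsmask)
			return 0;	/* This level is still waiting on others. */
		if (!rnp->parent)
			return 1;	/* Root emptied: all quiescent states are in. */
		mask = rnp->grpmask;	/* Report this whole subtree to the parent. */
		rnp = rnp->parent;
	}
}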
@@ -2323,8 +2336,9 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
2323 * irqs disabled, and this lock is released upon return, but irqs remain 2336 * irqs disabled, and this lock is released upon return, but irqs remain
2324 * disabled. 2337 * disabled.
2325 */ 2338 */
2326static void rcu_report_unblock_qs_rnp(struct rcu_state *rsp, 2339static void __maybe_unused
2327 struct rcu_node *rnp, unsigned long flags) 2340rcu_report_unblock_qs_rnp(struct rcu_state *rsp,
2341 struct rcu_node *rnp, unsigned long flags)
2328 __releases(rnp->lock) 2342 __releases(rnp->lock)
2329{ 2343{
2330 unsigned long gps; 2344 unsigned long gps;
@@ -2332,12 +2346,15 @@ static void rcu_report_unblock_qs_rnp(struct rcu_state *rsp,
2332 struct rcu_node *rnp_p; 2346 struct rcu_node *rnp_p;
2333 2347
2334 raw_lockdep_assert_held_rcu_node(rnp); 2348 raw_lockdep_assert_held_rcu_node(rnp);
2335 if (rcu_state_p == &rcu_sched_state || rsp != rcu_state_p || 2349 if (WARN_ON_ONCE(rcu_state_p == &rcu_sched_state) ||
2336 rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) { 2350 WARN_ON_ONCE(rsp != rcu_state_p) ||
2351 WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp)) ||
2352 rnp->qsmask != 0) {
2337 raw_spin_unlock_irqrestore_rcu_node(rnp, flags); 2353 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
2338 return; /* Still need more quiescent states! */ 2354 return; /* Still need more quiescent states! */
2339 } 2355 }
2340 2356
2357 rnp->completedqs = rnp->gp_seq;
2341 rnp_p = rnp->parent; 2358 rnp_p = rnp->parent;
2342 if (rnp_p == NULL) { 2359 if (rnp_p == NULL) {
2343 /* 2360 /*
@@ -2348,8 +2365,8 @@ static void rcu_report_unblock_qs_rnp(struct rcu_state *rsp,
2348 return; 2365 return;
2349 } 2366 }
2350 2367
2351 /* Report up the rest of the hierarchy, tracking current ->gpnum. */ 2368 /* Report up the rest of the hierarchy, tracking current ->gp_seq. */
2352 gps = rnp->gpnum; 2369 gps = rnp->gp_seq;
2353 mask = rnp->grpmask; 2370 mask = rnp->grpmask;
2354 raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */ 2371 raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */
2355 raw_spin_lock_rcu_node(rnp_p); /* irqs already disabled. */ 2372 raw_spin_lock_rcu_node(rnp_p); /* irqs already disabled. */
@@ -2370,8 +2387,8 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
2370 2387
2371 rnp = rdp->mynode; 2388 rnp = rdp->mynode;
2372 raw_spin_lock_irqsave_rcu_node(rnp, flags); 2389 raw_spin_lock_irqsave_rcu_node(rnp, flags);
2373 if (rdp->cpu_no_qs.b.norm || rdp->gpnum != rnp->gpnum || 2390 if (rdp->cpu_no_qs.b.norm || rdp->gp_seq != rnp->gp_seq ||
2374 rnp->completed == rnp->gpnum || rdp->gpwrap) { 2391 rdp->gpwrap) {
2375 2392
2376 /* 2393 /*
2377 * The grace period in which this quiescent state was 2394 * The grace period in which this quiescent state was
@@ -2396,7 +2413,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
2396 */ 2413 */
2397 needwake = rcu_accelerate_cbs(rsp, rnp, rdp); 2414 needwake = rcu_accelerate_cbs(rsp, rnp, rdp);
2398 2415
2399 rcu_report_qs_rnp(mask, rsp, rnp, rnp->gpnum, flags); 2416 rcu_report_qs_rnp(mask, rsp, rnp, rnp->gp_seq, flags);
2400 /* ^^^ Released rnp->lock */ 2417 /* ^^^ Released rnp->lock */
2401 if (needwake) 2418 if (needwake)
2402 rcu_gp_kthread_wake(rsp); 2419 rcu_gp_kthread_wake(rsp);
@@ -2441,17 +2458,16 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
2441 */ 2458 */
2442static void rcu_cleanup_dying_cpu(struct rcu_state *rsp) 2459static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
2443{ 2460{
2444 RCU_TRACE(unsigned long mask;) 2461 RCU_TRACE(bool blkd;)
2445 RCU_TRACE(struct rcu_data *rdp = this_cpu_ptr(rsp->rda);) 2462 RCU_TRACE(struct rcu_data *rdp = this_cpu_ptr(rsp->rda);)
2446 RCU_TRACE(struct rcu_node *rnp = rdp->mynode;) 2463 RCU_TRACE(struct rcu_node *rnp = rdp->mynode;)
2447 2464
2448 if (!IS_ENABLED(CONFIG_HOTPLUG_CPU)) 2465 if (!IS_ENABLED(CONFIG_HOTPLUG_CPU))
2449 return; 2466 return;
2450 2467
2451 RCU_TRACE(mask = rdp->grpmask;) 2468 RCU_TRACE(blkd = !!(rnp->qsmask & rdp->grpmask);)
2452 trace_rcu_grace_period(rsp->name, 2469 trace_rcu_grace_period(rsp->name, rnp->gp_seq,
2453 rnp->gpnum + 1 - !!(rnp->qsmask & mask), 2470 blkd ? TPS("cpuofl") : TPS("cpuofl-bgp"));
2454 TPS("cpuofl"));
2455} 2471}
2456 2472
2457/* 2473/*
@@ -2463,7 +2479,7 @@ static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
2463 * This function therefore goes up the tree of rcu_node structures, 2479 * This function therefore goes up the tree of rcu_node structures,
2464 * clearing the corresponding bits in the ->qsmaskinit fields. Note that 2480 * clearing the corresponding bits in the ->qsmaskinit fields. Note that
2465 * the leaf rcu_node structure's ->qsmaskinit field has already been 2481 * the leaf rcu_node structure's ->qsmaskinit field has already been
2466 * updated 2482 * updated.
2467 * 2483 *
2468 * This function does check that the specified rcu_node structure has 2484 * This function does check that the specified rcu_node structure has
2469 * all CPUs offline and no blocked tasks, so it is OK to invoke it 2485 * all CPUs offline and no blocked tasks, so it is OK to invoke it
@@ -2476,9 +2492,10 @@ static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf)
2476 long mask; 2492 long mask;
2477 struct rcu_node *rnp = rnp_leaf; 2493 struct rcu_node *rnp = rnp_leaf;
2478 2494
2479 raw_lockdep_assert_held_rcu_node(rnp); 2495 raw_lockdep_assert_held_rcu_node(rnp_leaf);
2480 if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) || 2496 if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) ||
2481 rnp->qsmaskinit || rcu_preempt_has_tasks(rnp)) 2497 WARN_ON_ONCE(rnp_leaf->qsmaskinit) ||
2498 WARN_ON_ONCE(rcu_preempt_has_tasks(rnp_leaf)))
2482 return; 2499 return;
2483 for (;;) { 2500 for (;;) {
2484 mask = rnp->grpmask; 2501 mask = rnp->grpmask;
@@ -2487,7 +2504,8 @@ static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf)
2487 break; 2504 break;
2488 raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */ 2505 raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
2489 rnp->qsmaskinit &= ~mask; 2506 rnp->qsmaskinit &= ~mask;
2490 rnp->qsmask &= ~mask; 2507 /* Between grace periods, so better already be zero! */
2508 WARN_ON_ONCE(rnp->qsmask);
2491 if (rnp->qsmaskinit) { 2509 if (rnp->qsmaskinit) {
2492 raw_spin_unlock_rcu_node(rnp); 2510 raw_spin_unlock_rcu_node(rnp);
2493 /* irqs remain disabled. */ 2511 /* irqs remain disabled. */
@@ -2630,6 +2648,7 @@ void rcu_check_callbacks(int user)
2630 2648
2631 rcu_sched_qs(); 2649 rcu_sched_qs();
2632 rcu_bh_qs(); 2650 rcu_bh_qs();
2651 rcu_note_voluntary_context_switch(current);
2633 2652
2634 } else if (!in_softirq()) { 2653 } else if (!in_softirq()) {
2635 2654
@@ -2645,8 +2664,7 @@ void rcu_check_callbacks(int user)
2645 rcu_preempt_check_callbacks(); 2664 rcu_preempt_check_callbacks();
2646 if (rcu_pending()) 2665 if (rcu_pending())
2647 invoke_rcu_core(); 2666 invoke_rcu_core();
2648 if (user) 2667
2649 rcu_note_voluntary_context_switch(current);
2650 trace_rcu_utilization(TPS("End scheduler-tick")); 2668 trace_rcu_utilization(TPS("End scheduler-tick"));
2651} 2669}
2652 2670
@@ -2681,17 +2699,8 @@ static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *rsp))
2681 /* rcu_initiate_boost() releases rnp->lock */ 2699 /* rcu_initiate_boost() releases rnp->lock */
2682 continue; 2700 continue;
2683 } 2701 }
2684 if (rnp->parent && 2702 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
2685 (rnp->parent->qsmask & rnp->grpmask)) { 2703 continue;
2686 /*
2687 * Race between grace-period
2688 * initialization and task exiting RCU
2689 * read-side critical section: Report.
2690 */
2691 rcu_report_unblock_qs_rnp(rsp, rnp, flags);
2692 /* rcu_report_unblock_qs_rnp() rlses ->lock */
2693 continue;
2694 }
2695 } 2704 }
2696 for_each_leaf_node_possible_cpu(rnp, cpu) { 2705 for_each_leaf_node_possible_cpu(rnp, cpu) {
2697 unsigned long bit = leaf_node_cpu_bit(rnp, cpu); 2706 unsigned long bit = leaf_node_cpu_bit(rnp, cpu);
@@ -2701,8 +2710,8 @@ static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *rsp))
2701 } 2710 }
2702 } 2711 }
2703 if (mask != 0) { 2712 if (mask != 0) {
2704 /* Idle/offline CPUs, report (releases rnp->lock. */ 2713 /* Idle/offline CPUs, report (releases rnp->lock). */
2705 rcu_report_qs_rnp(mask, rsp, rnp, rnp->gpnum, flags); 2714 rcu_report_qs_rnp(mask, rsp, rnp, rnp->gp_seq, flags);
2706 } else { 2715 } else {
2707 /* Nothing to do here, so just drop the lock. */ 2716 /* Nothing to do here, so just drop the lock. */
2708 raw_spin_unlock_irqrestore_rcu_node(rnp, flags); 2717 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
@@ -2747,6 +2756,65 @@ static void force_quiescent_state(struct rcu_state *rsp)
2747} 2756}
2748 2757
2749/* 2758/*
2759 * This function checks for grace-period requests that fail to motivate
2760 * RCU to come out of its idle mode.
2761 */
2762static void
2763rcu_check_gp_start_stall(struct rcu_state *rsp, struct rcu_node *rnp,
2764 struct rcu_data *rdp)
2765{
2766 const unsigned long gpssdelay = rcu_jiffies_till_stall_check() * HZ;
2767 unsigned long flags;
2768 unsigned long j;
2769 struct rcu_node *rnp_root = rcu_get_root(rsp);
2770 static atomic_t warned = ATOMIC_INIT(0);
2771
2772 if (!IS_ENABLED(CONFIG_PROVE_RCU) || rcu_gp_in_progress(rsp) ||
2773 ULONG_CMP_GE(rnp_root->gp_seq, rnp_root->gp_seq_needed))
2774 return;
2775 j = jiffies; /* Expensive access, and in common case don't get here. */
2776 if (time_before(j, READ_ONCE(rsp->gp_req_activity) + gpssdelay) ||
2777 time_before(j, READ_ONCE(rsp->gp_activity) + gpssdelay) ||
2778 atomic_read(&warned))
2779 return;
2780
2781 raw_spin_lock_irqsave_rcu_node(rnp, flags);
2782 j = jiffies;
2783 if (rcu_gp_in_progress(rsp) ||
2784 ULONG_CMP_GE(rnp_root->gp_seq, rnp_root->gp_seq_needed) ||
2785 time_before(j, READ_ONCE(rsp->gp_req_activity) + gpssdelay) ||
2786 time_before(j, READ_ONCE(rsp->gp_activity) + gpssdelay) ||
2787 atomic_read(&warned)) {
2788 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
2789 return;
2790 }
2791 /* Hold onto the leaf lock to make others see warned==1. */
2792
2793 if (rnp_root != rnp)
2794 raw_spin_lock_rcu_node(rnp_root); /* irqs already disabled. */
2795 j = jiffies;
2796 if (rcu_gp_in_progress(rsp) ||
2797 ULONG_CMP_GE(rnp_root->gp_seq, rnp_root->gp_seq_needed) ||
2798 time_before(j, rsp->gp_req_activity + gpssdelay) ||
2799 time_before(j, rsp->gp_activity + gpssdelay) ||
2800 atomic_xchg(&warned, 1)) {
2801 raw_spin_unlock_rcu_node(rnp_root); /* irqs remain disabled. */
2802 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
2803 return;
2804 }
2805 pr_alert("%s: g%ld->%ld gar:%lu ga:%lu f%#x gs:%d %s->state:%#lx\n",
2806 __func__, (long)READ_ONCE(rsp->gp_seq),
2807 (long)READ_ONCE(rnp_root->gp_seq_needed),
2808 j - rsp->gp_req_activity, j - rsp->gp_activity,
2809 rsp->gp_flags, rsp->gp_state, rsp->name,
2810 rsp->gp_kthread ? rsp->gp_kthread->state : 0x1ffffL);
2811 WARN_ON(1);
2812 if (rnp_root != rnp)
2813 raw_spin_unlock_rcu_node(rnp_root);
2814 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
2815}
2816
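The new rcu_check_gp_start_stall() repeats its bail-out checks three times: once locklessly, once under the leaf rcu_node lock, and once more under the root lock, finishing with atomic_xchg(&warned, 1) so that exactly one task prints the report no matter how many CPUs notice the problem. A minimal sketch of that warn-once shape, stripped of the RCU specifics (condition_still_bad() and report_lock are hypothetical stand-ins):

	static bool condition_still_bad(void);		/* Hypothetical predicate. */
	static atomic_t warned = ATOMIC_INIT(0);
	static DEFINE_SPINLOCK(report_lock);

	static void check_and_warn_once(void)
	{
		/* Cheap lockless filter: the common case returns immediately. */
		if (!condition_still_bad() || atomic_read(&warned))
			return;

		spin_lock(&report_lock);
		/* Re-check under the lock; the condition may have cleared meanwhile. */
		if (!condition_still_bad() || atomic_xchg(&warned, 1)) {
			spin_unlock(&report_lock);
			return;
		}
		pr_alert("still stalled\n");	/* Exactly one task ever reaches this. */
		WARN_ON(1);
		spin_unlock(&report_lock);
	}

Holding the lock across the atomic_xchg() matches the "Hold onto the leaf lock to make others see warned==1" comment above: any later task that reaches the locked re-check is guaranteed to observe the flag.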
2817/*
2750 * This does the RCU core processing work for the specified rcu_state 2818 * This does the RCU core processing work for the specified rcu_state
2751 * and rcu_data structures. This may be called only from the CPU to 2819 * and rcu_data structures. This may be called only from the CPU to
2752 * whom the rdp belongs. 2820 * whom the rdp belongs.
@@ -2755,9 +2823,8 @@ static void
2755__rcu_process_callbacks(struct rcu_state *rsp) 2823__rcu_process_callbacks(struct rcu_state *rsp)
2756{ 2824{
2757 unsigned long flags; 2825 unsigned long flags;
2758 bool needwake;
2759 struct rcu_data *rdp = raw_cpu_ptr(rsp->rda); 2826 struct rcu_data *rdp = raw_cpu_ptr(rsp->rda);
2760 struct rcu_node *rnp; 2827 struct rcu_node *rnp = rdp->mynode;
2761 2828
2762 WARN_ON_ONCE(!rdp->beenonline); 2829 WARN_ON_ONCE(!rdp->beenonline);
2763 2830
@@ -2768,18 +2835,13 @@ __rcu_process_callbacks(struct rcu_state *rsp)
2768 if (!rcu_gp_in_progress(rsp) && 2835 if (!rcu_gp_in_progress(rsp) &&
2769 rcu_segcblist_is_enabled(&rdp->cblist)) { 2836 rcu_segcblist_is_enabled(&rdp->cblist)) {
2770 local_irq_save(flags); 2837 local_irq_save(flags);
2771 if (rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL)) { 2838 if (!rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL))
2772 local_irq_restore(flags); 2839 rcu_accelerate_cbs_unlocked(rsp, rnp, rdp);
2773 } else { 2840 local_irq_restore(flags);
2774 rnp = rdp->mynode;
2775 raw_spin_lock_rcu_node(rnp); /* irqs disabled. */
2776 needwake = rcu_accelerate_cbs(rsp, rnp, rdp);
2777 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
2778 if (needwake)
2779 rcu_gp_kthread_wake(rsp);
2780 }
2781 } 2841 }
2782 2842
2843 rcu_check_gp_start_stall(rsp, rnp, rdp);
2844
2783 /* If there are callbacks ready, invoke them. */ 2845 /* If there are callbacks ready, invoke them. */
2784 if (rcu_segcblist_ready_cbs(&rdp->cblist)) 2846 if (rcu_segcblist_ready_cbs(&rdp->cblist))
2785 invoke_rcu_callbacks(rsp, rdp); 2847 invoke_rcu_callbacks(rsp, rdp);
@@ -2833,8 +2895,6 @@ static void invoke_rcu_core(void)
2833static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp, 2895static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
2834 struct rcu_head *head, unsigned long flags) 2896 struct rcu_head *head, unsigned long flags)
2835{ 2897{
2836 bool needwake;
2837
2838 /* 2898 /*
2839 * If called from an extended quiescent state, invoke the RCU 2899 * If called from an extended quiescent state, invoke the RCU
2840 * core in order to force a re-evaluation of RCU's idleness. 2900 * core in order to force a re-evaluation of RCU's idleness.
@@ -2861,13 +2921,7 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
2861 2921
2862 /* Start a new grace period if one not already started. */ 2922 /* Start a new grace period if one not already started. */
2863 if (!rcu_gp_in_progress(rsp)) { 2923 if (!rcu_gp_in_progress(rsp)) {
2864 struct rcu_node *rnp = rdp->mynode; 2924 rcu_accelerate_cbs_unlocked(rsp, rdp->mynode, rdp);
2865
2866 raw_spin_lock_rcu_node(rnp);
2867 needwake = rcu_accelerate_cbs(rsp, rnp, rdp);
2868 raw_spin_unlock_rcu_node(rnp);
2869 if (needwake)
2870 rcu_gp_kthread_wake(rsp);
2871 } else { 2925 } else {
2872 /* Give the grace period a kick. */ 2926 /* Give the grace period a kick. */
2873 rdp->blimit = LONG_MAX; 2927 rdp->blimit = LONG_MAX;
@@ -3037,7 +3091,7 @@ EXPORT_SYMBOL_GPL(kfree_call_rcu);
3037 * when there was in fact only one the whole time, as this just adds 3091 * when there was in fact only one the whole time, as this just adds
3038 * some overhead: RCU still operates correctly. 3092 * some overhead: RCU still operates correctly.
3039 */ 3093 */
3040static inline int rcu_blocking_is_gp(void) 3094static int rcu_blocking_is_gp(void)
3041{ 3095{
3042 int ret; 3096 int ret;
3043 3097
@@ -3136,16 +3190,10 @@ unsigned long get_state_synchronize_rcu(void)
3136{ 3190{
3137 /* 3191 /*
3138 * Any prior manipulation of RCU-protected data must happen 3192 * Any prior manipulation of RCU-protected data must happen
3139 * before the load from ->gpnum. 3193 * before the load from ->gp_seq.
3140 */ 3194 */
3141 smp_mb(); /* ^^^ */ 3195 smp_mb(); /* ^^^ */
3142 3196 return rcu_seq_snap(&rcu_state_p->gp_seq);
3143 /*
3144 * Make sure this load happens before the purportedly
3145 * time-consuming work between get_state_synchronize_rcu()
3146 * and cond_synchronize_rcu().
3147 */
3148 return smp_load_acquire(&rcu_state_p->gpnum);
3149} 3197}
3150EXPORT_SYMBOL_GPL(get_state_synchronize_rcu); 3198EXPORT_SYMBOL_GPL(get_state_synchronize_rcu);
3151 3199
@@ -3165,15 +3213,10 @@ EXPORT_SYMBOL_GPL(get_state_synchronize_rcu);
3165 */ 3213 */
3166void cond_synchronize_rcu(unsigned long oldstate) 3214void cond_synchronize_rcu(unsigned long oldstate)
3167{ 3215{
3168 unsigned long newstate; 3216 if (!rcu_seq_done(&rcu_state_p->gp_seq, oldstate))
3169
3170 /*
3171 * Ensure that this load happens before any RCU-destructive
3172 * actions the caller might carry out after we return.
3173 */
3174 newstate = smp_load_acquire(&rcu_state_p->completed);
3175 if (ULONG_CMP_GE(oldstate, newstate))
3176 synchronize_rcu(); 3217 synchronize_rcu();
3218 else
3219 smp_mb(); /* Ensure GP ends before subsequent accesses. */
3177} 3220}
3178EXPORT_SYMBOL_GPL(cond_synchronize_rcu); 3221EXPORT_SYMBOL_GPL(cond_synchronize_rcu);
3179 3222
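get_state_synchronize_rcu() and cond_synchronize_rcu() now ride directly on ->gp_seq via rcu_seq_snap() and rcu_seq_done(), with the cond_ variant supplying a full memory barrier when it can skip the grace period. The caller-visible pattern is unchanged; roughly, where remove_object(), do_something_expensive() and free_object() are hypothetical:

	unsigned long gp_state;

	gp_state = get_state_synchronize_rcu();	/* "Some future GP ends after this point." */
	remove_object(p);			/* Unlink so no new readers can find p. */
	do_something_expensive();		/* A grace period may elapse on its own here. */
	cond_synchronize_rcu(gp_state);		/* Blocks only if that GP has not yet completed. */
	free_object(p);				/* All readers that could still see p are done. */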
@@ -3188,16 +3231,10 @@ unsigned long get_state_synchronize_sched(void)
3188{ 3231{
3189 /* 3232 /*
3190 * Any prior manipulation of RCU-protected data must happen 3233 * Any prior manipulation of RCU-protected data must happen
3191 * before the load from ->gpnum. 3234 * before the load from ->gp_seq.
3192 */ 3235 */
3193 smp_mb(); /* ^^^ */ 3236 smp_mb(); /* ^^^ */
3194 3237 return rcu_seq_snap(&rcu_sched_state.gp_seq);
3195 /*
3196 * Make sure this load happens before the purportedly
3197 * time-consuming work between get_state_synchronize_sched()
3198 * and cond_synchronize_sched().
3199 */
3200 return smp_load_acquire(&rcu_sched_state.gpnum);
3201} 3238}
3202EXPORT_SYMBOL_GPL(get_state_synchronize_sched); 3239EXPORT_SYMBOL_GPL(get_state_synchronize_sched);
3203 3240
@@ -3217,15 +3254,10 @@ EXPORT_SYMBOL_GPL(get_state_synchronize_sched);
3217 */ 3254 */
3218void cond_synchronize_sched(unsigned long oldstate) 3255void cond_synchronize_sched(unsigned long oldstate)
3219{ 3256{
3220 unsigned long newstate; 3257 if (!rcu_seq_done(&rcu_sched_state.gp_seq, oldstate))
3221
3222 /*
3223 * Ensure that this load happens before any RCU-destructive
3224 * actions the caller might carry out after we return.
3225 */
3226 newstate = smp_load_acquire(&rcu_sched_state.completed);
3227 if (ULONG_CMP_GE(oldstate, newstate))
3228 synchronize_sched(); 3258 synchronize_sched();
3259 else
3260 smp_mb(); /* Ensure GP ends before subsequent accesses. */
3229} 3261}
3230EXPORT_SYMBOL_GPL(cond_synchronize_sched); 3262EXPORT_SYMBOL_GPL(cond_synchronize_sched);
3231 3263
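Both flavors lean on the rcu_seq helpers from kernel/rcu/rcu.h. As a rough model, ->gp_seq carries a grace-period count in its upper bits and a small state field in the two low-order bits, zero when idle and nonzero while a grace period is in flight. A simplified, hypothetically named sketch of the helpers used above, assuming that two-bit layout:

	#define SEQ_STATE_MASK	0x3UL	/* Low bits: 0 = idle, nonzero = GP in progress. */

	/* Number of full grace periods encoded in s. */
	static inline unsigned long seq_ctr(unsigned long s)
	{
		return s >> 2;
	}

	/* Smallest sequence value at which a full GP has elapsed since *sp was sampled. */
	static inline unsigned long seq_snap(unsigned long *sp)
	{
		return (READ_ONCE(*sp) + 2 * SEQ_STATE_MASK + 1) & ~SEQ_STATE_MASK;
	}

	/* Has the grace period named by snapshot s completed? */
	static inline bool seq_done(unsigned long *sp, unsigned long s)
	{
		return ULONG_CMP_GE(READ_ONCE(*sp), s);
	}

If a grace period is already in progress when seq_snap() samples the counter, the rounding skips past it to the next one, which is why a snapshot taken at any time can safely be handed to seq_done() later.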
@@ -3261,12 +3293,8 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
3261 !rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL)) 3293 !rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL))
3262 return 1; 3294 return 1;
3263 3295
3264 /* Has another RCU grace period completed? */ 3296 /* Have RCU grace period completed or started? */
3265 if (READ_ONCE(rnp->completed) != rdp->completed) /* outside lock */ 3297 if (rcu_seq_current(&rnp->gp_seq) != rdp->gp_seq ||
3266 return 1;
3267
3268 /* Has a new RCU grace period started? */
3269 if (READ_ONCE(rnp->gpnum) != rdp->gpnum ||
3270 unlikely(READ_ONCE(rdp->gpwrap))) /* outside lock */ 3298 unlikely(READ_ONCE(rdp->gpwrap))) /* outside lock */
3271 return 1; 3299 return 1;
3272 3300
@@ -3298,7 +3326,7 @@ static int rcu_pending(void)
3298 * non-NULL, store an indication of whether all callbacks are lazy. 3326 * non-NULL, store an indication of whether all callbacks are lazy.
3299 * (If there are no callbacks, all of them are deemed to be lazy.) 3327 * (If there are no callbacks, all of them are deemed to be lazy.)
3300 */ 3328 */
3301static bool __maybe_unused rcu_cpu_has_callbacks(bool *all_lazy) 3329static bool rcu_cpu_has_callbacks(bool *all_lazy)
3302{ 3330{
3303 bool al = true; 3331 bool al = true;
3304 bool hc = false; 3332 bool hc = false;
@@ -3484,17 +3512,22 @@ EXPORT_SYMBOL_GPL(rcu_barrier_sched);
3484static void rcu_init_new_rnp(struct rcu_node *rnp_leaf) 3512static void rcu_init_new_rnp(struct rcu_node *rnp_leaf)
3485{ 3513{
3486 long mask; 3514 long mask;
3515 long oldmask;
3487 struct rcu_node *rnp = rnp_leaf; 3516 struct rcu_node *rnp = rnp_leaf;
3488 3517
3489 raw_lockdep_assert_held_rcu_node(rnp); 3518 raw_lockdep_assert_held_rcu_node(rnp_leaf);
3519 WARN_ON_ONCE(rnp->wait_blkd_tasks);
3490 for (;;) { 3520 for (;;) {
3491 mask = rnp->grpmask; 3521 mask = rnp->grpmask;
3492 rnp = rnp->parent; 3522 rnp = rnp->parent;
3493 if (rnp == NULL) 3523 if (rnp == NULL)
3494 return; 3524 return;
3495 raw_spin_lock_rcu_node(rnp); /* Interrupts already disabled. */ 3525 raw_spin_lock_rcu_node(rnp); /* Interrupts already disabled. */
3526 oldmask = rnp->qsmaskinit;
3496 rnp->qsmaskinit |= mask; 3527 rnp->qsmaskinit |= mask;
3497 raw_spin_unlock_rcu_node(rnp); /* Interrupts remain disabled. */ 3528 raw_spin_unlock_rcu_node(rnp); /* Interrupts remain disabled. */
3529 if (oldmask)
3530 return;
3498 } 3531 }
3499} 3532}
3500 3533
@@ -3511,6 +3544,10 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
3511 rdp->dynticks = &per_cpu(rcu_dynticks, cpu); 3544 rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
3512 WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != 1); 3545 WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != 1);
3513 WARN_ON_ONCE(rcu_dynticks_in_eqs(rcu_dynticks_snap(rdp->dynticks))); 3546 WARN_ON_ONCE(rcu_dynticks_in_eqs(rcu_dynticks_snap(rdp->dynticks)));
3547 rdp->rcu_ofl_gp_seq = rsp->gp_seq;
3548 rdp->rcu_ofl_gp_flags = RCU_GP_CLEANED;
3549 rdp->rcu_onl_gp_seq = rsp->gp_seq;
3550 rdp->rcu_onl_gp_flags = RCU_GP_CLEANED;
3514 rdp->cpu = cpu; 3551 rdp->cpu = cpu;
3515 rdp->rsp = rsp; 3552 rdp->rsp = rsp;
3516 rcu_boot_init_nocb_percpu_data(rdp); 3553 rcu_boot_init_nocb_percpu_data(rdp);
@@ -3518,9 +3555,9 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
3518 3555
3519/* 3556/*
3520 * Initialize a CPU's per-CPU RCU data. Note that only one online or 3557 * Initialize a CPU's per-CPU RCU data. Note that only one online or
3521 * offline event can be happening at a given time. Note also that we 3558 * offline event can be happening at a given time. Note also that we can
3522 * can accept some slop in the rsp->completed access due to the fact 3559 * accept some slop in the rsp->gp_seq access due to the fact that this
3523 * that this CPU cannot possibly have any RCU callbacks in flight yet. 3560 * CPU cannot possibly have any RCU callbacks in flight yet.
3524 */ 3561 */
3525static void 3562static void
3526rcu_init_percpu_data(int cpu, struct rcu_state *rsp) 3563rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
@@ -3549,14 +3586,14 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
3549 rnp = rdp->mynode; 3586 rnp = rdp->mynode;
3550 raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */ 3587 raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
3551 rdp->beenonline = true; /* We have now been online. */ 3588 rdp->beenonline = true; /* We have now been online. */
3552 rdp->gpnum = rnp->completed; /* Make CPU later note any new GP. */ 3589 rdp->gp_seq = rnp->gp_seq;
3553 rdp->completed = rnp->completed; 3590 rdp->gp_seq_needed = rnp->gp_seq;
3554 rdp->cpu_no_qs.b.norm = true; 3591 rdp->cpu_no_qs.b.norm = true;
3555 rdp->rcu_qs_ctr_snap = per_cpu(rcu_dynticks.rcu_qs_ctr, cpu); 3592 rdp->rcu_qs_ctr_snap = per_cpu(rcu_dynticks.rcu_qs_ctr, cpu);
3556 rdp->core_needs_qs = false; 3593 rdp->core_needs_qs = false;
3557 rdp->rcu_iw_pending = false; 3594 rdp->rcu_iw_pending = false;
3558 rdp->rcu_iw_gpnum = rnp->gpnum - 1; 3595 rdp->rcu_iw_gp_seq = rnp->gp_seq - 1;
3559 trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl")); 3596 trace_rcu_grace_period(rsp->name, rdp->gp_seq, TPS("cpuonl"));
3560 raw_spin_unlock_irqrestore_rcu_node(rnp, flags); 3597 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
3561} 3598}
3562 3599
@@ -3705,7 +3742,15 @@ void rcu_cpu_starting(unsigned int cpu)
3705 nbits = bitmap_weight(&oldmask, BITS_PER_LONG); 3742 nbits = bitmap_weight(&oldmask, BITS_PER_LONG);
3706 /* Allow lockless access for expedited grace periods. */ 3743 /* Allow lockless access for expedited grace periods. */
3707 smp_store_release(&rsp->ncpus, rsp->ncpus + nbits); /* ^^^ */ 3744 smp_store_release(&rsp->ncpus, rsp->ncpus + nbits); /* ^^^ */
3708 raw_spin_unlock_irqrestore_rcu_node(rnp, flags); 3745 rcu_gpnum_ovf(rnp, rdp); /* Offline-induced counter wrap? */
3746 rdp->rcu_onl_gp_seq = READ_ONCE(rsp->gp_seq);
3747 rdp->rcu_onl_gp_flags = READ_ONCE(rsp->gp_flags);
3748 if (rnp->qsmask & mask) { /* RCU waiting on incoming CPU? */
3749 /* Report QS -after- changing ->qsmaskinitnext! */
3750 rcu_report_qs_rnp(mask, rsp, rnp, rnp->gp_seq, flags);
3751 } else {
3752 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
3753 }
3709 } 3754 }
3710 smp_mb(); /* Ensure RCU read-side usage follows above initialization. */ 3755 smp_mb(); /* Ensure RCU read-side usage follows above initialization. */
3711} 3756}
@@ -3713,7 +3758,7 @@ void rcu_cpu_starting(unsigned int cpu)
3713#ifdef CONFIG_HOTPLUG_CPU 3758#ifdef CONFIG_HOTPLUG_CPU
3714/* 3759/*
3715 * The CPU is exiting the idle loop into the arch_cpu_idle_dead() 3760 * The CPU is exiting the idle loop into the arch_cpu_idle_dead()
3716 * function. We now remove it from the rcu_node tree's ->qsmaskinit 3761 * function. We now remove it from the rcu_node tree's ->qsmaskinitnext
3717 * bit masks. 3762 * bit masks.
3718 */ 3763 */
3719static void rcu_cleanup_dying_idle_cpu(int cpu, struct rcu_state *rsp) 3764static void rcu_cleanup_dying_idle_cpu(int cpu, struct rcu_state *rsp)
@@ -3725,9 +3770,18 @@ static void rcu_cleanup_dying_idle_cpu(int cpu, struct rcu_state *rsp)
3725 3770
3726 /* Remove outgoing CPU from mask in the leaf rcu_node structure. */ 3771 /* Remove outgoing CPU from mask in the leaf rcu_node structure. */
3727 mask = rdp->grpmask; 3772 mask = rdp->grpmask;
3773 spin_lock(&rsp->ofl_lock);
3728 raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */ 3774 raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */
3775 rdp->rcu_ofl_gp_seq = READ_ONCE(rsp->gp_seq);
3776 rdp->rcu_ofl_gp_flags = READ_ONCE(rsp->gp_flags);
3777 if (rnp->qsmask & mask) { /* RCU waiting on outgoing CPU? */
3778 /* Report quiescent state -before- changing ->qsmaskinitnext! */
3779 rcu_report_qs_rnp(mask, rsp, rnp, rnp->gp_seq, flags);
3780 raw_spin_lock_irqsave_rcu_node(rnp, flags);
3781 }
3729 rnp->qsmaskinitnext &= ~mask; 3782 rnp->qsmaskinitnext &= ~mask;
3730 raw_spin_unlock_irqrestore_rcu_node(rnp, flags); 3783 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
3784 spin_unlock(&rsp->ofl_lock);
3731} 3785}
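The offline path now mirrors rcu_cpu_starting() above: it records ->gp_seq and ->gp_flags for the diagnostics later printed by dump_blkd_tasks(), reports a quiescent state on the outgoing CPU's behalf if the current grace period is still waiting on it (rcu_report_qs_rnp() drops the rcu_node lock, hence the immediate re-acquisition), and brackets the whole update with the new rsp->ofl_lock so grace-period pre-initialization observes ->qsmaskinitnext either before or after the change. A loose sketch of that outer-lock idea, with hypothetical names throughout; in the real code the extra lock is what orders this bookkeeping against the grace-period kthread's scan over many leaves, which a single per-leaf lock cannot do on its own:

	struct leaf {
		raw_spinlock_t lock;
		unsigned long online_mask;	/* Which CPUs of this leaf are online. */
		unsigned long init_mask;	/* Snapshot used by the current GP. */
	};

	static DEFINE_SPINLOCK(ofl_lock);

	static void mark_cpu_offline(struct leaf *lp, unsigned long bit)
	{
		unsigned long flags;

		spin_lock(&ofl_lock);
		raw_spin_lock_irqsave(&lp->lock, flags);
		lp->online_mask &= ~bit;
		raw_spin_unlock_irqrestore(&lp->lock, flags);
		spin_unlock(&ofl_lock);
	}

	static void gp_preinit_one_leaf(struct leaf *lp)
	{
		unsigned long flags;

		spin_lock(&ofl_lock);		/* Order against mark_cpu_offline(). */
		raw_spin_lock_irqsave(&lp->lock, flags);
		lp->init_mask = lp->online_mask;	/* Stable snapshot for this GP. */
		raw_spin_unlock_irqrestore(&lp->lock, flags);
		spin_unlock(&ofl_lock);
	}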
3732 3786
3733/* 3787/*
@@ -3839,12 +3893,16 @@ static int __init rcu_spawn_gp_kthread(void)
3839 struct task_struct *t; 3893 struct task_struct *t;
3840 3894
3841 /* Force priority into range. */ 3895 /* Force priority into range. */
3842 if (IS_ENABLED(CONFIG_RCU_BOOST) && kthread_prio < 1) 3896 if (IS_ENABLED(CONFIG_RCU_BOOST) && kthread_prio < 2
3897 && IS_BUILTIN(CONFIG_RCU_TORTURE_TEST))
3898 kthread_prio = 2;
3899 else if (IS_ENABLED(CONFIG_RCU_BOOST) && kthread_prio < 1)
3843 kthread_prio = 1; 3900 kthread_prio = 1;
3844 else if (kthread_prio < 0) 3901 else if (kthread_prio < 0)
3845 kthread_prio = 0; 3902 kthread_prio = 0;
3846 else if (kthread_prio > 99) 3903 else if (kthread_prio > 99)
3847 kthread_prio = 99; 3904 kthread_prio = 99;
3905
3848 if (kthread_prio != kthread_prio_in) 3906 if (kthread_prio != kthread_prio_in)
3849 pr_alert("rcu_spawn_gp_kthread(): Limited prio to %d from %d\n", 3907 pr_alert("rcu_spawn_gp_kthread(): Limited prio to %d from %d\n",
3850 kthread_prio, kthread_prio_in); 3908 kthread_prio, kthread_prio_in);
@@ -3928,8 +3986,9 @@ static void __init rcu_init_one(struct rcu_state *rsp)
3928 raw_spin_lock_init(&rnp->fqslock); 3986 raw_spin_lock_init(&rnp->fqslock);
3929 lockdep_set_class_and_name(&rnp->fqslock, 3987 lockdep_set_class_and_name(&rnp->fqslock,
3930 &rcu_fqs_class[i], fqs[i]); 3988 &rcu_fqs_class[i], fqs[i]);
3931 rnp->gpnum = rsp->gpnum; 3989 rnp->gp_seq = rsp->gp_seq;
3932 rnp->completed = rsp->completed; 3990 rnp->gp_seq_needed = rsp->gp_seq;
3991 rnp->completedqs = rsp->gp_seq;
3933 rnp->qsmask = 0; 3992 rnp->qsmask = 0;
3934 rnp->qsmaskinit = 0; 3993 rnp->qsmaskinit = 0;
3935 rnp->grplo = j * cpustride; 3994 rnp->grplo = j * cpustride;
@@ -3997,7 +4056,7 @@ static void __init rcu_init_geometry(void)
3997 if (rcu_fanout_leaf == RCU_FANOUT_LEAF && 4056 if (rcu_fanout_leaf == RCU_FANOUT_LEAF &&
3998 nr_cpu_ids == NR_CPUS) 4057 nr_cpu_ids == NR_CPUS)
3999 return; 4058 return;
4000 pr_info("RCU: Adjusting geometry for rcu_fanout_leaf=%d, nr_cpu_ids=%u\n", 4059 pr_info("Adjusting geometry for rcu_fanout_leaf=%d, nr_cpu_ids=%u\n",
4001 rcu_fanout_leaf, nr_cpu_ids); 4060 rcu_fanout_leaf, nr_cpu_ids);
4002 4061
4003 /* 4062 /*
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 78e051dffc5b..4e74df768c57 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -81,18 +81,16 @@ struct rcu_node {
81 raw_spinlock_t __private lock; /* Root rcu_node's lock protects */ 81 raw_spinlock_t __private lock; /* Root rcu_node's lock protects */
82 /* some rcu_state fields as well as */ 82 /* some rcu_state fields as well as */
83 /* following. */ 83 /* following. */
84 unsigned long gpnum; /* Current grace period for this node. */ 84 unsigned long gp_seq; /* Track rsp->rcu_gp_seq. */
85 /* This will either be equal to or one */ 85 unsigned long gp_seq_needed; /* Track rsp->rcu_gp_seq_needed. */
86 /* behind the root rcu_node's gpnum. */ 86 unsigned long completedqs; /* All QSes done for this node. */
87 unsigned long completed; /* Last GP completed for this node. */
88 /* This will either be equal to or one */
89 /* behind the root rcu_node's gpnum. */
90 unsigned long qsmask; /* CPUs or groups that need to switch in */ 87 unsigned long qsmask; /* CPUs or groups that need to switch in */
91 /* order for current grace period to proceed.*/ 88 /* order for current grace period to proceed.*/
92 /* In leaf rcu_node, each bit corresponds to */ 89 /* In leaf rcu_node, each bit corresponds to */
93 /* an rcu_data structure, otherwise, each */ 90 /* an rcu_data structure, otherwise, each */
94 /* bit corresponds to a child rcu_node */ 91 /* bit corresponds to a child rcu_node */
95 /* structure. */ 92 /* structure. */
93 unsigned long rcu_gp_init_mask; /* Mask of offline CPUs at GP init. */
96 unsigned long qsmaskinit; 94 unsigned long qsmaskinit;
97 /* Per-GP initial value for qsmask. */ 95 /* Per-GP initial value for qsmask. */
98 /* Initialized from ->qsmaskinitnext at the */ 96 /* Initialized from ->qsmaskinitnext at the */
@@ -158,7 +156,6 @@ struct rcu_node {
158 struct swait_queue_head nocb_gp_wq[2]; 156 struct swait_queue_head nocb_gp_wq[2];
159 /* Place for rcu_nocb_kthread() to wait GP. */ 157 /* Place for rcu_nocb_kthread() to wait GP. */
160#endif /* #ifdef CONFIG_RCU_NOCB_CPU */ 158#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
161 u8 need_future_gp[4]; /* Counts of upcoming GP requests. */
162 raw_spinlock_t fqslock ____cacheline_internodealigned_in_smp; 159 raw_spinlock_t fqslock ____cacheline_internodealigned_in_smp;
163 160
164 spinlock_t exp_lock ____cacheline_internodealigned_in_smp; 161 spinlock_t exp_lock ____cacheline_internodealigned_in_smp;
@@ -168,22 +165,6 @@ struct rcu_node {
168 bool exp_need_flush; /* Need to flush workitem? */ 165 bool exp_need_flush; /* Need to flush workitem? */
169} ____cacheline_internodealigned_in_smp; 166} ____cacheline_internodealigned_in_smp;
170 167
171/* Accessors for ->need_future_gp[] array. */
172#define need_future_gp_mask() \
173 (ARRAY_SIZE(((struct rcu_node *)NULL)->need_future_gp) - 1)
174#define need_future_gp_element(rnp, c) \
175 ((rnp)->need_future_gp[(c) & need_future_gp_mask()])
176#define need_any_future_gp(rnp) \
177({ \
178 int __i; \
179 bool __nonzero = false; \
180 \
181 for (__i = 0; __i < ARRAY_SIZE((rnp)->need_future_gp); __i++) \
182 __nonzero = __nonzero || \
183 READ_ONCE((rnp)->need_future_gp[__i]); \
184 __nonzero; \
185})
186
187/* 168/*
188 * Bitmasks in an rcu_node cover the interval [grplo, grphi] of CPU IDs, and 169 * Bitmasks in an rcu_node cover the interval [grplo, grphi] of CPU IDs, and
189 * are indexed relative to this interval rather than the global CPU ID space. 170 * are indexed relative to this interval rather than the global CPU ID space.
@@ -206,16 +187,14 @@ union rcu_noqs {
206/* Per-CPU data for read-copy update. */ 187/* Per-CPU data for read-copy update. */
207struct rcu_data { 188struct rcu_data {
208 /* 1) quiescent-state and grace-period handling : */ 189 /* 1) quiescent-state and grace-period handling : */
209 unsigned long completed; /* Track rsp->completed gp number */ 190 unsigned long gp_seq; /* Track rsp->rcu_gp_seq counter. */
210 /* in order to detect GP end. */ 191 unsigned long gp_seq_needed; /* Track rsp->rcu_gp_seq_needed ctr. */
211 unsigned long gpnum; /* Highest gp number that this CPU */
212 /* is aware of having started. */
213 unsigned long rcu_qs_ctr_snap;/* Snapshot of rcu_qs_ctr to check */ 192 unsigned long rcu_qs_ctr_snap;/* Snapshot of rcu_qs_ctr to check */
214 /* for rcu_all_qs() invocations. */ 193 /* for rcu_all_qs() invocations. */
215 union rcu_noqs cpu_no_qs; /* No QSes yet for this CPU. */ 194 union rcu_noqs cpu_no_qs; /* No QSes yet for this CPU. */
216 bool core_needs_qs; /* Core waits for quiesc state. */ 195 bool core_needs_qs; /* Core waits for quiesc state. */
217 bool beenonline; /* CPU online at least once. */ 196 bool beenonline; /* CPU online at least once. */
218 bool gpwrap; /* Possible gpnum/completed wrap. */ 197 bool gpwrap; /* Possible ->gp_seq wrap. */
219 struct rcu_node *mynode; /* This CPU's leaf of hierarchy */ 198 struct rcu_node *mynode; /* This CPU's leaf of hierarchy */
220 unsigned long grpmask; /* Mask to apply to leaf qsmask. */ 199 unsigned long grpmask; /* Mask to apply to leaf qsmask. */
221 unsigned long ticks_this_gp; /* The number of scheduling-clock */ 200 unsigned long ticks_this_gp; /* The number of scheduling-clock */
@@ -239,7 +218,6 @@ struct rcu_data {
239 218
240 /* 4) reasons this CPU needed to be kicked by force_quiescent_state */ 219 /* 4) reasons this CPU needed to be kicked by force_quiescent_state */
241 unsigned long dynticks_fqs; /* Kicked due to dynticks idle. */ 220 unsigned long dynticks_fqs; /* Kicked due to dynticks idle. */
242 unsigned long offline_fqs; /* Kicked due to being offline. */
243 unsigned long cond_resched_completed; 221 unsigned long cond_resched_completed;
244 /* Grace period that needs help */ 222 /* Grace period that needs help */
245 /* from cond_resched(). */ 223 /* from cond_resched(). */
@@ -278,12 +256,16 @@ struct rcu_data {
278 /* Leader CPU takes GP-end wakeups. */ 256 /* Leader CPU takes GP-end wakeups. */
279#endif /* #ifdef CONFIG_RCU_NOCB_CPU */ 257#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
280 258
281 /* 7) RCU CPU stall data. */ 259 /* 7) Diagnostic data, including RCU CPU stall warnings. */
282 unsigned int softirq_snap; /* Snapshot of softirq activity. */ 260 unsigned int softirq_snap; /* Snapshot of softirq activity. */
283 /* ->rcu_iw* fields protected by leaf rcu_node ->lock. */ 261 /* ->rcu_iw* fields protected by leaf rcu_node ->lock. */
284 struct irq_work rcu_iw; /* Check for non-irq activity. */ 262 struct irq_work rcu_iw; /* Check for non-irq activity. */
285 bool rcu_iw_pending; /* Is ->rcu_iw pending? */ 263 bool rcu_iw_pending; /* Is ->rcu_iw pending? */
286 unsigned long rcu_iw_gpnum; /* ->gpnum associated with ->rcu_iw. */ 264 unsigned long rcu_iw_gp_seq; /* ->gp_seq associated with ->rcu_iw. */
265 unsigned long rcu_ofl_gp_seq; /* ->gp_seq at last offline. */
266 short rcu_ofl_gp_flags; /* ->gp_flags at last offline. */
267 unsigned long rcu_onl_gp_seq; /* ->gp_seq at last online. */
268 short rcu_onl_gp_flags; /* ->gp_flags at last online. */
287 269
288 int cpu; 270 int cpu;
289 struct rcu_state *rsp; 271 struct rcu_state *rsp;
@@ -340,8 +322,7 @@ struct rcu_state {
340 322
341 u8 boost ____cacheline_internodealigned_in_smp; 323 u8 boost ____cacheline_internodealigned_in_smp;
342 /* Subject to priority boost. */ 324 /* Subject to priority boost. */
343 unsigned long gpnum; /* Current gp number. */ 325 unsigned long gp_seq; /* Grace-period sequence #. */
344 unsigned long completed; /* # of last completed gp. */
345 struct task_struct *gp_kthread; /* Task for grace periods. */ 326 struct task_struct *gp_kthread; /* Task for grace periods. */
346 struct swait_queue_head gp_wq; /* Where GP task waits. */ 327 struct swait_queue_head gp_wq; /* Where GP task waits. */
347 short gp_flags; /* Commands for GP task. */ 328 short gp_flags; /* Commands for GP task. */
@@ -373,6 +354,8 @@ struct rcu_state {
373 /* but in jiffies. */ 354 /* but in jiffies. */
374 unsigned long gp_activity; /* Time of last GP kthread */ 355 unsigned long gp_activity; /* Time of last GP kthread */
375 /* activity in jiffies. */ 356 /* activity in jiffies. */
357 unsigned long gp_req_activity; /* Time of last GP request */
358 /* in jiffies. */
376 unsigned long jiffies_stall; /* Time at which to check */ 359 unsigned long jiffies_stall; /* Time at which to check */
377 /* for CPU stalls. */ 360 /* for CPU stalls. */
378 unsigned long jiffies_resched; /* Time at which to resched */ 361 unsigned long jiffies_resched; /* Time at which to resched */
@@ -384,6 +367,10 @@ struct rcu_state {
384 const char *name; /* Name of structure. */ 367 const char *name; /* Name of structure. */
385 char abbr; /* Abbreviated name. */ 368 char abbr; /* Abbreviated name. */
386 struct list_head flavors; /* List of RCU flavors. */ 369 struct list_head flavors; /* List of RCU flavors. */
370
371 spinlock_t ofl_lock ____cacheline_internodealigned_in_smp;
372 /* Synchronize offline with */
373 /* GP pre-initialization. */
387}; 374};
388 375
389/* Values for rcu_state structure's gp_flags field. */ 376/* Values for rcu_state structure's gp_flags field. */
@@ -394,16 +381,20 @@ struct rcu_state {
394#define RCU_GP_IDLE 0 /* Initial state and no GP in progress. */ 381#define RCU_GP_IDLE 0 /* Initial state and no GP in progress. */
395#define RCU_GP_WAIT_GPS 1 /* Wait for grace-period start. */ 382#define RCU_GP_WAIT_GPS 1 /* Wait for grace-period start. */
396#define RCU_GP_DONE_GPS 2 /* Wait done for grace-period start. */ 383#define RCU_GP_DONE_GPS 2 /* Wait done for grace-period start. */
397#define RCU_GP_WAIT_FQS 3 /* Wait for force-quiescent-state time. */ 384#define RCU_GP_ONOFF 3 /* Grace-period initialization hotplug. */
398#define RCU_GP_DOING_FQS 4 /* Wait done for force-quiescent-state time. */ 385#define RCU_GP_INIT 4 /* Grace-period initialization. */
399#define RCU_GP_CLEANUP 5 /* Grace-period cleanup started. */ 386#define RCU_GP_WAIT_FQS 5 /* Wait for force-quiescent-state time. */
400#define RCU_GP_CLEANED 6 /* Grace-period cleanup complete. */ 387#define RCU_GP_DOING_FQS 6 /* Wait done for force-quiescent-state time. */
388#define RCU_GP_CLEANUP 7 /* Grace-period cleanup started. */
389#define RCU_GP_CLEANED 8 /* Grace-period cleanup complete. */
401 390
402#ifndef RCU_TREE_NONCORE 391#ifndef RCU_TREE_NONCORE
403static const char * const gp_state_names[] = { 392static const char * const gp_state_names[] = {
404 "RCU_GP_IDLE", 393 "RCU_GP_IDLE",
405 "RCU_GP_WAIT_GPS", 394 "RCU_GP_WAIT_GPS",
406 "RCU_GP_DONE_GPS", 395 "RCU_GP_DONE_GPS",
396 "RCU_GP_ONOFF",
397 "RCU_GP_INIT",
407 "RCU_GP_WAIT_FQS", 398 "RCU_GP_WAIT_FQS",
408 "RCU_GP_DOING_FQS", 399 "RCU_GP_DOING_FQS",
409 "RCU_GP_CLEANUP", 400 "RCU_GP_CLEANUP",
@@ -449,10 +440,13 @@ static bool rcu_preempt_has_tasks(struct rcu_node *rnp);
449static void rcu_print_detail_task_stall(struct rcu_state *rsp); 440static void rcu_print_detail_task_stall(struct rcu_state *rsp);
450static int rcu_print_task_stall(struct rcu_node *rnp); 441static int rcu_print_task_stall(struct rcu_node *rnp);
451static int rcu_print_task_exp_stall(struct rcu_node *rnp); 442static int rcu_print_task_exp_stall(struct rcu_node *rnp);
452static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp); 443static void rcu_preempt_check_blocked_tasks(struct rcu_state *rsp,
444 struct rcu_node *rnp);
453static void rcu_preempt_check_callbacks(void); 445static void rcu_preempt_check_callbacks(void);
454void call_rcu(struct rcu_head *head, rcu_callback_t func); 446void call_rcu(struct rcu_head *head, rcu_callback_t func);
455static void __init __rcu_init_preempt(void); 447static void __init __rcu_init_preempt(void);
448static void dump_blkd_tasks(struct rcu_state *rsp, struct rcu_node *rnp,
449 int ncheck);
456static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); 450static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
457static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); 451static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
458static void invoke_rcu_callbacks_kthread(void); 452static void invoke_rcu_callbacks_kthread(void);
@@ -489,7 +483,6 @@ static void __init rcu_spawn_nocb_kthreads(void);
489#ifdef CONFIG_RCU_NOCB_CPU 483#ifdef CONFIG_RCU_NOCB_CPU
490static void __init rcu_organize_nocb_kthreads(struct rcu_state *rsp); 484static void __init rcu_organize_nocb_kthreads(struct rcu_state *rsp);
491#endif /* #ifdef CONFIG_RCU_NOCB_CPU */ 485#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
492static void __maybe_unused rcu_kick_nohz_cpu(int cpu);
493static bool init_nocb_callback_list(struct rcu_data *rdp); 486static bool init_nocb_callback_list(struct rcu_data *rdp);
494static void rcu_bind_gp_kthread(void); 487static void rcu_bind_gp_kthread(void);
495static bool rcu_nohz_full_cpu(struct rcu_state *rsp); 488static bool rcu_nohz_full_cpu(struct rcu_state *rsp);
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index d40708e8c5d6..0b2c2ad69629 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -212,7 +212,7 @@ static void __rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
212 raw_spin_unlock_irqrestore_rcu_node(rnp, flags); 212 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
213 if (wake) { 213 if (wake) {
214 smp_mb(); /* EGP done before wake_up(). */ 214 smp_mb(); /* EGP done before wake_up(). */
215 swake_up(&rsp->expedited_wq); 215 swake_up_one(&rsp->expedited_wq);
216 } 216 }
217 break; 217 break;
218 } 218 }
@@ -472,6 +472,7 @@ retry_ipi:
472static void sync_rcu_exp_select_cpus(struct rcu_state *rsp, 472static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
473 smp_call_func_t func) 473 smp_call_func_t func)
474{ 474{
475 int cpu;
475 struct rcu_node *rnp; 476 struct rcu_node *rnp;
476 477
477 trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("reset")); 478 trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("reset"));
@@ -486,13 +487,20 @@ static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
486 rnp->rew.rew_func = func; 487 rnp->rew.rew_func = func;
487 rnp->rew.rew_rsp = rsp; 488 rnp->rew.rew_rsp = rsp;
488 if (!READ_ONCE(rcu_par_gp_wq) || 489 if (!READ_ONCE(rcu_par_gp_wq) ||
489 rcu_scheduler_active != RCU_SCHEDULER_RUNNING) { 490 rcu_scheduler_active != RCU_SCHEDULER_RUNNING ||
490 /* No workqueues yet. */ 491 rcu_is_last_leaf_node(rsp, rnp)) {
492 /* No workqueues yet or last leaf, do direct call. */
491 sync_rcu_exp_select_node_cpus(&rnp->rew.rew_work); 493 sync_rcu_exp_select_node_cpus(&rnp->rew.rew_work);
492 continue; 494 continue;
493 } 495 }
494 INIT_WORK(&rnp->rew.rew_work, sync_rcu_exp_select_node_cpus); 496 INIT_WORK(&rnp->rew.rew_work, sync_rcu_exp_select_node_cpus);
495 queue_work_on(rnp->grplo, rcu_par_gp_wq, &rnp->rew.rew_work); 497 preempt_disable();
498 cpu = cpumask_next(rnp->grplo - 1, cpu_online_mask);
499 /* If all offline, queue the work on an unbound CPU. */
500 if (unlikely(cpu > rnp->grphi))
501 cpu = WORK_CPU_UNBOUND;
502 queue_work_on(cpu, rcu_par_gp_wq, &rnp->rew.rew_work);
503 preempt_enable();
496 rnp->exp_need_flush = true; 504 rnp->exp_need_flush = true;
497 } 505 }
498 506
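sync_rcu_exp_select_cpus() used to queue each leaf's workqueue item on rnp->grplo unconditionally, online or not. The replacement picks the first online CPU in the leaf's range and, when the whole leaf is offline, falls back to an unbound worker. A stand-alone sketch of that selection, with grplo/grphi standing in for the leaf's CPU span:

	static void queue_leaf_work(struct workqueue_struct *wq, struct work_struct *work,
				    int grplo, int grphi)
	{
		int cpu;

		preempt_disable();				/* As in the patch. */
		cpu = cpumask_next(grplo - 1, cpu_online_mask);	/* First online CPU >= grplo. */
		if (cpu > grphi)
			cpu = WORK_CPU_UNBOUND;			/* Whole leaf offline. */
		queue_work_on(cpu, wq, work);
		preempt_enable();
	}

cpumask_next() starts scanning strictly after its first argument, so passing grplo - 1 makes grplo itself eligible.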
@@ -518,7 +526,7 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
518 jiffies_start = jiffies; 526 jiffies_start = jiffies;
519 527
520 for (;;) { 528 for (;;) {
521 ret = swait_event_timeout( 529 ret = swait_event_timeout_exclusive(
522 rsp->expedited_wq, 530 rsp->expedited_wq,
523 sync_rcu_preempt_exp_done_unlocked(rnp_root), 531 sync_rcu_preempt_exp_done_unlocked(rnp_root),
524 jiffies_stall); 532 jiffies_stall);
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 7fd12039e512..a97c20ea9bce 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -74,8 +74,8 @@ static void __init rcu_bootup_announce_oddness(void)
74 pr_info("\tRCU event tracing is enabled.\n"); 74 pr_info("\tRCU event tracing is enabled.\n");
75 if ((IS_ENABLED(CONFIG_64BIT) && RCU_FANOUT != 64) || 75 if ((IS_ENABLED(CONFIG_64BIT) && RCU_FANOUT != 64) ||
76 (!IS_ENABLED(CONFIG_64BIT) && RCU_FANOUT != 32)) 76 (!IS_ENABLED(CONFIG_64BIT) && RCU_FANOUT != 32))
77 pr_info("\tCONFIG_RCU_FANOUT set to non-default value of %d\n", 77 pr_info("\tCONFIG_RCU_FANOUT set to non-default value of %d.\n",
78 RCU_FANOUT); 78 RCU_FANOUT);
79 if (rcu_fanout_exact) 79 if (rcu_fanout_exact)
80 pr_info("\tHierarchical RCU autobalancing is disabled.\n"); 80 pr_info("\tHierarchical RCU autobalancing is disabled.\n");
81 if (IS_ENABLED(CONFIG_RCU_FAST_NO_HZ)) 81 if (IS_ENABLED(CONFIG_RCU_FAST_NO_HZ))
@@ -88,11 +88,13 @@ static void __init rcu_bootup_announce_oddness(void)
88 pr_info("\tBuild-time adjustment of leaf fanout to %d.\n", 88 pr_info("\tBuild-time adjustment of leaf fanout to %d.\n",
89 RCU_FANOUT_LEAF); 89 RCU_FANOUT_LEAF);
90 if (rcu_fanout_leaf != RCU_FANOUT_LEAF) 90 if (rcu_fanout_leaf != RCU_FANOUT_LEAF)
91 pr_info("\tBoot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf); 91 pr_info("\tBoot-time adjustment of leaf fanout to %d.\n",
92 rcu_fanout_leaf);
92 if (nr_cpu_ids != NR_CPUS) 93 if (nr_cpu_ids != NR_CPUS)
93 pr_info("\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%u.\n", NR_CPUS, nr_cpu_ids); 94 pr_info("\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%u.\n", NR_CPUS, nr_cpu_ids);
94#ifdef CONFIG_RCU_BOOST 95#ifdef CONFIG_RCU_BOOST
95 pr_info("\tRCU priority boosting: priority %d delay %d ms.\n", kthread_prio, CONFIG_RCU_BOOST_DELAY); 96 pr_info("\tRCU priority boosting: priority %d delay %d ms.\n",
97 kthread_prio, CONFIG_RCU_BOOST_DELAY);
96#endif 98#endif
97 if (blimit != DEFAULT_RCU_BLIMIT) 99 if (blimit != DEFAULT_RCU_BLIMIT)
98 pr_info("\tBoot-time adjustment of callback invocation limit to %ld.\n", blimit); 100 pr_info("\tBoot-time adjustment of callback invocation limit to %ld.\n", blimit);
@@ -127,6 +129,7 @@ static struct rcu_data __percpu *const rcu_data_p = &rcu_preempt_data;
127 129
128static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, 130static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
129 bool wake); 131 bool wake);
132static void rcu_read_unlock_special(struct task_struct *t);
130 133
131/* 134/*
132 * Tell them what RCU they are running. 135 * Tell them what RCU they are running.
@@ -183,6 +186,9 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
183 raw_lockdep_assert_held_rcu_node(rnp); 186 raw_lockdep_assert_held_rcu_node(rnp);
184 WARN_ON_ONCE(rdp->mynode != rnp); 187 WARN_ON_ONCE(rdp->mynode != rnp);
185 WARN_ON_ONCE(!rcu_is_leaf_node(rnp)); 188 WARN_ON_ONCE(!rcu_is_leaf_node(rnp));
189 /* RCU better not be waiting on newly onlined CPUs! */
190 WARN_ON_ONCE(rnp->qsmaskinitnext & ~rnp->qsmaskinit & rnp->qsmask &
191 rdp->grpmask);
186 192
187 /* 193 /*
188 * Decide where to queue the newly blocked task. In theory, 194 * Decide where to queue the newly blocked task. In theory,
@@ -260,8 +266,10 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
260 * ->exp_tasks pointers, respectively, to reference the newly 266 * ->exp_tasks pointers, respectively, to reference the newly
261 * blocked tasks. 267 * blocked tasks.
262 */ 268 */
263 if (!rnp->gp_tasks && (blkd_state & RCU_GP_BLKD)) 269 if (!rnp->gp_tasks && (blkd_state & RCU_GP_BLKD)) {
264 rnp->gp_tasks = &t->rcu_node_entry; 270 rnp->gp_tasks = &t->rcu_node_entry;
271 WARN_ON_ONCE(rnp->completedqs == rnp->gp_seq);
272 }
265 if (!rnp->exp_tasks && (blkd_state & RCU_EXP_BLKD)) 273 if (!rnp->exp_tasks && (blkd_state & RCU_EXP_BLKD))
266 rnp->exp_tasks = &t->rcu_node_entry; 274 rnp->exp_tasks = &t->rcu_node_entry;
267 WARN_ON_ONCE(!(blkd_state & RCU_GP_BLKD) != 275 WARN_ON_ONCE(!(blkd_state & RCU_GP_BLKD) !=
@@ -286,20 +294,24 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
286} 294}
287 295
288/* 296/*
289 * Record a preemptible-RCU quiescent state for the specified CPU. Note 297 * Record a preemptible-RCU quiescent state for the specified CPU.
290 * that this just means that the task currently running on the CPU is 298 * Note that this does not necessarily mean that the task currently running
291 * not in a quiescent state. There might be any number of tasks blocked 299 * on the CPU is in a quiescent state: Instead, it means that the current
292 * while in an RCU read-side critical section. 300 * grace period need not wait on any RCU read-side critical section that
301 * starts later on this CPU. It also means that if the current task is
302 * in an RCU read-side critical section, it has already added itself to
303 * some leaf rcu_node structure's ->blkd_tasks list. In addition to the
304 * current task, there might be any number of other tasks blocked while
305 * in an RCU read-side critical section.
293 * 306 *
294 * As with the other rcu_*_qs() functions, callers to this function 307 * Callers to this function must disable preemption.
295 * must disable preemption.
296 */ 308 */
297static void rcu_preempt_qs(void) 309static void rcu_preempt_qs(void)
298{ 310{
299 RCU_LOCKDEP_WARN(preemptible(), "rcu_preempt_qs() invoked with preemption enabled!!!\n"); 311 RCU_LOCKDEP_WARN(preemptible(), "rcu_preempt_qs() invoked with preemption enabled!!!\n");
300 if (__this_cpu_read(rcu_data_p->cpu_no_qs.s)) { 312 if (__this_cpu_read(rcu_data_p->cpu_no_qs.s)) {
301 trace_rcu_grace_period(TPS("rcu_preempt"), 313 trace_rcu_grace_period(TPS("rcu_preempt"),
302 __this_cpu_read(rcu_data_p->gpnum), 314 __this_cpu_read(rcu_data_p->gp_seq),
303 TPS("cpuqs")); 315 TPS("cpuqs"));
304 __this_cpu_write(rcu_data_p->cpu_no_qs.b.norm, false); 316 __this_cpu_write(rcu_data_p->cpu_no_qs.b.norm, false);
305 barrier(); /* Coordinate with rcu_preempt_check_callbacks(). */ 317 barrier(); /* Coordinate with rcu_preempt_check_callbacks(). */
@@ -348,8 +360,8 @@ static void rcu_preempt_note_context_switch(bool preempt)
348 trace_rcu_preempt_task(rdp->rsp->name, 360 trace_rcu_preempt_task(rdp->rsp->name,
349 t->pid, 361 t->pid,
350 (rnp->qsmask & rdp->grpmask) 362 (rnp->qsmask & rdp->grpmask)
351 ? rnp->gpnum 363 ? rnp->gp_seq
352 : rnp->gpnum + 1); 364 : rcu_seq_snap(&rnp->gp_seq));
353 rcu_preempt_ctxt_queue(rnp, rdp); 365 rcu_preempt_ctxt_queue(rnp, rdp);
354 } else if (t->rcu_read_lock_nesting < 0 && 366 } else if (t->rcu_read_lock_nesting < 0 &&
355 t->rcu_read_unlock_special.s) { 367 t->rcu_read_unlock_special.s) {
@@ -456,7 +468,7 @@ static bool rcu_preempt_has_tasks(struct rcu_node *rnp)
456 * notify RCU core processing or task having blocked during the RCU 468 * notify RCU core processing or task having blocked during the RCU
457 * read-side critical section. 469 * read-side critical section.
458 */ 470 */
459void rcu_read_unlock_special(struct task_struct *t) 471static void rcu_read_unlock_special(struct task_struct *t)
460{ 472{
461 bool empty_exp; 473 bool empty_exp;
462 bool empty_norm; 474 bool empty_norm;
@@ -535,13 +547,15 @@ void rcu_read_unlock_special(struct task_struct *t)
535 WARN_ON_ONCE(rnp != t->rcu_blocked_node); 547 WARN_ON_ONCE(rnp != t->rcu_blocked_node);
536 WARN_ON_ONCE(!rcu_is_leaf_node(rnp)); 548 WARN_ON_ONCE(!rcu_is_leaf_node(rnp));
537 empty_norm = !rcu_preempt_blocked_readers_cgp(rnp); 549 empty_norm = !rcu_preempt_blocked_readers_cgp(rnp);
550 WARN_ON_ONCE(rnp->completedqs == rnp->gp_seq &&
551 (!empty_norm || rnp->qsmask));
538 empty_exp = sync_rcu_preempt_exp_done(rnp); 552 empty_exp = sync_rcu_preempt_exp_done(rnp);
539 smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */ 553 smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */
540 np = rcu_next_node_entry(t, rnp); 554 np = rcu_next_node_entry(t, rnp);
541 list_del_init(&t->rcu_node_entry); 555 list_del_init(&t->rcu_node_entry);
542 t->rcu_blocked_node = NULL; 556 t->rcu_blocked_node = NULL;
543 trace_rcu_unlock_preempted_task(TPS("rcu_preempt"), 557 trace_rcu_unlock_preempted_task(TPS("rcu_preempt"),
544 rnp->gpnum, t->pid); 558 rnp->gp_seq, t->pid);
545 if (&t->rcu_node_entry == rnp->gp_tasks) 559 if (&t->rcu_node_entry == rnp->gp_tasks)
546 rnp->gp_tasks = np; 560 rnp->gp_tasks = np;
547 if (&t->rcu_node_entry == rnp->exp_tasks) 561 if (&t->rcu_node_entry == rnp->exp_tasks)
@@ -562,7 +576,7 @@ void rcu_read_unlock_special(struct task_struct *t)
562 empty_exp_now = sync_rcu_preempt_exp_done(rnp); 576 empty_exp_now = sync_rcu_preempt_exp_done(rnp);
563 if (!empty_norm && !rcu_preempt_blocked_readers_cgp(rnp)) { 577 if (!empty_norm && !rcu_preempt_blocked_readers_cgp(rnp)) {
564 trace_rcu_quiescent_state_report(TPS("preempt_rcu"), 578 trace_rcu_quiescent_state_report(TPS("preempt_rcu"),
565 rnp->gpnum, 579 rnp->gp_seq,
566 0, rnp->qsmask, 580 0, rnp->qsmask,
567 rnp->level, 581 rnp->level,
568 rnp->grplo, 582 rnp->grplo,
@@ -686,24 +700,27 @@ static int rcu_print_task_exp_stall(struct rcu_node *rnp)
686 * Check that the list of blocked tasks for the newly completed grace 700 * Check that the list of blocked tasks for the newly completed grace
687 * period is in fact empty. It is a serious bug to complete a grace 701 * period is in fact empty. It is a serious bug to complete a grace
688 * period that still has RCU readers blocked! This function must be 702 * period that still has RCU readers blocked! This function must be
689 * invoked -before- updating this rnp's ->gpnum, and the rnp's ->lock 703 * invoked -before- updating this rnp's ->gp_seq, and the rnp's ->lock
690 * must be held by the caller. 704 * must be held by the caller.
691 * 705 *
692 * Also, if there are blocked tasks on the list, they automatically 706 * Also, if there are blocked tasks on the list, they automatically
693 * block the newly created grace period, so set up ->gp_tasks accordingly. 707 * block the newly created grace period, so set up ->gp_tasks accordingly.
694 */ 708 */
695static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) 709static void
710rcu_preempt_check_blocked_tasks(struct rcu_state *rsp, struct rcu_node *rnp)
696{ 711{
697 struct task_struct *t; 712 struct task_struct *t;
698 713
699 RCU_LOCKDEP_WARN(preemptible(), "rcu_preempt_check_blocked_tasks() invoked with preemption enabled!!!\n"); 714 RCU_LOCKDEP_WARN(preemptible(), "rcu_preempt_check_blocked_tasks() invoked with preemption enabled!!!\n");
700 WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp)); 715 if (WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp)))
701 if (rcu_preempt_has_tasks(rnp)) { 716 dump_blkd_tasks(rsp, rnp, 10);
717 if (rcu_preempt_has_tasks(rnp) &&
718 (rnp->qsmaskinit || rnp->wait_blkd_tasks)) {
702 rnp->gp_tasks = rnp->blkd_tasks.next; 719 rnp->gp_tasks = rnp->blkd_tasks.next;
703 t = container_of(rnp->gp_tasks, struct task_struct, 720 t = container_of(rnp->gp_tasks, struct task_struct,
704 rcu_node_entry); 721 rcu_node_entry);
705 trace_rcu_unlock_preempted_task(TPS("rcu_preempt-GPS"), 722 trace_rcu_unlock_preempted_task(TPS("rcu_preempt-GPS"),
706 rnp->gpnum, t->pid); 723 rnp->gp_seq, t->pid);
707 } 724 }
708 WARN_ON_ONCE(rnp->qsmask); 725 WARN_ON_ONCE(rnp->qsmask);
709} 726}
@@ -717,6 +734,7 @@ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
717 */ 734 */
718static void rcu_preempt_check_callbacks(void) 735static void rcu_preempt_check_callbacks(void)
719{ 736{
737 struct rcu_state *rsp = &rcu_preempt_state;
720 struct task_struct *t = current; 738 struct task_struct *t = current;
721 739
722 if (t->rcu_read_lock_nesting == 0) { 740 if (t->rcu_read_lock_nesting == 0) {
@@ -725,7 +743,9 @@ static void rcu_preempt_check_callbacks(void)
725 } 743 }
726 if (t->rcu_read_lock_nesting > 0 && 744 if (t->rcu_read_lock_nesting > 0 &&
727 __this_cpu_read(rcu_data_p->core_needs_qs) && 745 __this_cpu_read(rcu_data_p->core_needs_qs) &&
728 __this_cpu_read(rcu_data_p->cpu_no_qs.b.norm)) 746 __this_cpu_read(rcu_data_p->cpu_no_qs.b.norm) &&
747 !t->rcu_read_unlock_special.b.need_qs &&
748 time_after(jiffies, rsp->gp_start + HZ))
729 t->rcu_read_unlock_special.b.need_qs = true; 749 t->rcu_read_unlock_special.b.need_qs = true;
730} 750}
731 751
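The two extra conditions throttle the read-side slow path: ->need_qs is set only if it is not already set and the current grace period has been running for at least a second. time_after() is the wrap-safe way to express that age test:

	/* True once at least HZ jiffies (one second) have elapsed since 'start'. */
	static bool gp_is_at_least_a_second_old(unsigned long start)
	{
		return time_after(jiffies, start + HZ);
	}

Using time_after() rather than a plain ">" keeps the comparison correct across jiffies wraparound.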
@@ -841,6 +861,47 @@ void exit_rcu(void)
841 __rcu_read_unlock(); 861 __rcu_read_unlock();
842} 862}
843 863
864/*
865 * Dump the blocked-tasks state, but limit the list dump to the
866 * specified number of elements.
867 */
868static void
869dump_blkd_tasks(struct rcu_state *rsp, struct rcu_node *rnp, int ncheck)
870{
871 int cpu;
872 int i;
873 struct list_head *lhp;
874 bool onl;
875 struct rcu_data *rdp;
876 struct rcu_node *rnp1;
877
878 raw_lockdep_assert_held_rcu_node(rnp);
879 pr_info("%s: grp: %d-%d level: %d ->gp_seq %ld ->completedqs %ld\n",
880 __func__, rnp->grplo, rnp->grphi, rnp->level,
881 (long)rnp->gp_seq, (long)rnp->completedqs);
882 for (rnp1 = rnp; rnp1; rnp1 = rnp1->parent)
883 pr_info("%s: %d:%d ->qsmask %#lx ->qsmaskinit %#lx ->qsmaskinitnext %#lx\n",
884 __func__, rnp1->grplo, rnp1->grphi, rnp1->qsmask, rnp1->qsmaskinit, rnp1->qsmaskinitnext);
885 pr_info("%s: ->gp_tasks %p ->boost_tasks %p ->exp_tasks %p\n",
886 __func__, rnp->gp_tasks, rnp->boost_tasks, rnp->exp_tasks);
887 pr_info("%s: ->blkd_tasks", __func__);
888 i = 0;
889 list_for_each(lhp, &rnp->blkd_tasks) {
890 pr_cont(" %p", lhp);
891 if (++i >= 10)
892 break;
893 }
894 pr_cont("\n");
895 for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++) {
896 rdp = per_cpu_ptr(rsp->rda, cpu);
897 onl = !!(rdp->grpmask & rcu_rnp_online_cpus(rnp));
898 pr_info("\t%d: %c online: %ld(%d) offline: %ld(%d)\n",
899 cpu, ".o"[onl],
900 (long)rdp->rcu_onl_gp_seq, rdp->rcu_onl_gp_flags,
901 (long)rdp->rcu_ofl_gp_seq, rdp->rcu_ofl_gp_flags);
902 }
903}
904
844#else /* #ifdef CONFIG_PREEMPT_RCU */ 905#else /* #ifdef CONFIG_PREEMPT_RCU */
845 906
846static struct rcu_state *const rcu_state_p = &rcu_sched_state; 907static struct rcu_state *const rcu_state_p = &rcu_sched_state;
@@ -911,7 +972,8 @@ static int rcu_print_task_exp_stall(struct rcu_node *rnp)
911 * so there is no need to check for blocked tasks. So check only for 972 * so there is no need to check for blocked tasks. So check only for
912 * bogus qsmask values. 973 * bogus qsmask values.
913 */ 974 */
914static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) 975static void
976rcu_preempt_check_blocked_tasks(struct rcu_state *rsp, struct rcu_node *rnp)
915{ 977{
916 WARN_ON_ONCE(rnp->qsmask); 978 WARN_ON_ONCE(rnp->qsmask);
917} 979}
@@ -949,6 +1011,15 @@ void exit_rcu(void)
949{ 1011{
950} 1012}
951 1013
1014/*
1015 * Dump the guaranteed-empty blocked-tasks state. Trust but verify.
1016 */
1017static void
1018dump_blkd_tasks(struct rcu_state *rsp, struct rcu_node *rnp, int ncheck)
1019{
1020 WARN_ON_ONCE(!list_empty(&rnp->blkd_tasks));
1021}
1022
952#endif /* #else #ifdef CONFIG_PREEMPT_RCU */ 1023#endif /* #else #ifdef CONFIG_PREEMPT_RCU */
953 1024
954#ifdef CONFIG_RCU_BOOST 1025#ifdef CONFIG_RCU_BOOST
@@ -1433,7 +1504,8 @@ static bool __maybe_unused rcu_try_advance_all_cbs(void)
1433 * completed since we last checked and there are 1504 * completed since we last checked and there are
1434 * callbacks not yet ready to invoke. 1505 * callbacks not yet ready to invoke.
1435 */ 1506 */
1436 if ((rdp->completed != rnp->completed || 1507 if ((rcu_seq_completed_gp(rdp->gp_seq,
1508 rcu_seq_current(&rnp->gp_seq)) ||
1437 unlikely(READ_ONCE(rdp->gpwrap))) && 1509 unlikely(READ_ONCE(rdp->gpwrap))) &&
1438 rcu_segcblist_pend_cbs(&rdp->cblist)) 1510 rcu_segcblist_pend_cbs(&rdp->cblist))
1439 note_gp_changes(rsp, rdp); 1511 note_gp_changes(rsp, rdp);
@@ -1720,16 +1792,16 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu)
1720 */ 1792 */
1721 touch_nmi_watchdog(); 1793 touch_nmi_watchdog();
1722 1794
1723 if (rsp->gpnum == rdp->gpnum) { 1795 ticks_value = rcu_seq_ctr(rsp->gp_seq - rdp->gp_seq);
1796 if (ticks_value) {
1797 ticks_title = "GPs behind";
1798 } else {
1724 ticks_title = "ticks this GP"; 1799 ticks_title = "ticks this GP";
1725 ticks_value = rdp->ticks_this_gp; 1800 ticks_value = rdp->ticks_this_gp;
1726 } else {
1727 ticks_title = "GPs behind";
1728 ticks_value = rsp->gpnum - rdp->gpnum;
1729 } 1801 }
1730 print_cpu_stall_fast_no_hz(fast_no_hz, cpu); 1802 print_cpu_stall_fast_no_hz(fast_no_hz, cpu);
1731 delta = rdp->mynode->gpnum - rdp->rcu_iw_gpnum; 1803 delta = rcu_seq_ctr(rdp->mynode->gp_seq - rdp->rcu_iw_gp_seq);
1732 pr_err("\t%d-%c%c%c%c: (%lu %s) idle=%03x/%ld/%ld softirq=%u/%u fqs=%ld %s\n", 1804 pr_err("\t%d-%c%c%c%c: (%lu %s) idle=%03x/%ld/%#lx softirq=%u/%u fqs=%ld %s\n",
1733 cpu, 1805 cpu,
1734 "O."[!!cpu_online(cpu)], 1806 "O."[!!cpu_online(cpu)],
1735 "o."[!!(rdp->grpmask & rdp->mynode->qsmaskinit)], 1807 "o."[!!(rdp->grpmask & rdp->mynode->qsmaskinit)],
@@ -1817,7 +1889,7 @@ static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq)
1817 1889
1818static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp) 1890static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp)
1819{ 1891{
1820 return &rnp->nocb_gp_wq[rnp->completed & 0x1]; 1892 return &rnp->nocb_gp_wq[rcu_seq_ctr(rnp->gp_seq) & 0x1];
1821} 1893}
1822 1894
1823static void rcu_init_one_nocb(struct rcu_node *rnp) 1895static void rcu_init_one_nocb(struct rcu_node *rnp)
@@ -1854,8 +1926,8 @@ static void __wake_nocb_leader(struct rcu_data *rdp, bool force,
1854 WRITE_ONCE(rdp_leader->nocb_leader_sleep, false); 1926 WRITE_ONCE(rdp_leader->nocb_leader_sleep, false);
1855 del_timer(&rdp->nocb_timer); 1927 del_timer(&rdp->nocb_timer);
1856 raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags); 1928 raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
1857 smp_mb(); /* ->nocb_leader_sleep before swake_up(). */ 1929 smp_mb(); /* ->nocb_leader_sleep before swake_up_one(). */
1858 swake_up(&rdp_leader->nocb_wq); 1930 swake_up_one(&rdp_leader->nocb_wq);
1859 } else { 1931 } else {
1860 raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags); 1932 raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
1861 } 1933 }
@@ -2069,12 +2141,17 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)
2069 bool needwake; 2141 bool needwake;
2070 struct rcu_node *rnp = rdp->mynode; 2142 struct rcu_node *rnp = rdp->mynode;
2071 2143
2072 raw_spin_lock_irqsave_rcu_node(rnp, flags); 2144 local_irq_save(flags);
2073 c = rcu_cbs_completed(rdp->rsp, rnp); 2145 c = rcu_seq_snap(&rdp->rsp->gp_seq);
2074 needwake = rcu_start_this_gp(rnp, rdp, c); 2146 if (!rdp->gpwrap && ULONG_CMP_GE(rdp->gp_seq_needed, c)) {
2075 raw_spin_unlock_irqrestore_rcu_node(rnp, flags); 2147 local_irq_restore(flags);
2076 if (needwake) 2148 } else {
2077 rcu_gp_kthread_wake(rdp->rsp); 2149 raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
2150 needwake = rcu_start_this_gp(rnp, rdp, c);
2151 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
2152 if (needwake)
2153 rcu_gp_kthread_wake(rdp->rsp);
2154 }
2078 2155
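rcu_nocb_wait_gp() now snapshots the grace period it needs with rcu_seq_snap() and takes the rcu_node lock only when ->gp_seq_needed does not already cover that snapshot, the same check-cheaply-then-lock structure used elsewhere in this series. A generic, hypothetically named sketch of the pattern:

	struct target {
		raw_spinlock_t lock;
		unsigned long pending_until;	/* Highest request already recorded. */
	};

	static void issue_request(unsigned long want);	/* Hypothetical slow-path work. */

	static void maybe_request(struct target *t, unsigned long want)
	{
		unsigned long flags;

		local_irq_save(flags);
		if (ULONG_CMP_GE(READ_ONCE(t->pending_until), want)) {
			local_irq_restore(flags);	/* Already covered: skip the lock. */
			return;
		}
		raw_spin_lock(&t->lock);		/* irqs stay disabled. */
		if (ULONG_CMP_LT(t->pending_until, want)) {	/* Re-check under the lock. */
			t->pending_until = want;
			issue_request(want);
		}
		raw_spin_unlock_irqrestore(&t->lock, flags);
	}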
2079 /* 2156 /*
2080 * Wait for the grace period. Do so interruptibly to avoid messing 2157 * Wait for the grace period. Do so interruptibly to avoid messing
@@ -2082,9 +2159,9 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)
2082 */ 2159 */
2083 trace_rcu_this_gp(rnp, rdp, c, TPS("StartWait")); 2160 trace_rcu_this_gp(rnp, rdp, c, TPS("StartWait"));
2084 for (;;) { 2161 for (;;) {
2085 swait_event_interruptible( 2162 swait_event_interruptible_exclusive(
2086 rnp->nocb_gp_wq[c & 0x1], 2163 rnp->nocb_gp_wq[rcu_seq_ctr(c) & 0x1],
2087 (d = ULONG_CMP_GE(READ_ONCE(rnp->completed), c))); 2164 (d = rcu_seq_done(&rnp->gp_seq, c)));
2088 if (likely(d)) 2165 if (likely(d))
2089 break; 2166 break;
2090 WARN_ON(signal_pending(current)); 2167 WARN_ON(signal_pending(current));
@@ -2111,7 +2188,7 @@ wait_again:
2111 /* Wait for callbacks to appear. */ 2188 /* Wait for callbacks to appear. */
2112 if (!rcu_nocb_poll) { 2189 if (!rcu_nocb_poll) {
2113 trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, TPS("Sleep")); 2190 trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, TPS("Sleep"));
2114 swait_event_interruptible(my_rdp->nocb_wq, 2191 swait_event_interruptible_exclusive(my_rdp->nocb_wq,
2115 !READ_ONCE(my_rdp->nocb_leader_sleep)); 2192 !READ_ONCE(my_rdp->nocb_leader_sleep));
2116 raw_spin_lock_irqsave(&my_rdp->nocb_lock, flags); 2193 raw_spin_lock_irqsave(&my_rdp->nocb_lock, flags);
2117 my_rdp->nocb_leader_sleep = true; 2194 my_rdp->nocb_leader_sleep = true;
@@ -2176,7 +2253,7 @@ wait_again:
2176 raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags); 2253 raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
2177 if (rdp != my_rdp && tail == &rdp->nocb_follower_head) { 2254 if (rdp != my_rdp && tail == &rdp->nocb_follower_head) {
2178 /* List was empty, so wake up the follower. */ 2255 /* List was empty, so wake up the follower. */
2179 swake_up(&rdp->nocb_wq); 2256 swake_up_one(&rdp->nocb_wq);
2180 } 2257 }
2181 } 2258 }
2182 2259
@@ -2193,7 +2270,7 @@ static void nocb_follower_wait(struct rcu_data *rdp)
2193{ 2270{
2194 for (;;) { 2271 for (;;) {
2195 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("FollowerSleep")); 2272 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("FollowerSleep"));
2196 swait_event_interruptible(rdp->nocb_wq, 2273 swait_event_interruptible_exclusive(rdp->nocb_wq,
2197 READ_ONCE(rdp->nocb_follower_head)); 2274 READ_ONCE(rdp->nocb_follower_head));
2198 if (smp_load_acquire(&rdp->nocb_follower_head)) { 2275 if (smp_load_acquire(&rdp->nocb_follower_head)) {
2199 /* ^^^ Ensure CB invocation follows _head test. */ 2276 /* ^^^ Ensure CB invocation follows _head test. */
@@ -2569,23 +2646,6 @@ static bool init_nocb_callback_list(struct rcu_data *rdp)
2569#endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */ 2646#endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */
2570 2647
2571/* 2648/*
2572 * An adaptive-ticks CPU can potentially execute in kernel mode for an
2573 * arbitrarily long period of time with the scheduling-clock tick turned
2574 * off. RCU will be paying attention to this CPU because it is in the
2575 * kernel, but the CPU cannot be guaranteed to be executing the RCU state
2576 * machine because the scheduling-clock tick has been disabled. Therefore,
2577 * if an adaptive-ticks CPU is failing to respond to the current grace
2578 * period and has not be idle from an RCU perspective, kick it.
2579 */
2580static void __maybe_unused rcu_kick_nohz_cpu(int cpu)
2581{
2582#ifdef CONFIG_NO_HZ_FULL
2583 if (tick_nohz_full_cpu(cpu))
2584 smp_send_reschedule(cpu);
2585#endif /* #ifdef CONFIG_NO_HZ_FULL */
2586}
2587
2588/*
2589 * Is this CPU a NO_HZ_FULL CPU that should ignore RCU so that the 2649 * Is this CPU a NO_HZ_FULL CPU that should ignore RCU so that the
2590 * grace-period kthread will do force_quiescent_state() processing? 2650 * grace-period kthread will do force_quiescent_state() processing?
2591 * The idea is to avoid waking up RCU core processing on such a 2651 * The idea is to avoid waking up RCU core processing on such a
@@ -2610,8 +2670,6 @@ static bool rcu_nohz_full_cpu(struct rcu_state *rsp)
2610 */ 2670 */
2611static void rcu_bind_gp_kthread(void) 2671static void rcu_bind_gp_kthread(void)
2612{ 2672{
2613 int __maybe_unused cpu;
2614
2615 if (!tick_nohz_full_enabled()) 2673 if (!tick_nohz_full_enabled())
2616 return; 2674 return;
2617 housekeeping_affine(current, HK_FLAG_RCU); 2675 housekeeping_affine(current, HK_FLAG_RCU);
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 4c230a60ece4..39cb23d22109 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -507,14 +507,15 @@ early_initcall(check_cpu_stall_init);
507#ifdef CONFIG_TASKS_RCU 507#ifdef CONFIG_TASKS_RCU
508 508
509/* 509/*
510 * Simple variant of RCU whose quiescent states are voluntary context switch, 510 * Simple variant of RCU whose quiescent states are voluntary context
511 * user-space execution, and idle. As such, grace periods can take one good 511 * switch, cond_resched_rcu_qs(), user-space execution, and idle.
512 * long time. There are no read-side primitives similar to rcu_read_lock() 512 * As such, grace periods can take one good long time. There are no
513 * and rcu_read_unlock() because this implementation is intended to get 513 * read-side primitives similar to rcu_read_lock() and rcu_read_unlock()
514 * the system into a safe state for some of the manipulations involved in 514 * because this implementation is intended to get the system into a safe
515 * tracing and the like. Finally, this implementation does not support 515 * state for some of the manipulations involved in tracing and the like.
516 * high call_rcu_tasks() rates from multiple CPUs. If this is required, 516 * Finally, this implementation does not support high call_rcu_tasks()
517 * per-CPU callback lists will be needed. 517 * rates from multiple CPUs. If this is required, per-CPU callback lists
518 * will be needed.
518 */ 519 */
519 520
520/* Global list of callbacks and associated lock. */ 521/* Global list of callbacks and associated lock. */
@@ -542,11 +543,11 @@ static struct task_struct *rcu_tasks_kthread_ptr;
542 * period elapses, in other words after all currently executing RCU 543 * period elapses, in other words after all currently executing RCU
543 * read-side critical sections have completed. call_rcu_tasks() assumes 544 * read-side critical sections have completed. call_rcu_tasks() assumes
544 * that the read-side critical sections end at a voluntary context 545 * that the read-side critical sections end at a voluntary context
545 * switch (not a preemption!), entry into idle, or transition to usermode 546 * switch (not a preemption!), cond_resched_rcu_qs(), entry into idle,
546 * execution. As such, there are no read-side primitives analogous to 547 * or transition to usermode execution. As such, there are no read-side
547 * rcu_read_lock() and rcu_read_unlock() because this primitive is intended 548 * primitives analogous to rcu_read_lock() and rcu_read_unlock() because
548 * to determine that all tasks have passed through a safe state, not so 549 * this primitive is intended to determine that all tasks have passed
549 much for data-structure synchronization. 550 through a safe state, not so much for data-structure synchronization.
550 * 551 *
551 * See the description of call_rcu() for more detailed information on 552 * See the description of call_rcu() for more detailed information on
552 * memory ordering guarantees. 553 * memory ordering guarantees.
@@ -667,6 +668,7 @@ static int __noreturn rcu_tasks_kthread(void *arg)
667 struct rcu_head *list; 668 struct rcu_head *list;
668 struct rcu_head *next; 669 struct rcu_head *next;
669 LIST_HEAD(rcu_tasks_holdouts); 670 LIST_HEAD(rcu_tasks_holdouts);
671 int fract;
670 672
671 /* Run on housekeeping CPUs by default. Sysadm can move if desired. */ 673 /* Run on housekeeping CPUs by default. Sysadm can move if desired. */
672 housekeeping_affine(current, HK_FLAG_RCU); 674 housekeeping_affine(current, HK_FLAG_RCU);
@@ -748,13 +750,25 @@ static int __noreturn rcu_tasks_kthread(void *arg)
748 * holdouts. When the list is empty, we are done. 750 * holdouts. When the list is empty, we are done.
749 */ 751 */
750 lastreport = jiffies; 752 lastreport = jiffies;
751 while (!list_empty(&rcu_tasks_holdouts)) { 753
754 /* Start off with HZ/10 wait and slowly back off to 1 HZ wait */
755 fract = 10;
756
757 for (;;) {
752 bool firstreport; 758 bool firstreport;
753 bool needreport; 759 bool needreport;
754 int rtst; 760 int rtst;
755 struct task_struct *t1; 761 struct task_struct *t1;
756 762
757 schedule_timeout_interruptible(HZ); 763 if (list_empty(&rcu_tasks_holdouts))
764 break;
765
766 /* Slowly back off waiting for holdouts */
767 schedule_timeout_interruptible(HZ/fract);
768
769 if (fract > 1)
770 fract--;
771
758 rtst = READ_ONCE(rcu_task_stall_timeout); 772 rtst = READ_ONCE(rcu_task_stall_timeout);
759 needreport = rtst > 0 && 773 needreport = rtst > 0 &&
760 time_after(jiffies, lastreport + rtst); 774 time_after(jiffies, lastreport + rtst);
@@ -800,6 +814,7 @@ static int __noreturn rcu_tasks_kthread(void *arg)
800 list = next; 814 list = next;
801 cond_resched(); 815 cond_resched();
802 } 816 }
817 /* Paranoid sleep to keep this from entering a tight loop */
803 schedule_timeout_uninterruptible(HZ/10); 818 schedule_timeout_uninterruptible(HZ/10);
804 } 819 }
805} 820}
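Note: the holdout loop now starts by polling every HZ/10 and stretches the interval by one step per pass until it reaches a full second, rather than sleeping a fixed HZ each time. A tiny userspace sketch of just that backoff schedule (HZ assumed to be 1000 for the printout; the real value is a kernel build parameter):

    #include <stdio.h>

    #define HZ 1000  /* assumption for illustration; configured per kernel */

    int main(void)
    {
            int fract = 10;  /* start at HZ/10, back off toward HZ/1 */
            int pass;

            for (pass = 1; pass <= 12; pass++) {
                    printf("pass %2d: sleep %4d jiffies (HZ/%d)\n",
                           pass, HZ / fract, fract);
                    if (fract > 1)
                            fract--;  /* slowly back off waiting for holdouts */
            }
            return 0;
    }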
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index d9a02b318108..7fe183404c38 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -20,7 +20,7 @@ obj-y += core.o loadavg.o clock.o cputime.o
20obj-y += idle.o fair.o rt.o deadline.o 20obj-y += idle.o fair.o rt.o deadline.o
21obj-y += wait.o wait_bit.o swait.o completion.o 21obj-y += wait.o wait_bit.o swait.o completion.o
22 22
23obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o 23obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o
24obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o 24obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o
25obj-$(CONFIG_SCHEDSTATS) += stats.o 25obj-$(CONFIG_SCHEDSTATS) += stats.o
26obj-$(CONFIG_SCHED_DEBUG) += debug.o 26obj-$(CONFIG_SCHED_DEBUG) += debug.o
diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
index 10c83e73837a..e3e3b979f9bd 100644
--- a/kernel/sched/clock.c
+++ b/kernel/sched/clock.c
@@ -53,6 +53,7 @@
53 * 53 *
54 */ 54 */
55#include "sched.h" 55#include "sched.h"
56#include <linux/sched_clock.h>
56 57
57/* 58/*
58 * Scheduler clock - returns current time in nanosec units. 59 * Scheduler clock - returns current time in nanosec units.
@@ -66,12 +67,7 @@ unsigned long long __weak sched_clock(void)
66} 67}
67EXPORT_SYMBOL_GPL(sched_clock); 68EXPORT_SYMBOL_GPL(sched_clock);
68 69
69__read_mostly int sched_clock_running; 70static DEFINE_STATIC_KEY_FALSE(sched_clock_running);
70
71void sched_clock_init(void)
72{
73 sched_clock_running = 1;
74}
75 71
76#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK 72#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
77/* 73/*
@@ -195,17 +191,40 @@ void clear_sched_clock_stable(void)
195 191
196 smp_mb(); /* matches sched_clock_init_late() */ 192 smp_mb(); /* matches sched_clock_init_late() */
197 193
198 if (sched_clock_running == 2) 194 if (static_key_count(&sched_clock_running.key) == 2)
199 __clear_sched_clock_stable(); 195 __clear_sched_clock_stable();
200} 196}
201 197
198static void __sched_clock_gtod_offset(void)
199{
200 struct sched_clock_data *scd = this_scd();
201
202 __scd_stamp(scd);
203 __gtod_offset = (scd->tick_raw + __sched_clock_offset) - scd->tick_gtod;
204}
205
206void __init sched_clock_init(void)
207{
208 /*
209 * Set __gtod_offset such that once we mark sched_clock_running,
210 * sched_clock_tick() continues where sched_clock() left off.
211 *
212 * Even if TSC is buggered, we're still UP at this point so it
213 * can't really be out of sync.
214 */
215 local_irq_disable();
216 __sched_clock_gtod_offset();
217 local_irq_enable();
218
219 static_branch_inc(&sched_clock_running);
220}
202/* 221/*
203 * We run this as late_initcall() such that it runs after all built-in drivers, 222 * We run this as late_initcall() such that it runs after all built-in drivers,
204 * notably: acpi_processor and intel_idle, which can mark the TSC as unstable. 223 * notably: acpi_processor and intel_idle, which can mark the TSC as unstable.
205 */ 224 */
206static int __init sched_clock_init_late(void) 225static int __init sched_clock_init_late(void)
207{ 226{
208 sched_clock_running = 2; 227 static_branch_inc(&sched_clock_running);
209 /* 228 /*
210 * Ensure that it is impossible to not do a static_key update. 229 * Ensure that it is impossible to not do a static_key update.
211 * 230 *
@@ -350,8 +369,8 @@ u64 sched_clock_cpu(int cpu)
350 if (sched_clock_stable()) 369 if (sched_clock_stable())
351 return sched_clock() + __sched_clock_offset; 370 return sched_clock() + __sched_clock_offset;
352 371
353 if (unlikely(!sched_clock_running)) 372 if (!static_branch_unlikely(&sched_clock_running))
354 return 0ull; 373 return sched_clock();
355 374
356 preempt_disable_notrace(); 375 preempt_disable_notrace();
357 scd = cpu_sdc(cpu); 376 scd = cpu_sdc(cpu);
@@ -373,7 +392,7 @@ void sched_clock_tick(void)
373 if (sched_clock_stable()) 392 if (sched_clock_stable())
374 return; 393 return;
375 394
376 if (unlikely(!sched_clock_running)) 395 if (!static_branch_unlikely(&sched_clock_running))
377 return; 396 return;
378 397
379 lockdep_assert_irqs_disabled(); 398 lockdep_assert_irqs_disabled();
@@ -385,8 +404,6 @@ void sched_clock_tick(void)
385 404
386void sched_clock_tick_stable(void) 405void sched_clock_tick_stable(void)
387{ 406{
388 u64 gtod, clock;
389
390 if (!sched_clock_stable()) 407 if (!sched_clock_stable())
391 return; 408 return;
392 409
@@ -398,9 +415,7 @@ void sched_clock_tick_stable(void)
398 * TSC to be unstable, any computation will be computing crap. 415 * TSC to be unstable, any computation will be computing crap.
399 */ 416 */
400 local_irq_disable(); 417 local_irq_disable();
401 gtod = ktime_get_ns(); 418 __sched_clock_gtod_offset();
402 clock = sched_clock();
403 __gtod_offset = (clock + __sched_clock_offset) - gtod;
404 local_irq_enable(); 419 local_irq_enable();
405} 420}
406 421
@@ -434,9 +449,17 @@ EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
434 449
435#else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */ 450#else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
436 451
452void __init sched_clock_init(void)
453{
454 static_branch_inc(&sched_clock_running);
455 local_irq_disable();
456 generic_sched_clock_init();
457 local_irq_enable();
458}
459
437u64 sched_clock_cpu(int cpu) 460u64 sched_clock_cpu(int cpu)
438{ 461{
439 if (unlikely(!sched_clock_running)) 462 if (!static_branch_unlikely(&sched_clock_running))
440 return 0; 463 return 0;
441 464
442 return sched_clock(); 465 return sched_clock();
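Note: the clock.c changes replace the sched_clock_running integer with a static key and move the __gtod_offset setup into sched_clock_init(), so that once the key is flipped the GTOD-based path continues exactly where the raw sched_clock() readings left off. The continuity condition is plain arithmetic; a userspace model of the offset computation from the hunks above (variable names and sample values are illustrative only):

    #include <stdio.h>

    typedef unsigned long long u64;

    int main(void)
    {
            /* Samples taken back-to-back with IRQs off (invented numbers). */
            u64 tick_raw           = 6000;  /* raw sched_clock() at the stamp   */
            u64 tick_gtod          = 5000;  /* ktime_get_ns() at the same stamp */
            u64 sched_clock_offset = 300;   /* raw-clock offset already applied */

            /* __gtod_offset = (tick_raw + __sched_clock_offset) - tick_gtod */
            u64 gtod_offset = (tick_raw + sched_clock_offset) - tick_gtod;

            /* A later GTOD-based reading, corrected by the offset, lines up
             * with what (raw + __sched_clock_offset) would have returned. */
            u64 later_gtod = 5400;
            printf("gtod_offset = %llu\n", gtod_offset);
            printf("clock via gtod path = %llu\n", later_gtod + gtod_offset);
            return 0;
    }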
diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c
index e426b0cb9ac6..a1ad5b7d5521 100644
--- a/kernel/sched/completion.c
+++ b/kernel/sched/completion.c
@@ -22,8 +22,8 @@
22 * 22 *
23 * See also complete_all(), wait_for_completion() and related routines. 23 * See also complete_all(), wait_for_completion() and related routines.
24 * 24 *
25 * It may be assumed that this function implies a write memory barrier before 25 * If this function wakes up a task, it executes a full memory barrier before
26 * changing the task state if and only if any tasks are woken up. 26 * accessing the task state.
27 */ 27 */
28void complete(struct completion *x) 28void complete(struct completion *x)
29{ 29{
@@ -44,8 +44,8 @@ EXPORT_SYMBOL(complete);
44 * 44 *
45 * This will wake up all threads waiting on this particular completion event. 45 * This will wake up all threads waiting on this particular completion event.
46 * 46 *
47 * It may be assumed that this function implies a write memory barrier before 47 * If this function wakes up a task, it executes a full memory barrier before
48 * changing the task state if and only if any tasks are woken up. 48 * accessing the task state.
49 * 49 *
50 * Since complete_all() sets the completion of @x permanently to done 50 * Since complete_all() sets the completion of @x permanently to done
51 * to allow multiple waiters to finish, a call to reinit_completion() 51 * to allow multiple waiters to finish, a call to reinit_completion()
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index fe365c9a08e9..c45de46fdf10 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -17,6 +17,8 @@
17#include "../workqueue_internal.h" 17#include "../workqueue_internal.h"
18#include "../smpboot.h" 18#include "../smpboot.h"
19 19
20#include "pelt.h"
21
20#define CREATE_TRACE_POINTS 22#define CREATE_TRACE_POINTS
21#include <trace/events/sched.h> 23#include <trace/events/sched.h>
22 24
@@ -45,14 +47,6 @@ const_debug unsigned int sysctl_sched_features =
45const_debug unsigned int sysctl_sched_nr_migrate = 32; 47const_debug unsigned int sysctl_sched_nr_migrate = 32;
46 48
47/* 49/*
48 * period over which we average the RT time consumption, measured
49 * in ms.
50 *
51 * default: 1s
52 */
53const_debug unsigned int sysctl_sched_time_avg = MSEC_PER_SEC;
54
55/*
56 * period over which we measure -rt task CPU usage in us. 50 * period over which we measure -rt task CPU usage in us.
57 * default: 1s 51 * default: 1s
58 */ 52 */
@@ -183,9 +177,9 @@ static void update_rq_clock_task(struct rq *rq, s64 delta)
183 177
184 rq->clock_task += delta; 178 rq->clock_task += delta;
185 179
186#if defined(CONFIG_IRQ_TIME_ACCOUNTING) || defined(CONFIG_PARAVIRT_TIME_ACCOUNTING) 180#ifdef HAVE_SCHED_AVG_IRQ
187 if ((irq_delta + steal) && sched_feat(NONTASK_CAPACITY)) 181 if ((irq_delta + steal) && sched_feat(NONTASK_CAPACITY))
188 sched_rt_avg_update(rq, irq_delta + steal); 182 update_irq_load_avg(rq, irq_delta + steal);
189#endif 183#endif
190} 184}
191 185
@@ -412,8 +406,8 @@ void wake_q_add(struct wake_q_head *head, struct task_struct *task)
412 * it's already queued (either by us or someone else) and will get the 406 * it's already queued (either by us or someone else) and will get the
413 * wakeup due to that. 407 * wakeup due to that.
414 * 408 *
415 * This cmpxchg() implies a full barrier, which pairs with the write 409 * This cmpxchg() executes a full barrier, which pairs with the full
416 * barrier implied by the wakeup in wake_up_q(). 410 * barrier executed by the wakeup in wake_up_q().
417 */ 411 */
418 if (cmpxchg(&node->next, NULL, WAKE_Q_TAIL)) 412 if (cmpxchg(&node->next, NULL, WAKE_Q_TAIL))
419 return; 413 return;
@@ -441,8 +435,8 @@ void wake_up_q(struct wake_q_head *head)
441 task->wake_q.next = NULL; 435 task->wake_q.next = NULL;
442 436
443 /* 437 /*
444 * wake_up_process() implies a wmb() to pair with the queueing 438 * wake_up_process() executes a full barrier, which pairs with
445 * in wake_q_add() so as not to miss wakeups. 439 * the queueing in wake_q_add() so as not to miss wakeups.
446 */ 440 */
447 wake_up_process(task); 441 wake_up_process(task);
448 put_task_struct(task); 442 put_task_struct(task);
@@ -649,23 +643,6 @@ bool sched_can_stop_tick(struct rq *rq)
649 return true; 643 return true;
650} 644}
651#endif /* CONFIG_NO_HZ_FULL */ 645#endif /* CONFIG_NO_HZ_FULL */
652
653void sched_avg_update(struct rq *rq)
654{
655 s64 period = sched_avg_period();
656
657 while ((s64)(rq_clock(rq) - rq->age_stamp) > period) {
658 /*
659 * Inline assembly required to prevent the compiler
660 * optimising this loop into a divmod call.
661 * See __iter_div_u64_rem() for another example of this.
662 */
663 asm("" : "+rm" (rq->age_stamp));
664 rq->age_stamp += period;
665 rq->rt_avg /= 2;
666 }
667}
668
669#endif /* CONFIG_SMP */ 646#endif /* CONFIG_SMP */
670 647
671#if defined(CONFIG_RT_GROUP_SCHED) || (defined(CONFIG_FAIR_GROUP_SCHED) && \ 648#if defined(CONFIG_RT_GROUP_SCHED) || (defined(CONFIG_FAIR_GROUP_SCHED) && \
@@ -1199,6 +1176,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
1199 __set_task_cpu(p, new_cpu); 1176 __set_task_cpu(p, new_cpu);
1200} 1177}
1201 1178
1179#ifdef CONFIG_NUMA_BALANCING
1202static void __migrate_swap_task(struct task_struct *p, int cpu) 1180static void __migrate_swap_task(struct task_struct *p, int cpu)
1203{ 1181{
1204 if (task_on_rq_queued(p)) { 1182 if (task_on_rq_queued(p)) {
@@ -1280,16 +1258,17 @@ unlock:
1280/* 1258/*
1281 * Cross migrate two tasks 1259 * Cross migrate two tasks
1282 */ 1260 */
1283int migrate_swap(struct task_struct *cur, struct task_struct *p) 1261int migrate_swap(struct task_struct *cur, struct task_struct *p,
1262 int target_cpu, int curr_cpu)
1284{ 1263{
1285 struct migration_swap_arg arg; 1264 struct migration_swap_arg arg;
1286 int ret = -EINVAL; 1265 int ret = -EINVAL;
1287 1266
1288 arg = (struct migration_swap_arg){ 1267 arg = (struct migration_swap_arg){
1289 .src_task = cur, 1268 .src_task = cur,
1290 .src_cpu = task_cpu(cur), 1269 .src_cpu = curr_cpu,
1291 .dst_task = p, 1270 .dst_task = p,
1292 .dst_cpu = task_cpu(p), 1271 .dst_cpu = target_cpu,
1293 }; 1272 };
1294 1273
1295 if (arg.src_cpu == arg.dst_cpu) 1274 if (arg.src_cpu == arg.dst_cpu)
@@ -1314,6 +1293,7 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p)
1314out: 1293out:
1315 return ret; 1294 return ret;
1316} 1295}
1296#endif /* CONFIG_NUMA_BALANCING */
1317 1297
1318/* 1298/*
1319 * wait_task_inactive - wait for a thread to unschedule. 1299 * wait_task_inactive - wait for a thread to unschedule.
@@ -1879,8 +1859,7 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
1879 * rq(c1)->lock (if not at the same time, then in that order). 1859 * rq(c1)->lock (if not at the same time, then in that order).
1880 * C) LOCK of the rq(c1)->lock scheduling in task 1860 * C) LOCK of the rq(c1)->lock scheduling in task
1881 * 1861 *
1882 * Transitivity guarantees that B happens after A and C after B. 1862 * Release/acquire chaining guarantees that B happens after A and C after B.
1883 * Note: we only require RCpc transitivity.
1884 * Note: the CPU doing B need not be c0 or c1 1863 * Note: the CPU doing B need not be c0 or c1
1885 * 1864 *
1886 * Example: 1865 * Example:
@@ -1942,16 +1921,9 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
1942 * UNLOCK rq(0)->lock 1921 * UNLOCK rq(0)->lock
1943 * 1922 *
1944 * 1923 *
1945 * However; for wakeups there is a second guarantee we must provide, namely we 1924 * However, for wakeups there is a second guarantee we must provide, namely we
1946 * must observe the state that lead to our wakeup. That is, not only must our 1925 * must ensure that CONDITION=1 done by the caller can not be reordered with
1947 * task observe its own prior state, it must also observe the stores prior to 1926 * accesses to the task state; see try_to_wake_up() and set_current_state().
1948 * its wakeup.
1949 *
1950 * This means that any means of doing remote wakeups must order the CPU doing
1951 * the wakeup against the CPU the task is going to end up running on. This,
1952 * however, is already required for the regular Program-Order guarantee above,
1953 * since the waking CPU is the one issueing the ACQUIRE (smp_cond_load_acquire).
1954 *
1955 */ 1927 */
1956 1928
1957/** 1929/**
@@ -1967,6 +1939,9 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
1967 * Atomic against schedule() which would dequeue a task, also see 1939 * Atomic against schedule() which would dequeue a task, also see
1968 * set_current_state(). 1940 * set_current_state().
1969 * 1941 *
1942 * This function executes a full memory barrier before accessing the task
1943 * state; see set_current_state().
1944 *
1970 * Return: %true if @p->state changes (an actual wakeup was done), 1945 * Return: %true if @p->state changes (an actual wakeup was done),
1971 * %false otherwise. 1946 * %false otherwise.
1972 */ 1947 */
@@ -1998,21 +1973,20 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
1998 * be possible to, falsely, observe p->on_rq == 0 and get stuck 1973 * be possible to, falsely, observe p->on_rq == 0 and get stuck
1999 * in smp_cond_load_acquire() below. 1974 * in smp_cond_load_acquire() below.
2000 * 1975 *
2001 * sched_ttwu_pending() try_to_wake_up() 1976 * sched_ttwu_pending() try_to_wake_up()
2002 * [S] p->on_rq = 1; [L] P->state 1977 * STORE p->on_rq = 1 LOAD p->state
2003 * UNLOCK rq->lock -----. 1978 * UNLOCK rq->lock
2004 * \ 1979 *
2005 * +--- RMB 1980 * __schedule() (switch to task 'p')
2006 * schedule() / 1981 * LOCK rq->lock smp_rmb();
2007 * LOCK rq->lock -----' 1982 * smp_mb__after_spinlock();
2008 * UNLOCK rq->lock 1983 * UNLOCK rq->lock
2009 * 1984 *
2010 * [task p] 1985 * [task p]
2011 * [S] p->state = UNINTERRUPTIBLE [L] p->on_rq 1986 * STORE p->state = UNINTERRUPTIBLE LOAD p->on_rq
2012 * 1987 *
2013 * Pairs with the UNLOCK+LOCK on rq->lock from the 1988 * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in
2014 * last wakeup of our task and the schedule that got our task 1989 * __schedule(). See the comment for smp_mb__after_spinlock().
2015 * current.
2016 */ 1990 */
2017 smp_rmb(); 1991 smp_rmb();
2018 if (p->on_rq && ttwu_remote(p, wake_flags)) 1992 if (p->on_rq && ttwu_remote(p, wake_flags))
@@ -2026,15 +2000,17 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
2026 * One must be running (->on_cpu == 1) in order to remove oneself 2000 * One must be running (->on_cpu == 1) in order to remove oneself
2027 * from the runqueue. 2001 * from the runqueue.
2028 * 2002 *
2029 * [S] ->on_cpu = 1; [L] ->on_rq 2003 * __schedule() (switch to task 'p') try_to_wake_up()
2030 * UNLOCK rq->lock 2004 * STORE p->on_cpu = 1 LOAD p->on_rq
2031 * RMB 2005 * UNLOCK rq->lock
2032 * LOCK rq->lock 2006 *
2033 * [S] ->on_rq = 0; [L] ->on_cpu 2007 * __schedule() (put 'p' to sleep)
2008 * LOCK rq->lock smp_rmb();
2009 * smp_mb__after_spinlock();
2010 * STORE p->on_rq = 0 LOAD p->on_cpu
2034 * 2011 *
2035 * Pairs with the full barrier implied in the UNLOCK+LOCK on rq->lock 2012 * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in
2036 * from the consecutive calls to schedule(); the first switching to our 2013 * __schedule(). See the comment for smp_mb__after_spinlock().
2037 * task, the second putting it to sleep.
2038 */ 2014 */
2039 smp_rmb(); 2015 smp_rmb();
2040 2016
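Note: the rewritten wakeup-ordering comments boil down to a store-buffering pattern: one side stores the wakeup condition and then loads the task state, the other stores the task state and then loads the condition, and the paired barriers (smp_rmb() against LOCK plus smp_mb__after_spinlock()) forbid both sides from reading stale values, which is what would lose a wakeup. A standalone C11 sketch of that shape, using seq_cst fences purely as stand-ins for the kernel primitives:

    #include <stdatomic.h>
    #include <stdio.h>
    #include <pthread.h>

    static atomic_int on_rq, task_sleeping;
    static int r_waker, r_sleeper;

    /* Wakeup side: publish the condition, then look at the task state. */
    static void *waker(void *arg)
    {
            atomic_store(&on_rq, 1);                        /* "STORE p->on_rq = 1" */
            atomic_thread_fence(memory_order_seq_cst);      /* stands in for smp_rmb() pairing */
            r_waker = atomic_load(&task_sleeping);          /* "LOAD p->state" */
            return NULL;
    }

    /* Task going to sleep: set its state, then check whether it is queued. */
    static void *sleeper(void *arg)
    {
            atomic_store(&task_sleeping, 1);                /* "STORE p->state = UNINTERRUPTIBLE" */
            atomic_thread_fence(memory_order_seq_cst);      /* LOCK + smp_mb__after_spinlock() role */
            r_sleeper = atomic_load(&on_rq);                /* "LOAD p->on_rq" */
            return NULL;
    }

    int main(void)
    {
            pthread_t a, b;

            pthread_create(&a, NULL, waker, NULL);
            pthread_create(&b, NULL, sleeper, NULL);
            pthread_join(a, NULL);
            pthread_join(b, NULL);

            /* With both fences, r_waker == 0 && r_sleeper == 0 cannot happen. */
            printf("waker saw sleeping=%d, sleeper saw on_rq=%d\n", r_waker, r_sleeper);
            return 0;
    }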
@@ -2140,8 +2116,7 @@ out:
2140 * 2116 *
2141 * Return: 1 if the process was woken up, 0 if it was already running. 2117 * Return: 1 if the process was woken up, 0 if it was already running.
2142 * 2118 *
2143 * It may be assumed that this function implies a write memory barrier before 2119 * This function executes a full memory barrier before accessing the task state.
2144 * changing the task state if and only if any tasks are woken up.
2145 */ 2120 */
2146int wake_up_process(struct task_struct *p) 2121int wake_up_process(struct task_struct *p)
2147{ 2122{
@@ -2317,7 +2292,6 @@ static inline void init_schedstats(void) {}
2317int sched_fork(unsigned long clone_flags, struct task_struct *p) 2292int sched_fork(unsigned long clone_flags, struct task_struct *p)
2318{ 2293{
2319 unsigned long flags; 2294 unsigned long flags;
2320 int cpu = get_cpu();
2321 2295
2322 __sched_fork(clone_flags, p); 2296 __sched_fork(clone_flags, p);
2323 /* 2297 /*
@@ -2353,14 +2327,12 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
2353 p->sched_reset_on_fork = 0; 2327 p->sched_reset_on_fork = 0;
2354 } 2328 }
2355 2329
2356 if (dl_prio(p->prio)) { 2330 if (dl_prio(p->prio))
2357 put_cpu();
2358 return -EAGAIN; 2331 return -EAGAIN;
2359 } else if (rt_prio(p->prio)) { 2332 else if (rt_prio(p->prio))
2360 p->sched_class = &rt_sched_class; 2333 p->sched_class = &rt_sched_class;
2361 } else { 2334 else
2362 p->sched_class = &fair_sched_class; 2335 p->sched_class = &fair_sched_class;
2363 }
2364 2336
2365 init_entity_runnable_average(&p->se); 2337 init_entity_runnable_average(&p->se);
2366 2338
@@ -2376,7 +2348,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
2376 * We're setting the CPU for the first time, we don't migrate, 2348 * We're setting the CPU for the first time, we don't migrate,
2377 * so use __set_task_cpu(). 2349 * so use __set_task_cpu().
2378 */ 2350 */
2379 __set_task_cpu(p, cpu); 2351 __set_task_cpu(p, smp_processor_id());
2380 if (p->sched_class->task_fork) 2352 if (p->sched_class->task_fork)
2381 p->sched_class->task_fork(p); 2353 p->sched_class->task_fork(p);
2382 raw_spin_unlock_irqrestore(&p->pi_lock, flags); 2354 raw_spin_unlock_irqrestore(&p->pi_lock, flags);
@@ -2393,8 +2365,6 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
2393 plist_node_init(&p->pushable_tasks, MAX_PRIO); 2365 plist_node_init(&p->pushable_tasks, MAX_PRIO);
2394 RB_CLEAR_NODE(&p->pushable_dl_tasks); 2366 RB_CLEAR_NODE(&p->pushable_dl_tasks);
2395#endif 2367#endif
2396
2397 put_cpu();
2398 return 0; 2368 return 0;
2399} 2369}
2400 2370
@@ -5714,13 +5684,6 @@ void set_rq_offline(struct rq *rq)
5714 } 5684 }
5715} 5685}
5716 5686
5717static void set_cpu_rq_start_time(unsigned int cpu)
5718{
5719 struct rq *rq = cpu_rq(cpu);
5720
5721 rq->age_stamp = sched_clock_cpu(cpu);
5722}
5723
5724/* 5687/*
5725 * used to mark begin/end of suspend/resume: 5688 * used to mark begin/end of suspend/resume:
5726 */ 5689 */
@@ -5838,7 +5801,6 @@ static void sched_rq_cpu_starting(unsigned int cpu)
5838 5801
5839int sched_cpu_starting(unsigned int cpu) 5802int sched_cpu_starting(unsigned int cpu)
5840{ 5803{
5841 set_cpu_rq_start_time(cpu);
5842 sched_rq_cpu_starting(cpu); 5804 sched_rq_cpu_starting(cpu);
5843 sched_tick_start(cpu); 5805 sched_tick_start(cpu);
5844 return 0; 5806 return 0;
@@ -5954,7 +5916,6 @@ void __init sched_init(void)
5954 int i, j; 5916 int i, j;
5955 unsigned long alloc_size = 0, ptr; 5917 unsigned long alloc_size = 0, ptr;
5956 5918
5957 sched_clock_init();
5958 wait_bit_init(); 5919 wait_bit_init();
5959 5920
5960#ifdef CONFIG_FAIR_GROUP_SCHED 5921#ifdef CONFIG_FAIR_GROUP_SCHED
@@ -6106,7 +6067,6 @@ void __init sched_init(void)
6106 6067
6107#ifdef CONFIG_SMP 6068#ifdef CONFIG_SMP
6108 idle_thread_set_boot_cpu(); 6069 idle_thread_set_boot_cpu();
6109 set_cpu_rq_start_time(smp_processor_id());
6110#endif 6070#endif
6111 init_sched_fair_class(); 6071 init_sched_fair_class();
6112 6072
@@ -6785,6 +6745,16 @@ static int cpu_cfs_stat_show(struct seq_file *sf, void *v)
6785 seq_printf(sf, "nr_throttled %d\n", cfs_b->nr_throttled); 6745 seq_printf(sf, "nr_throttled %d\n", cfs_b->nr_throttled);
6786 seq_printf(sf, "throttled_time %llu\n", cfs_b->throttled_time); 6746 seq_printf(sf, "throttled_time %llu\n", cfs_b->throttled_time);
6787 6747
6748 if (schedstat_enabled() && tg != &root_task_group) {
6749 u64 ws = 0;
6750 int i;
6751
6752 for_each_possible_cpu(i)
6753 ws += schedstat_val(tg->se[i]->statistics.wait_sum);
6754
6755 seq_printf(sf, "wait_sum %llu\n", ws);
6756 }
6757
6788 return 0; 6758 return 0;
6789} 6759}
6790#endif /* CONFIG_CFS_BANDWIDTH */ 6760#endif /* CONFIG_CFS_BANDWIDTH */
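Note: the new wait_sum line in cpu.stat is simply the per-CPU schedstat wait_sum of the group's sched entities summed and printed once. A trivial model of that aggregation (all values invented):

    #include <stdio.h>

    #define NR_CPUS 4

    int main(void)
    {
            /* Per-CPU wait_sum values for one task group (nanoseconds, invented). */
            unsigned long long wait_sum[NR_CPUS] = { 120000, 45000, 0, 98000 };
            unsigned long long ws = 0;
            int i;

            for (i = 0; i < NR_CPUS; i++)
                    ws += wait_sum[i];

            printf("wait_sum %llu\n", ws);  /* same format as the new cpu.stat line */
            return 0;
    }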
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index c907fde01eaa..3fffad3bc8a8 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -53,9 +53,7 @@ struct sugov_cpu {
53 unsigned int iowait_boost_max; 53 unsigned int iowait_boost_max;
54 u64 last_update; 54 u64 last_update;
55 55
56 /* The fields below are only needed when sharing a policy: */ 56 unsigned long bw_dl;
57 unsigned long util_cfs;
58 unsigned long util_dl;
59 unsigned long max; 57 unsigned long max;
60 58
61 /* The field below is for single-CPU policies only: */ 59 /* The field below is for single-CPU policies only: */
@@ -179,33 +177,90 @@ static unsigned int get_next_freq(struct sugov_policy *sg_policy,
179 return cpufreq_driver_resolve_freq(policy, freq); 177 return cpufreq_driver_resolve_freq(policy, freq);
180} 178}
181 179
182static void sugov_get_util(struct sugov_cpu *sg_cpu) 180/*
181 * This function computes an effective utilization for the given CPU, to be
182 * used for frequency selection given the linear relation: f = u * f_max.
183 *
184 * The scheduler tracks the following metrics:
185 *
186 * cpu_util_{cfs,rt,dl,irq}()
187 * cpu_bw_dl()
188 *
189 * Where the cfs,rt and dl util numbers are tracked with the same metric and
190 * synchronized windows and are thus directly comparable.
191 *
192 * The cfs,rt,dl utilization are the running times measured with rq->clock_task
193 * which excludes things like IRQ and steal-time. These latter are then accrued
194 * in the irq utilization.
195 *
196 * The DL bandwidth number otoh is not a measured metric but a value computed
197 * based on the task model parameters and gives the minimal utilization
198 * required to meet deadlines.
199 */
200static unsigned long sugov_get_util(struct sugov_cpu *sg_cpu)
183{ 201{
184 struct rq *rq = cpu_rq(sg_cpu->cpu); 202 struct rq *rq = cpu_rq(sg_cpu->cpu);
203 unsigned long util, irq, max;
185 204
186 sg_cpu->max = arch_scale_cpu_capacity(NULL, sg_cpu->cpu); 205 sg_cpu->max = max = arch_scale_cpu_capacity(NULL, sg_cpu->cpu);
187 sg_cpu->util_cfs = cpu_util_cfs(rq); 206 sg_cpu->bw_dl = cpu_bw_dl(rq);
188 sg_cpu->util_dl = cpu_util_dl(rq);
189}
190
191static unsigned long sugov_aggregate_util(struct sugov_cpu *sg_cpu)
192{
193 struct rq *rq = cpu_rq(sg_cpu->cpu);
194 207
195 if (rt_rq_is_runnable(&rq->rt)) 208 if (rt_rq_is_runnable(&rq->rt))
196 return sg_cpu->max; 209 return max;
210
211 /*
212 * Early check to see if IRQ/steal time saturates the CPU, can be
213 * because of inaccuracies in how we track these -- see
214 * update_irq_load_avg().
215 */
216 irq = cpu_util_irq(rq);
217 if (unlikely(irq >= max))
218 return max;
219
220 /*
 221 * Because the time spent on RT/DL tasks is visible as 'lost' time to
222 * CFS tasks and we use the same metric to track the effective
223 * utilization (PELT windows are synchronized) we can directly add them
224 * to obtain the CPU's actual utilization.
225 */
226 util = cpu_util_cfs(rq);
227 util += cpu_util_rt(rq);
228
229 /*
230 * We do not make cpu_util_dl() a permanent part of this sum because we
231 * want to use cpu_bw_dl() later on, but we need to check if the
232 * CFS+RT+DL sum is saturated (ie. no idle time) such that we select
233 * f_max when there is no idle time.
234 *
235 * NOTE: numerical errors or stop class might cause us to not quite hit
236 * saturation when we should -- something for later.
237 */
238 if ((util + cpu_util_dl(rq)) >= max)
239 return max;
240
241 /*
242 * There is still idle time; further improve the number by using the
243 * irq metric. Because IRQ/steal time is hidden from the task clock we
244 * need to scale the task numbers:
245 *
246 * 1 - irq
247 * U' = irq + ------- * U
248 * max
249 */
250 util = scale_irq_capacity(util, irq, max);
251 util += irq;
197 252
198 /* 253 /*
199 * Utilization required by DEADLINE must always be granted while, for 254 * Bandwidth required by DEADLINE must always be granted while, for
200 * FAIR, we use blocked utilization of IDLE CPUs as a mechanism to 255 * FAIR and RT, we use blocked utilization of IDLE CPUs as a mechanism
201 * gracefully reduce the frequency when no tasks show up for longer 256 * to gracefully reduce the frequency when no tasks show up for longer
202 * periods of time. 257 * periods of time.
203 * 258 *
204 * Ideally we would like to set util_dl as min/guaranteed freq and 259 * Ideally we would like to set bw_dl as min/guaranteed freq and util +
205 * util_cfs + util_dl as requested freq. However, cpufreq is not yet 260 * bw_dl as requested freq. However, cpufreq is not yet ready for such
206 * ready for such an interface. So, we only do the latter for now. 261 * an interface. So, we only do the latter for now.
207 */ 262 */
208 return min(sg_cpu->max, (sg_cpu->util_dl + sg_cpu->util_cfs)); 263 return min(max, util + sg_cpu->bw_dl);
209} 264}
210 265
211/** 266/**
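Note: the reworked sugov_get_util() folds CFS, RT, DL and IRQ pressure into a single request: add the directly comparable CFS and RT running time, bail to f_max if DL utilization would saturate the CPU, apply the IRQ correction U' = irq + (1 - irq/max) * U, and finally add the DEADLINE bandwidth floor. A userspace sketch of that arithmetic, modeling scale_irq_capacity() as util * (max - irq) / max per the comment's formula (inputs invented, the early RT-runnable return omitted):

    #include <stdio.h>

    static unsigned long effective_util(unsigned long cfs, unsigned long rt,
                                        unsigned long dl_util, unsigned long dl_bw,
                                        unsigned long irq, unsigned long max)
    {
            unsigned long util;

            if (irq >= max)                 /* IRQ/steal time saturates the CPU */
                    return max;

            util = cfs + rt;                /* same PELT metric, directly additive */
            if (util + dl_util >= max)      /* no idle time left: ask for f_max   */
                    return max;

            /* U' = irq + (1 - irq/max) * U, i.e. scale task time to wall time. */
            util = util * (max - irq) / max;
            util += irq;

            util += dl_bw;                  /* DEADLINE bandwidth is always granted */
            return util < max ? util : max;
    }

    int main(void)
    {
            /* max capacity 1024; CFS 300, RT 100, DL running 50, DL bw 80, IRQ 64 */
            printf("effective util = %lu / 1024\n",
                   effective_util(300, 100, 50, 80, 64, 1024));
            return 0;
    }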
@@ -360,7 +415,7 @@ static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; }
360 */ 415 */
361static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu, struct sugov_policy *sg_policy) 416static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu, struct sugov_policy *sg_policy)
362{ 417{
363 if (cpu_util_dl(cpu_rq(sg_cpu->cpu)) > sg_cpu->util_dl) 418 if (cpu_bw_dl(cpu_rq(sg_cpu->cpu)) > sg_cpu->bw_dl)
364 sg_policy->need_freq_update = true; 419 sg_policy->need_freq_update = true;
365} 420}
366 421
@@ -383,9 +438,8 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
383 438
384 busy = sugov_cpu_is_busy(sg_cpu); 439 busy = sugov_cpu_is_busy(sg_cpu);
385 440
386 sugov_get_util(sg_cpu); 441 util = sugov_get_util(sg_cpu);
387 max = sg_cpu->max; 442 max = sg_cpu->max;
388 util = sugov_aggregate_util(sg_cpu);
389 sugov_iowait_apply(sg_cpu, time, &util, &max); 443 sugov_iowait_apply(sg_cpu, time, &util, &max);
390 next_f = get_next_freq(sg_policy, util, max); 444 next_f = get_next_freq(sg_policy, util, max);
391 /* 445 /*
@@ -424,9 +478,8 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
424 struct sugov_cpu *j_sg_cpu = &per_cpu(sugov_cpu, j); 478 struct sugov_cpu *j_sg_cpu = &per_cpu(sugov_cpu, j);
425 unsigned long j_util, j_max; 479 unsigned long j_util, j_max;
426 480
427 sugov_get_util(j_sg_cpu); 481 j_util = sugov_get_util(j_sg_cpu);
428 j_max = j_sg_cpu->max; 482 j_max = j_sg_cpu->max;
429 j_util = sugov_aggregate_util(j_sg_cpu);
430 sugov_iowait_apply(j_sg_cpu, time, &j_util, &j_max); 483 sugov_iowait_apply(j_sg_cpu, time, &j_util, &j_max);
431 484
432 if (j_util * max > j_max * util) { 485 if (j_util * max > j_max * util) {
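Note: in the shared-policy path, the j_util * max > j_max * util comparison keeps the CPU with the largest util/max ratio without dividing, which also works across CPUs of different capacity. A short sketch of that selection rule (sample values invented):

    #include <stdio.h>

    struct cpu_sample { unsigned long util, max; };

    int main(void)
    {
            /* Invented samples: (util, capacity) per CPU in the policy. */
            struct cpu_sample s[] = { { 200, 1024 }, { 150, 446 }, { 300, 1024 } };
            unsigned long util = 0, max = 1;        /* running "largest ratio" pair */
            unsigned int i, best = 0;

            for (i = 0; i < sizeof(s) / sizeof(s[0]); i++) {
                    /* s[i].util/s[i].max > util/max, without dividing */
                    if (s[i].util * max > s[i].max * util) {
                            util = s[i].util;
                            max = s[i].max;
                            best = i;
                    }
            }
            printf("CPU %u dominates: util=%lu max=%lu\n", best, util, max);
            return 0;
    }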
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index b5fbdde6afa9..997ea7b839fa 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -16,6 +16,7 @@
16 * Fabio Checconi <fchecconi@gmail.com> 16 * Fabio Checconi <fchecconi@gmail.com>
17 */ 17 */
18#include "sched.h" 18#include "sched.h"
19#include "pelt.h"
19 20
20struct dl_bandwidth def_dl_bandwidth; 21struct dl_bandwidth def_dl_bandwidth;
21 22
@@ -1179,8 +1180,6 @@ static void update_curr_dl(struct rq *rq)
1179 curr->se.exec_start = now; 1180 curr->se.exec_start = now;
1180 cgroup_account_cputime(curr, delta_exec); 1181 cgroup_account_cputime(curr, delta_exec);
1181 1182
1182 sched_rt_avg_update(rq, delta_exec);
1183
1184 if (dl_entity_is_special(dl_se)) 1183 if (dl_entity_is_special(dl_se))
1185 return; 1184 return;
1186 1185
@@ -1761,6 +1760,9 @@ pick_next_task_dl(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
1761 1760
1762 deadline_queue_push_tasks(rq); 1761 deadline_queue_push_tasks(rq);
1763 1762
1763 if (rq->curr->sched_class != &dl_sched_class)
1764 update_dl_rq_load_avg(rq_clock_task(rq), rq, 0);
1765
1764 return p; 1766 return p;
1765} 1767}
1766 1768
@@ -1768,6 +1770,7 @@ static void put_prev_task_dl(struct rq *rq, struct task_struct *p)
1768{ 1770{
1769 update_curr_dl(rq); 1771 update_curr_dl(rq);
1770 1772
1773 update_dl_rq_load_avg(rq_clock_task(rq), rq, 1);
1771 if (on_dl_rq(&p->dl) && p->nr_cpus_allowed > 1) 1774 if (on_dl_rq(&p->dl) && p->nr_cpus_allowed > 1)
1772 enqueue_pushable_dl_task(rq, p); 1775 enqueue_pushable_dl_task(rq, p);
1773} 1776}
@@ -1784,6 +1787,7 @@ static void task_tick_dl(struct rq *rq, struct task_struct *p, int queued)
1784{ 1787{
1785 update_curr_dl(rq); 1788 update_curr_dl(rq);
1786 1789
1790 update_dl_rq_load_avg(rq_clock_task(rq), rq, 1);
1787 /* 1791 /*
1788 * Even when we have runtime, update_curr_dl() might have resulted in us 1792 * Even when we have runtime, update_curr_dl() might have resulted in us
1789 * not being the leftmost task anymore. In that case NEED_RESCHED will 1793 * not being the leftmost task anymore. In that case NEED_RESCHED will
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index e593b4118578..60caf1fb94e0 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -111,20 +111,19 @@ static int sched_feat_set(char *cmp)
111 cmp += 3; 111 cmp += 3;
112 } 112 }
113 113
114 for (i = 0; i < __SCHED_FEAT_NR; i++) { 114 i = match_string(sched_feat_names, __SCHED_FEAT_NR, cmp);
115 if (strcmp(cmp, sched_feat_names[i]) == 0) { 115 if (i < 0)
116 if (neg) { 116 return i;
117 sysctl_sched_features &= ~(1UL << i); 117
118 sched_feat_disable(i); 118 if (neg) {
119 } else { 119 sysctl_sched_features &= ~(1UL << i);
120 sysctl_sched_features |= (1UL << i); 120 sched_feat_disable(i);
121 sched_feat_enable(i); 121 } else {
122 } 122 sysctl_sched_features |= (1UL << i);
123 break; 123 sched_feat_enable(i);
124 }
125 } 124 }
126 125
127 return i; 126 return 0;
128} 127}
129 128
130static ssize_t 129static ssize_t
@@ -133,7 +132,7 @@ sched_feat_write(struct file *filp, const char __user *ubuf,
133{ 132{
134 char buf[64]; 133 char buf[64];
135 char *cmp; 134 char *cmp;
136 int i; 135 int ret;
137 struct inode *inode; 136 struct inode *inode;
138 137
139 if (cnt > 63) 138 if (cnt > 63)
@@ -148,10 +147,10 @@ sched_feat_write(struct file *filp, const char __user *ubuf,
148 /* Ensure the static_key remains in a consistent state */ 147 /* Ensure the static_key remains in a consistent state */
149 inode = file_inode(filp); 148 inode = file_inode(filp);
150 inode_lock(inode); 149 inode_lock(inode);
151 i = sched_feat_set(cmp); 150 ret = sched_feat_set(cmp);
152 inode_unlock(inode); 151 inode_unlock(inode);
153 if (i == __SCHED_FEAT_NR) 152 if (ret < 0)
154 return -EINVAL; 153 return ret;
155 154
156 *ppos += cnt; 155 *ppos += cnt;
157 156
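Note: sched_feat_set() now uses match_string() to look the feature name up and passes any negative errno straight back, so sched_feat_write() no longer needs the __SCHED_FEAT_NR sentinel check. A userspace stand-in for that lookup, assuming the "index or negative errno" convention relied on above (the real helper is a kernel library function; feature names are shown only for flavour):

    #include <errno.h>
    #include <stdio.h>
    #include <string.h>

    static int match_string_model(const char * const *array, size_t n, const char *s)
    {
            size_t i;

            for (i = 0; i < n; i++)
                    if (array[i] && !strcmp(array[i], s))
                            return (int)i;  /* index of the match */
            return -EINVAL;                 /* no match: negative errno */
    }

    int main(void)
    {
            static const char * const feat_names[] = {
                    "GENTLE_FAIR_SLEEPERS", "START_DEBIT", "CACHE_HOT_BUDDY",
            };

            printf("START_DEBIT  -> %d\n",
                   match_string_model(feat_names, 3, "START_DEBIT"));
            printf("NO_SUCH_FEAT -> %d\n",
                   match_string_model(feat_names, 3, "NO_SUCH_FEAT"));
            return 0;
    }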
@@ -623,8 +622,6 @@ void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq)
623#undef PU 622#undef PU
624} 623}
625 624
626extern __read_mostly int sched_clock_running;
627
628static void print_cpu(struct seq_file *m, int cpu) 625static void print_cpu(struct seq_file *m, int cpu)
629{ 626{
630 struct rq *rq = cpu_rq(cpu); 627 struct rq *rq = cpu_rq(cpu);
@@ -843,8 +840,8 @@ void print_numa_stats(struct seq_file *m, int node, unsigned long tsf,
843 unsigned long tpf, unsigned long gsf, unsigned long gpf) 840 unsigned long tpf, unsigned long gsf, unsigned long gpf)
844{ 841{
845 SEQ_printf(m, "numa_faults node=%d ", node); 842 SEQ_printf(m, "numa_faults node=%d ", node);
846 SEQ_printf(m, "task_private=%lu task_shared=%lu ", tsf, tpf); 843 SEQ_printf(m, "task_private=%lu task_shared=%lu ", tpf, tsf);
847 SEQ_printf(m, "group_private=%lu group_shared=%lu\n", gsf, gpf); 844 SEQ_printf(m, "group_private=%lu group_shared=%lu\n", gpf, gsf);
848} 845}
849#endif 846#endif
850 847
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 2f0a0be4d344..309c93fcc604 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -255,9 +255,6 @@ static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
255 return cfs_rq->rq; 255 return cfs_rq->rq;
256} 256}
257 257
258/* An entity is a task if it doesn't "own" a runqueue */
259#define entity_is_task(se) (!se->my_q)
260
261static inline struct task_struct *task_of(struct sched_entity *se) 258static inline struct task_struct *task_of(struct sched_entity *se)
262{ 259{
263 SCHED_WARN_ON(!entity_is_task(se)); 260 SCHED_WARN_ON(!entity_is_task(se));
@@ -419,7 +416,6 @@ static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
419 return container_of(cfs_rq, struct rq, cfs); 416 return container_of(cfs_rq, struct rq, cfs);
420} 417}
421 418
422#define entity_is_task(se) 1
423 419
424#define for_each_sched_entity(se) \ 420#define for_each_sched_entity(se) \
425 for (; se; se = NULL) 421 for (; se; se = NULL)
@@ -692,7 +688,7 @@ static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se)
692} 688}
693 689
694#ifdef CONFIG_SMP 690#ifdef CONFIG_SMP
695 691#include "pelt.h"
696#include "sched-pelt.h" 692#include "sched-pelt.h"
697 693
698static int select_idle_sibling(struct task_struct *p, int prev_cpu, int cpu); 694static int select_idle_sibling(struct task_struct *p, int prev_cpu, int cpu);
@@ -735,11 +731,12 @@ static void attach_entity_cfs_rq(struct sched_entity *se);
735 * To solve this problem, we also cap the util_avg of successive tasks to 731 * To solve this problem, we also cap the util_avg of successive tasks to
736 * only 1/2 of the left utilization budget: 732 * only 1/2 of the left utilization budget:
737 * 733 *
738 * util_avg_cap = (1024 - cfs_rq->avg.util_avg) / 2^n 734 * util_avg_cap = (cpu_scale - cfs_rq->avg.util_avg) / 2^n
739 * 735 *
740 * where n denotes the nth task. 736 * where n denotes the nth task and cpu_scale the CPU capacity.
741 * 737 *
742 * For example, a simplest series from the beginning would be like: 738 * For example, for a CPU with 1024 of capacity, a simplest series from
739 * the beginning would be like:
743 * 740 *
744 * task util_avg: 512, 256, 128, 64, 32, 16, 8, ... 741 * task util_avg: 512, 256, 128, 64, 32, 16, 8, ...
745 * cfs_rq util_avg: 512, 768, 896, 960, 992, 1008, 1016, ... 742 * cfs_rq util_avg: 512, 768, 896, 960, 992, 1008, 1016, ...
@@ -751,7 +748,8 @@ void post_init_entity_util_avg(struct sched_entity *se)
751{ 748{
752 struct cfs_rq *cfs_rq = cfs_rq_of(se); 749 struct cfs_rq *cfs_rq = cfs_rq_of(se);
753 struct sched_avg *sa = &se->avg; 750 struct sched_avg *sa = &se->avg;
754 long cap = (long)(SCHED_CAPACITY_SCALE - cfs_rq->avg.util_avg) / 2; 751 long cpu_scale = arch_scale_cpu_capacity(NULL, cpu_of(rq_of(cfs_rq)));
752 long cap = (long)(cpu_scale - cfs_rq->avg.util_avg) / 2;
755 753
756 if (cap > 0) { 754 if (cap > 0) {
757 if (cfs_rq->avg.util_avg != 0) { 755 if (cfs_rq->avg.util_avg != 0) {
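Note: with the cap now derived from the CPU's own capacity instead of SCHED_CAPACITY_SCALE, the "half the remaining budget" series shrinks proportionally on smaller CPUs. A quick model of just the cap rule for two capacities (the 446 LITTLE-core capacity is an invented example):

    #include <stdio.h>

    static void show_series(long cpu_scale, int tasks)
    {
            long cfs_util = 0;
            int n;

            printf("cpu_scale=%ld:", cpu_scale);
            for (n = 0; n < tasks; n++) {
                    long cap = (cpu_scale - cfs_util) / 2;  /* new task's initial util_avg */

                    printf(" %ld", cap);
                    cfs_util += cap;                        /* it joins the cfs_rq average */
            }
            printf("\n");
    }

    int main(void)
    {
            show_series(1024, 7);   /* big core: 512, 256, 128, 64, ... */
            show_series(446, 7);    /* smaller core: 223, 111, 56, ...  */
            return 0;
    }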
@@ -1314,7 +1312,7 @@ static unsigned long score_nearby_nodes(struct task_struct *p, int nid,
1314 * of each group. Skip other nodes. 1312 * of each group. Skip other nodes.
1315 */ 1313 */
1316 if (sched_numa_topology_type == NUMA_BACKPLANE && 1314 if (sched_numa_topology_type == NUMA_BACKPLANE &&
1317 dist > maxdist) 1315 dist >= maxdist)
1318 continue; 1316 continue;
1319 1317
1320 /* Add up the faults from nearby nodes. */ 1318 /* Add up the faults from nearby nodes. */
@@ -1452,15 +1450,12 @@ static unsigned long capacity_of(int cpu);
1452 1450
1453/* Cached statistics for all CPUs within a node */ 1451/* Cached statistics for all CPUs within a node */
1454struct numa_stats { 1452struct numa_stats {
1455 unsigned long nr_running;
1456 unsigned long load; 1453 unsigned long load;
1457 1454
1458 /* Total compute capacity of CPUs on a node */ 1455 /* Total compute capacity of CPUs on a node */
1459 unsigned long compute_capacity; 1456 unsigned long compute_capacity;
1460 1457
1461 /* Approximate capacity in terms of runnable tasks on a node */ 1458 unsigned int nr_running;
1462 unsigned long task_capacity;
1463 int has_free_capacity;
1464}; 1459};
1465 1460
1466/* 1461/*
@@ -1487,8 +1482,7 @@ static void update_numa_stats(struct numa_stats *ns, int nid)
1487 * the @ns structure is NULL'ed and task_numa_compare() will 1482 * the @ns structure is NULL'ed and task_numa_compare() will
1488 * not find this node attractive. 1483 * not find this node attractive.
1489 * 1484 *
1490 * We'll either bail at !has_free_capacity, or we'll detect a huge 1485 * We'll detect a huge imbalance and bail there.
1491 * imbalance and bail there.
1492 */ 1486 */
1493 if (!cpus) 1487 if (!cpus)
1494 return; 1488 return;
@@ -1497,9 +1491,8 @@ static void update_numa_stats(struct numa_stats *ns, int nid)
1497 smt = DIV_ROUND_UP(SCHED_CAPACITY_SCALE * cpus, ns->compute_capacity); 1491 smt = DIV_ROUND_UP(SCHED_CAPACITY_SCALE * cpus, ns->compute_capacity);
1498 capacity = cpus / smt; /* cores */ 1492 capacity = cpus / smt; /* cores */
1499 1493
1500 ns->task_capacity = min_t(unsigned, capacity, 1494 capacity = min_t(unsigned, capacity,
1501 DIV_ROUND_CLOSEST(ns->compute_capacity, SCHED_CAPACITY_SCALE)); 1495 DIV_ROUND_CLOSEST(ns->compute_capacity, SCHED_CAPACITY_SCALE));
1502 ns->has_free_capacity = (ns->nr_running < ns->task_capacity);
1503} 1496}
1504 1497
1505struct task_numa_env { 1498struct task_numa_env {
@@ -1548,28 +1541,12 @@ static bool load_too_imbalanced(long src_load, long dst_load,
1548 src_capacity = env->src_stats.compute_capacity; 1541 src_capacity = env->src_stats.compute_capacity;
1549 dst_capacity = env->dst_stats.compute_capacity; 1542 dst_capacity = env->dst_stats.compute_capacity;
1550 1543
1551 /* We care about the slope of the imbalance, not the direction. */ 1544 imb = abs(dst_load * src_capacity - src_load * dst_capacity);
1552 if (dst_load < src_load)
1553 swap(dst_load, src_load);
1554 1545
1555 /* Is the difference below the threshold? */
1556 imb = dst_load * src_capacity * 100 -
1557 src_load * dst_capacity * env->imbalance_pct;
1558 if (imb <= 0)
1559 return false;
1560
1561 /*
1562 * The imbalance is above the allowed threshold.
1563 * Compare it with the old imbalance.
1564 */
1565 orig_src_load = env->src_stats.load; 1546 orig_src_load = env->src_stats.load;
1566 orig_dst_load = env->dst_stats.load; 1547 orig_dst_load = env->dst_stats.load;
1567 1548
1568 if (orig_dst_load < orig_src_load) 1549 old_imb = abs(orig_dst_load * src_capacity - orig_src_load * dst_capacity);
1569 swap(orig_dst_load, orig_src_load);
1570
1571 old_imb = orig_dst_load * src_capacity * 100 -
1572 orig_src_load * dst_capacity * env->imbalance_pct;
1573 1550
1574 /* Would this change make things worse? */ 1551 /* Would this change make things worse? */
1575 return (imb > old_imb); 1552 return (imb > old_imb);
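Note: load_too_imbalanced() now works on the absolute capacity-weighted difference directly: a move or swap is rejected only if |dst_load * src_capacity - src_load * dst_capacity| would grow compared to the current value, with the old imbalance_pct thresholds dropped. A small worked example of that test (loads and capacities invented):

    #include <stdio.h>
    #include <stdlib.h>

    static long imb(long dst_load, long src_load, long dst_cap, long src_cap)
    {
            /* |dst_load/dst_cap - src_load/src_cap|, kept as a cross product */
            return labs(dst_load * src_cap - src_load * dst_cap);
    }

    int main(void)
    {
            long src_cap = 2048, dst_cap = 1024;    /* invented node capacities */
            long src_load = 900, dst_load = 300;    /* current per-node load    */
            long before = imb(dst_load, src_load, dst_cap, src_cap);
            long loads[] = { 100, 300 };            /* candidate task h_loads   */
            int i;

            for (i = 0; i < 2; i++) {
                    long after = imb(dst_load + loads[i], src_load - loads[i],
                                     dst_cap, src_cap);

                    printf("move %ld: imbalance %ld -> %ld, %s\n", loads[i],
                           before, after, after > before ? "reject" : "allow");
            }
            return 0;
    }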
@@ -1582,9 +1559,8 @@ static bool load_too_imbalanced(long src_load, long dst_load,
1582 * be exchanged with the source task 1559 * be exchanged with the source task
1583 */ 1560 */
1584static void task_numa_compare(struct task_numa_env *env, 1561static void task_numa_compare(struct task_numa_env *env,
1585 long taskimp, long groupimp) 1562 long taskimp, long groupimp, bool maymove)
1586{ 1563{
1587 struct rq *src_rq = cpu_rq(env->src_cpu);
1588 struct rq *dst_rq = cpu_rq(env->dst_cpu); 1564 struct rq *dst_rq = cpu_rq(env->dst_cpu);
1589 struct task_struct *cur; 1565 struct task_struct *cur;
1590 long src_load, dst_load; 1566 long src_load, dst_load;
@@ -1605,97 +1581,73 @@ static void task_numa_compare(struct task_numa_env *env,
1605 if (cur == env->p) 1581 if (cur == env->p)
1606 goto unlock; 1582 goto unlock;
1607 1583
1584 if (!cur) {
1585 if (maymove || imp > env->best_imp)
1586 goto assign;
1587 else
1588 goto unlock;
1589 }
1590
1608 /* 1591 /*
1609 * "imp" is the fault differential for the source task between the 1592 * "imp" is the fault differential for the source task between the
1610 * source and destination node. Calculate the total differential for 1593 * source and destination node. Calculate the total differential for
1611 * the source task and potential destination task. The more negative 1594 * the source task and potential destination task. The more negative
1612 * the value is, the more rmeote accesses that would be expected to 1595 * the value is, the more remote accesses that would be expected to
1613 * be incurred if the tasks were swapped. 1596 * be incurred if the tasks were swapped.
1614 */ 1597 */
1615 if (cur) { 1598 /* Skip this swap candidate if cannot move to the source cpu */
1616 /* Skip this swap candidate if cannot move to the source CPU: */ 1599 if (!cpumask_test_cpu(env->src_cpu, &cur->cpus_allowed))
1617 if (!cpumask_test_cpu(env->src_cpu, &cur->cpus_allowed)) 1600 goto unlock;
1618 goto unlock;
1619 1601
1602 /*
1603 * If dst and source tasks are in the same NUMA group, or not
1604 * in any group then look only at task weights.
1605 */
1606 if (cur->numa_group == env->p->numa_group) {
1607 imp = taskimp + task_weight(cur, env->src_nid, dist) -
1608 task_weight(cur, env->dst_nid, dist);
1620 /* 1609 /*
1621 * If dst and source tasks are in the same NUMA group, or not 1610 * Add some hysteresis to prevent swapping the
1622 * in any group then look only at task weights. 1611 * tasks within a group over tiny differences.
1623 */ 1612 */
1624 if (cur->numa_group == env->p->numa_group) { 1613 if (cur->numa_group)
1625 imp = taskimp + task_weight(cur, env->src_nid, dist) - 1614 imp -= imp / 16;
1626 task_weight(cur, env->dst_nid, dist); 1615 } else {
1627 /* 1616 /*
1628 * Add some hysteresis to prevent swapping the 1617 * Compare the group weights. If a task is all by itself
1629 * tasks within a group over tiny differences. 1618 * (not part of a group), use the task weight instead.
1630 */ 1619 */
1631 if (cur->numa_group) 1620 if (cur->numa_group && env->p->numa_group)
1632 imp -= imp/16; 1621 imp += group_weight(cur, env->src_nid, dist) -
1633 } else { 1622 group_weight(cur, env->dst_nid, dist);
1634 /* 1623 else
1635 * Compare the group weights. If a task is all by 1624 imp += task_weight(cur, env->src_nid, dist) -
1636 * itself (not part of a group), use the task weight 1625 task_weight(cur, env->dst_nid, dist);
1637 * instead.
1638 */
1639 if (cur->numa_group)
1640 imp += group_weight(cur, env->src_nid, dist) -
1641 group_weight(cur, env->dst_nid, dist);
1642 else
1643 imp += task_weight(cur, env->src_nid, dist) -
1644 task_weight(cur, env->dst_nid, dist);
1645 }
1646 } 1626 }
1647 1627
1648 if (imp <= env->best_imp && moveimp <= env->best_imp) 1628 if (imp <= env->best_imp)
1649 goto unlock; 1629 goto unlock;
1650 1630
1651 if (!cur) { 1631 if (maymove && moveimp > imp && moveimp > env->best_imp) {
1652 /* Is there capacity at our destination? */ 1632 imp = moveimp - 1;
1653 if (env->src_stats.nr_running <= env->src_stats.task_capacity && 1633 cur = NULL;
1654 !env->dst_stats.has_free_capacity)
1655 goto unlock;
1656
1657 goto balance;
1658 }
1659
1660 /* Balance doesn't matter much if we're running a task per CPU: */
1661 if (imp > env->best_imp && src_rq->nr_running == 1 &&
1662 dst_rq->nr_running == 1)
1663 goto assign; 1634 goto assign;
1635 }
1664 1636
1665 /* 1637 /*
1666 * In the overloaded case, try and keep the load balanced. 1638 * In the overloaded case, try and keep the load balanced.
1667 */ 1639 */
1668balance: 1640 load = task_h_load(env->p) - task_h_load(cur);
1669 load = task_h_load(env->p); 1641 if (!load)
1642 goto assign;
1643
1670 dst_load = env->dst_stats.load + load; 1644 dst_load = env->dst_stats.load + load;
1671 src_load = env->src_stats.load - load; 1645 src_load = env->src_stats.load - load;
1672 1646
1673 if (moveimp > imp && moveimp > env->best_imp) {
1674 /*
1675 * If the improvement from just moving env->p direction is
1676 * better than swapping tasks around, check if a move is
1677 * possible. Store a slightly smaller score than moveimp,
1678 * so an actually idle CPU will win.
1679 */
1680 if (!load_too_imbalanced(src_load, dst_load, env)) {
1681 imp = moveimp - 1;
1682 cur = NULL;
1683 goto assign;
1684 }
1685 }
1686
1687 if (imp <= env->best_imp)
1688 goto unlock;
1689
1690 if (cur) {
1691 load = task_h_load(cur);
1692 dst_load -= load;
1693 src_load += load;
1694 }
1695
1696 if (load_too_imbalanced(src_load, dst_load, env)) 1647 if (load_too_imbalanced(src_load, dst_load, env))
1697 goto unlock; 1648 goto unlock;
1698 1649
1650assign:
1699 /* 1651 /*
1700 * One idle CPU per node is evaluated for a task numa move. 1652 * One idle CPU per node is evaluated for a task numa move.
1701 * Call select_idle_sibling to maybe find a better one. 1653 * Call select_idle_sibling to maybe find a better one.
@@ -1711,7 +1663,6 @@ balance:
1711 local_irq_enable(); 1663 local_irq_enable();
1712 } 1664 }
1713 1665
1714assign:
1715 task_numa_assign(env, cur, imp); 1666 task_numa_assign(env, cur, imp);
1716unlock: 1667unlock:
1717 rcu_read_unlock(); 1668 rcu_read_unlock();
@@ -1720,43 +1671,30 @@ unlock:
1720static void task_numa_find_cpu(struct task_numa_env *env, 1671static void task_numa_find_cpu(struct task_numa_env *env,
1721 long taskimp, long groupimp) 1672 long taskimp, long groupimp)
1722{ 1673{
1674 long src_load, dst_load, load;
1675 bool maymove = false;
1723 int cpu; 1676 int cpu;
1724 1677
1678 load = task_h_load(env->p);
1679 dst_load = env->dst_stats.load + load;
1680 src_load = env->src_stats.load - load;
1681
1682 /*
1683 * If the improvement from just moving env->p direction is better
1684 * than swapping tasks around, check if a move is possible.
1685 */
1686 maymove = !load_too_imbalanced(src_load, dst_load, env);
1687
1725 for_each_cpu(cpu, cpumask_of_node(env->dst_nid)) { 1688 for_each_cpu(cpu, cpumask_of_node(env->dst_nid)) {
1726 /* Skip this CPU if the source task cannot migrate */ 1689 /* Skip this CPU if the source task cannot migrate */
1727 if (!cpumask_test_cpu(cpu, &env->p->cpus_allowed)) 1690 if (!cpumask_test_cpu(cpu, &env->p->cpus_allowed))
1728 continue; 1691 continue;
1729 1692
1730 env->dst_cpu = cpu; 1693 env->dst_cpu = cpu;
1731 task_numa_compare(env, taskimp, groupimp); 1694 task_numa_compare(env, taskimp, groupimp, maymove);
1732 } 1695 }
1733} 1696}
1734 1697
1735/* Only move tasks to a NUMA node less busy than the current node. */
1736static bool numa_has_capacity(struct task_numa_env *env)
1737{
1738 struct numa_stats *src = &env->src_stats;
1739 struct numa_stats *dst = &env->dst_stats;
1740
1741 if (src->has_free_capacity && !dst->has_free_capacity)
1742 return false;
1743
1744 /*
1745 * Only consider a task move if the source has a higher load
1746 * than the destination, corrected for CPU capacity on each node.
1747 *
1748 * src->load dst->load
1749 * --------------------- vs ---------------------
1750 * src->compute_capacity dst->compute_capacity
1751 */
1752 if (src->load * dst->compute_capacity * env->imbalance_pct >
1753
1754 dst->load * src->compute_capacity * 100)
1755 return true;
1756
1757 return false;
1758}
1759
1760static int task_numa_migrate(struct task_struct *p) 1698static int task_numa_migrate(struct task_struct *p)
1761{ 1699{
1762 struct task_numa_env env = { 1700 struct task_numa_env env = {
@@ -1797,7 +1735,7 @@ static int task_numa_migrate(struct task_struct *p)
1797 * elsewhere, so there is no point in (re)trying. 1735 * elsewhere, so there is no point in (re)trying.
1798 */ 1736 */
1799 if (unlikely(!sd)) { 1737 if (unlikely(!sd)) {
1800 p->numa_preferred_nid = task_node(p); 1738 sched_setnuma(p, task_node(p));
1801 return -EINVAL; 1739 return -EINVAL;
1802 } 1740 }
1803 1741
@@ -1811,8 +1749,7 @@ static int task_numa_migrate(struct task_struct *p)
1811 update_numa_stats(&env.dst_stats, env.dst_nid); 1749 update_numa_stats(&env.dst_stats, env.dst_nid);
1812 1750
1813 /* Try to find a spot on the preferred nid. */ 1751 /* Try to find a spot on the preferred nid. */
1814 if (numa_has_capacity(&env)) 1752 task_numa_find_cpu(&env, taskimp, groupimp);
1815 task_numa_find_cpu(&env, taskimp, groupimp);
1816 1753
1817 /* 1754 /*
1818 * Look at other nodes in these cases: 1755 * Look at other nodes in these cases:
@@ -1842,8 +1779,7 @@ static int task_numa_migrate(struct task_struct *p)
1842 env.dist = dist; 1779 env.dist = dist;
1843 env.dst_nid = nid; 1780 env.dst_nid = nid;
1844 update_numa_stats(&env.dst_stats, env.dst_nid); 1781 update_numa_stats(&env.dst_stats, env.dst_nid);
1845 if (numa_has_capacity(&env)) 1782 task_numa_find_cpu(&env, taskimp, groupimp);
1846 task_numa_find_cpu(&env, taskimp, groupimp);
1847 } 1783 }
1848 } 1784 }
1849 1785
@@ -1856,15 +1792,13 @@ static int task_numa_migrate(struct task_struct *p)
1856 * trying for a better one later. Do not set the preferred node here. 1792 * trying for a better one later. Do not set the preferred node here.
1857 */ 1793 */
1858 if (p->numa_group) { 1794 if (p->numa_group) {
1859 struct numa_group *ng = p->numa_group;
1860
1861 if (env.best_cpu == -1) 1795 if (env.best_cpu == -1)
1862 nid = env.src_nid; 1796 nid = env.src_nid;
1863 else 1797 else
1864 nid = env.dst_nid; 1798 nid = cpu_to_node(env.best_cpu);
1865 1799
1866 if (ng->active_nodes > 1 && numa_is_active_node(env.dst_nid, ng)) 1800 if (nid != p->numa_preferred_nid)
1867 sched_setnuma(p, env.dst_nid); 1801 sched_setnuma(p, nid);
1868 } 1802 }
1869 1803
1870 /* No better CPU than the current one was found. */ 1804 /* No better CPU than the current one was found. */
@@ -1884,7 +1818,8 @@ static int task_numa_migrate(struct task_struct *p)
1884 return ret; 1818 return ret;
1885 } 1819 }
1886 1820
1887 ret = migrate_swap(p, env.best_task); 1821 ret = migrate_swap(p, env.best_task, env.best_cpu, env.src_cpu);
1822
1888 if (ret != 0) 1823 if (ret != 0)
1889 trace_sched_stick_numa(p, env.src_cpu, task_cpu(env.best_task)); 1824 trace_sched_stick_numa(p, env.src_cpu, task_cpu(env.best_task));
1890 put_task_struct(env.best_task); 1825 put_task_struct(env.best_task);
@@ -2144,8 +2079,8 @@ static int preferred_group_nid(struct task_struct *p, int nid)
2144 2079
2145static void task_numa_placement(struct task_struct *p) 2080static void task_numa_placement(struct task_struct *p)
2146{ 2081{
2147 int seq, nid, max_nid = -1, max_group_nid = -1; 2082 int seq, nid, max_nid = -1;
2148 unsigned long max_faults = 0, max_group_faults = 0; 2083 unsigned long max_faults = 0;
2149 unsigned long fault_types[2] = { 0, 0 }; 2084 unsigned long fault_types[2] = { 0, 0 };
2150 unsigned long total_faults; 2085 unsigned long total_faults;
2151 u64 runtime, period; 2086 u64 runtime, period;
@@ -2224,33 +2159,30 @@ static void task_numa_placement(struct task_struct *p)
2224 } 2159 }
2225 } 2160 }
2226 2161
2227 if (faults > max_faults) { 2162 if (!p->numa_group) {
2228 max_faults = faults; 2163 if (faults > max_faults) {
2164 max_faults = faults;
2165 max_nid = nid;
2166 }
2167 } else if (group_faults > max_faults) {
2168 max_faults = group_faults;
2229 max_nid = nid; 2169 max_nid = nid;
2230 } 2170 }
2231
2232 if (group_faults > max_group_faults) {
2233 max_group_faults = group_faults;
2234 max_group_nid = nid;
2235 }
2236 } 2171 }
2237 2172
2238 update_task_scan_period(p, fault_types[0], fault_types[1]);
2239
2240 if (p->numa_group) { 2173 if (p->numa_group) {
2241 numa_group_count_active_nodes(p->numa_group); 2174 numa_group_count_active_nodes(p->numa_group);
2242 spin_unlock_irq(group_lock); 2175 spin_unlock_irq(group_lock);
2243 max_nid = preferred_group_nid(p, max_group_nid); 2176 max_nid = preferred_group_nid(p, max_nid);
2244 } 2177 }
2245 2178
2246 if (max_faults) { 2179 if (max_faults) {
2247 /* Set the new preferred node */ 2180 /* Set the new preferred node */
2248 if (max_nid != p->numa_preferred_nid) 2181 if (max_nid != p->numa_preferred_nid)
2249 sched_setnuma(p, max_nid); 2182 sched_setnuma(p, max_nid);
2250
2251 if (task_node(p) != p->numa_preferred_nid)
2252 numa_migrate_preferred(p);
2253 } 2183 }
2184
2185 update_task_scan_period(p, fault_types[0], fault_types[1]);
2254} 2186}
2255 2187
2256static inline int get_numa_group(struct numa_group *grp) 2188static inline int get_numa_group(struct numa_group *grp)
@@ -2450,14 +2382,14 @@ void task_numa_fault(int last_cpupid, int mem_node, int pages, int flags)
2450 numa_is_active_node(mem_node, ng)) 2382 numa_is_active_node(mem_node, ng))
2451 local = 1; 2383 local = 1;
2452 2384
2453 task_numa_placement(p);
2454
2455 /* 2385 /*
2456 * Retry task to preferred node migration periodically, in case it 2386 * Retry task to preferred node migration periodically, in case it
 2457 * previously failed, or the scheduler moved us. 2387 * previously failed, or the scheduler moved us.
2458 */ 2388 */
2459 if (time_after(jiffies, p->numa_migrate_retry)) 2389 if (time_after(jiffies, p->numa_migrate_retry)) {
2390 task_numa_placement(p);
2460 numa_migrate_preferred(p); 2391 numa_migrate_preferred(p);
2392 }
2461 2393
2462 if (migrated) 2394 if (migrated)
2463 p->numa_pages_migrated += pages; 2395 p->numa_pages_migrated += pages;
@@ -2749,19 +2681,6 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
2749} while (0) 2681} while (0)
2750 2682
2751#ifdef CONFIG_SMP 2683#ifdef CONFIG_SMP
2752/*
2753 * XXX we want to get rid of these helpers and use the full load resolution.
2754 */
2755static inline long se_weight(struct sched_entity *se)
2756{
2757 return scale_load_down(se->load.weight);
2758}
2759
2760static inline long se_runnable(struct sched_entity *se)
2761{
2762 return scale_load_down(se->runnable_weight);
2763}
2764
2765static inline void 2684static inline void
2766enqueue_runnable_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) 2685enqueue_runnable_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
2767{ 2686{
@@ -3062,314 +2981,6 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq, int flags)
3062} 2981}
3063 2982
3064#ifdef CONFIG_SMP 2983#ifdef CONFIG_SMP
3065/*
3066 * Approximate:
3067 * val * y^n, where y^32 ~= 0.5 (~1 scheduling period)
3068 */
3069static u64 decay_load(u64 val, u64 n)
3070{
3071 unsigned int local_n;
3072
3073 if (unlikely(n > LOAD_AVG_PERIOD * 63))
3074 return 0;
3075
3076 /* after bounds checking we can collapse to 32-bit */
3077 local_n = n;
3078
3079 /*
3080 * As y^PERIOD = 1/2, we can combine
3081 * y^n = 1/2^(n/PERIOD) * y^(n%PERIOD)
3082 * With a look-up table which covers y^n (n<PERIOD)
3083 *
3084 * To achieve constant time decay_load.
3085 */
3086 if (unlikely(local_n >= LOAD_AVG_PERIOD)) {
3087 val >>= local_n / LOAD_AVG_PERIOD;
3088 local_n %= LOAD_AVG_PERIOD;
3089 }
3090
3091 val = mul_u64_u32_shr(val, runnable_avg_yN_inv[local_n], 32);
3092 return val;
3093}
3094
3095static u32 __accumulate_pelt_segments(u64 periods, u32 d1, u32 d3)
3096{
3097 u32 c1, c2, c3 = d3; /* y^0 == 1 */
3098
3099 /*
3100 * c1 = d1 y^p
3101 */
3102 c1 = decay_load((u64)d1, periods);
3103
3104 /*
3105 * p-1
3106 * c2 = 1024 \Sum y^n
3107 * n=1
3108 *
3109 * inf inf
3110 * = 1024 ( \Sum y^n - \Sum y^n - y^0 )
3111 * n=0 n=p
3112 */
3113 c2 = LOAD_AVG_MAX - decay_load(LOAD_AVG_MAX, periods) - 1024;
3114
3115 return c1 + c2 + c3;
3116}
3117
3118/*
3119 * Accumulate the three separate parts of the sum; d1 the remainder
3120 * of the last (incomplete) period, d2 the span of full periods and d3
3121 * the remainder of the (incomplete) current period.
3122 *
3123 * d1 d2 d3
3124 * ^ ^ ^
3125 * | | |
3126 * |<->|<----------------->|<--->|
3127 * ... |---x---|------| ... |------|-----x (now)
3128 *
3129 * p-1
3130 * u' = (u + d1) y^p + 1024 \Sum y^n + d3 y^0
3131 * n=1
3132 *
3133 * = u y^p + (Step 1)
3134 *
3135 * p-1
3136 * d1 y^p + 1024 \Sum y^n + d3 y^0 (Step 2)
3137 * n=1
3138 */
3139static __always_inline u32
3140accumulate_sum(u64 delta, int cpu, struct sched_avg *sa,
3141 unsigned long load, unsigned long runnable, int running)
3142{
3143 unsigned long scale_freq, scale_cpu;
3144 u32 contrib = (u32)delta; /* p == 0 -> delta < 1024 */
3145 u64 periods;
3146
3147 scale_freq = arch_scale_freq_capacity(cpu);
3148 scale_cpu = arch_scale_cpu_capacity(NULL, cpu);
3149
3150 delta += sa->period_contrib;
3151 periods = delta / 1024; /* A period is 1024us (~1ms) */
3152
3153 /*
3154 * Step 1: decay old *_sum if we crossed period boundaries.
3155 */
3156 if (periods) {
3157 sa->load_sum = decay_load(sa->load_sum, periods);
3158 sa->runnable_load_sum =
3159 decay_load(sa->runnable_load_sum, periods);
3160 sa->util_sum = decay_load((u64)(sa->util_sum), periods);
3161
3162 /*
3163 * Step 2
3164 */
3165 delta %= 1024;
3166 contrib = __accumulate_pelt_segments(periods,
3167 1024 - sa->period_contrib, delta);
3168 }
3169 sa->period_contrib = delta;
3170
3171 contrib = cap_scale(contrib, scale_freq);
3172 if (load)
3173 sa->load_sum += load * contrib;
3174 if (runnable)
3175 sa->runnable_load_sum += runnable * contrib;
3176 if (running)
3177 sa->util_sum += contrib * scale_cpu;
3178
3179 return periods;
3180}
3181
3182/*
3183 * We can represent the historical contribution to runnable average as the
3184 * coefficients of a geometric series. To do this we sub-divide our runnable
3185 * history into segments of approximately 1ms (1024us); label the segment that
3186 * occurred N-ms ago p_N, with p_0 corresponding to the current period, e.g.
3187 *
3188 * [<- 1024us ->|<- 1024us ->|<- 1024us ->| ...
3189 * p0 p1 p2
3190 * (now) (~1ms ago) (~2ms ago)
3191 *
3192 * Let u_i denote the fraction of p_i that the entity was runnable.
3193 *
3194 * We then designate the fractions u_i as our co-efficients, yielding the
3195 * following representation of historical load:
3196 * u_0 + u_1*y + u_2*y^2 + u_3*y^3 + ...
3197 *
 3198 * We choose y based on the width of a reasonable scheduling period, fixing:
3199 * y^32 = 0.5
3200 *
3201 * This means that the contribution to load ~32ms ago (u_32) will be weighted
3202 * approximately half as much as the contribution to load within the last ms
3203 * (u_0).
3204 *
3205 * When a period "rolls over" and we have new u_0`, multiplying the previous
3206 * sum again by y is sufficient to update:
3207 * load_avg = u_0` + y*(u_0 + u_1*y + u_2*y^2 + ... )
3208 * = u_0 + u_1*y + u_2*y^2 + ... [re-labeling u_i --> u_{i+1}]
3209 */
3210static __always_inline int
3211___update_load_sum(u64 now, int cpu, struct sched_avg *sa,
3212 unsigned long load, unsigned long runnable, int running)
3213{
3214 u64 delta;
3215
3216 delta = now - sa->last_update_time;
3217 /*
3218 * This should only happen when time goes backwards, which it
3219 * unfortunately does during sched clock init when we swap over to TSC.
3220 */
3221 if ((s64)delta < 0) {
3222 sa->last_update_time = now;
3223 return 0;
3224 }
3225
3226 /*
3227 * Use 1024ns as the unit of measurement since it's a reasonable
3228 * approximation of 1us and fast to compute.
3229 */
3230 delta >>= 10;
3231 if (!delta)
3232 return 0;
3233
3234 sa->last_update_time += delta << 10;
3235
3236 /*
3237 * running is a subset of runnable (weight) so running can't be set if
3238 * runnable is clear. But there are some corner cases where the current
3239 * se has been already dequeued but cfs_rq->curr still points to it.
3240 * This means that weight will be 0 but not running for a sched_entity
3241 * but also for a cfs_rq if the latter becomes idle. As an example,
3242 * this happens during idle_balance() which calls
3243 * update_blocked_averages()
3244 */
3245 if (!load)
3246 runnable = running = 0;
3247
3248 /*
3249 * Now we know we crossed measurement unit boundaries. The *_avg
3250 * accrues by two steps:
3251 *
3252 * Step 1: accumulate *_sum since last_update_time. If we haven't
3253 * crossed period boundaries, finish.
3254 */
3255 if (!accumulate_sum(delta, cpu, sa, load, runnable, running))
3256 return 0;
3257
3258 return 1;
3259}
3260
3261static __always_inline void
3262___update_load_avg(struct sched_avg *sa, unsigned long load, unsigned long runnable)
3263{
3264 u32 divider = LOAD_AVG_MAX - 1024 + sa->period_contrib;
3265
3266 /*
3267 * Step 2: update *_avg.
3268 */
3269 sa->load_avg = div_u64(load * sa->load_sum, divider);
3270 sa->runnable_load_avg = div_u64(runnable * sa->runnable_load_sum, divider);
3271 sa->util_avg = sa->util_sum / divider;
3272}
3273
3274/*
 3275 * When a task is dequeued, its estimated utilization should not be updated if
3276 * its util_avg has not been updated at least once.
3277 * This flag is used to synchronize util_avg updates with util_est updates.
3278 * We map this information into the LSB bit of the utilization saved at
3279 * dequeue time (i.e. util_est.dequeued).
3280 */
3281#define UTIL_AVG_UNCHANGED 0x1
3282
3283static inline void cfs_se_util_change(struct sched_avg *avg)
3284{
3285 unsigned int enqueued;
3286
3287 if (!sched_feat(UTIL_EST))
3288 return;
3289
3290 /* Avoid store if the flag has been already set */
3291 enqueued = avg->util_est.enqueued;
3292 if (!(enqueued & UTIL_AVG_UNCHANGED))
3293 return;
3294
3295 /* Reset flag to report util_avg has been updated */
3296 enqueued &= ~UTIL_AVG_UNCHANGED;
3297 WRITE_ONCE(avg->util_est.enqueued, enqueued);
3298}
3299
3300/*
3301 * sched_entity:
3302 *
3303 * task:
3304 * se_runnable() == se_weight()
3305 *
3306 * group: [ see update_cfs_group() ]
3307 * se_weight() = tg->weight * grq->load_avg / tg->load_avg
3308 * se_runnable() = se_weight(se) * grq->runnable_load_avg / grq->load_avg
3309 *
3310 * load_sum := runnable_sum
3311 * load_avg = se_weight(se) * runnable_avg
3312 *
3313 * runnable_load_sum := runnable_sum
3314 * runnable_load_avg = se_runnable(se) * runnable_avg
3315 *
3316 * XXX collapse load_sum and runnable_load_sum
3317 *
 3318 * cfs_rq:
3319 *
3320 * load_sum = \Sum se_weight(se) * se->avg.load_sum
3321 * load_avg = \Sum se->avg.load_avg
3322 *
3323 * runnable_load_sum = \Sum se_runnable(se) * se->avg.runnable_load_sum
 3324 * runnable_load_avg = \Sum se->avg.runnable_load_avg
3325 */
3326
3327static int
3328__update_load_avg_blocked_se(u64 now, int cpu, struct sched_entity *se)
3329{
3330 if (entity_is_task(se))
3331 se->runnable_weight = se->load.weight;
3332
3333 if (___update_load_sum(now, cpu, &se->avg, 0, 0, 0)) {
3334 ___update_load_avg(&se->avg, se_weight(se), se_runnable(se));
3335 return 1;
3336 }
3337
3338 return 0;
3339}
3340
3341static int
3342__update_load_avg_se(u64 now, int cpu, struct cfs_rq *cfs_rq, struct sched_entity *se)
3343{
3344 if (entity_is_task(se))
3345 se->runnable_weight = se->load.weight;
3346
3347 if (___update_load_sum(now, cpu, &se->avg, !!se->on_rq, !!se->on_rq,
3348 cfs_rq->curr == se)) {
3349
3350 ___update_load_avg(&se->avg, se_weight(se), se_runnable(se));
3351 cfs_se_util_change(&se->avg);
3352 return 1;
3353 }
3354
3355 return 0;
3356}
3357
3358static int
3359__update_load_avg_cfs_rq(u64 now, int cpu, struct cfs_rq *cfs_rq)
3360{
3361 if (___update_load_sum(now, cpu, &cfs_rq->avg,
3362 scale_load_down(cfs_rq->load.weight),
3363 scale_load_down(cfs_rq->runnable_weight),
3364 cfs_rq->curr != NULL)) {
3365
3366 ___update_load_avg(&cfs_rq->avg, 1, 1);
3367 return 1;
3368 }
3369
3370 return 0;
3371}
3372
3373#ifdef CONFIG_FAIR_GROUP_SCHED 2984#ifdef CONFIG_FAIR_GROUP_SCHED
3374/** 2985/**
3375 * update_tg_load_avg - update the tg's load avg 2986 * update_tg_load_avg - update the tg's load avg
@@ -4037,12 +3648,6 @@ util_est_dequeue(struct cfs_rq *cfs_rq, struct task_struct *p, bool task_sleep)
4037 3648
4038#else /* CONFIG_SMP */ 3649#else /* CONFIG_SMP */
4039 3650
4040static inline int
4041update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
4042{
4043 return 0;
4044}
4045
4046#define UPDATE_TG 0x0 3651#define UPDATE_TG 0x0
4047#define SKIP_AGE_LOAD 0x0 3652#define SKIP_AGE_LOAD 0x0
4048#define DO_ATTACH 0x0 3653#define DO_ATTACH 0x0
@@ -4726,7 +4331,6 @@ static inline int throttled_lb_pair(struct task_group *tg,
4726 throttled_hierarchy(dest_cfs_rq); 4331 throttled_hierarchy(dest_cfs_rq);
4727} 4332}
4728 4333
4729/* updated child weight may affect parent so we have to do this bottom up */
4730static int tg_unthrottle_up(struct task_group *tg, void *data) 4334static int tg_unthrottle_up(struct task_group *tg, void *data)
4731{ 4335{
4732 struct rq *rq = data; 4336 struct rq *rq = data;
@@ -5653,8 +5257,6 @@ static void cpu_load_update(struct rq *this_rq, unsigned long this_load,
5653 5257
5654 this_rq->cpu_load[i] = (old_load * (scale - 1) + new_load) >> i; 5258 this_rq->cpu_load[i] = (old_load * (scale - 1) + new_load) >> i;
5655 } 5259 }
5656
5657 sched_avg_update(this_rq);
5658} 5260}
5659 5261
5660/* Used instead of source_load when we know the type == 0 */ 5262/* Used instead of source_load when we know the type == 0 */
@@ -7294,8 +6896,8 @@ static int task_hot(struct task_struct *p, struct lb_env *env)
7294static int migrate_degrades_locality(struct task_struct *p, struct lb_env *env) 6896static int migrate_degrades_locality(struct task_struct *p, struct lb_env *env)
7295{ 6897{
7296 struct numa_group *numa_group = rcu_dereference(p->numa_group); 6898 struct numa_group *numa_group = rcu_dereference(p->numa_group);
7297 unsigned long src_faults, dst_faults; 6899 unsigned long src_weight, dst_weight;
7298 int src_nid, dst_nid; 6900 int src_nid, dst_nid, dist;
7299 6901
7300 if (!static_branch_likely(&sched_numa_balancing)) 6902 if (!static_branch_likely(&sched_numa_balancing))
7301 return -1; 6903 return -1;
@@ -7322,18 +6924,19 @@ static int migrate_degrades_locality(struct task_struct *p, struct lb_env *env)
7322 return 0; 6924 return 0;
7323 6925
7324 /* Leaving a core idle is often worse than degrading locality. */ 6926 /* Leaving a core idle is often worse than degrading locality. */
7325 if (env->idle != CPU_NOT_IDLE) 6927 if (env->idle == CPU_IDLE)
7326 return -1; 6928 return -1;
7327 6929
6930 dist = node_distance(src_nid, dst_nid);
7328 if (numa_group) { 6931 if (numa_group) {
7329 src_faults = group_faults(p, src_nid); 6932 src_weight = group_weight(p, src_nid, dist);
7330 dst_faults = group_faults(p, dst_nid); 6933 dst_weight = group_weight(p, dst_nid, dist);
7331 } else { 6934 } else {
7332 src_faults = task_faults(p, src_nid); 6935 src_weight = task_weight(p, src_nid, dist);
7333 dst_faults = task_faults(p, dst_nid); 6936 dst_weight = task_weight(p, dst_nid, dist);
7334 } 6937 }
7335 6938
7336 return dst_faults < src_faults; 6939 return dst_weight < src_weight;
7337} 6940}
7338 6941
7339#else 6942#else
@@ -7620,6 +7223,22 @@ static inline bool cfs_rq_has_blocked(struct cfs_rq *cfs_rq)
7620 return false; 7223 return false;
7621} 7224}
7622 7225
7226static inline bool others_have_blocked(struct rq *rq)
7227{
7228 if (READ_ONCE(rq->avg_rt.util_avg))
7229 return true;
7230
7231 if (READ_ONCE(rq->avg_dl.util_avg))
7232 return true;
7233
7234#if defined(CONFIG_IRQ_TIME_ACCOUNTING) || defined(CONFIG_PARAVIRT_TIME_ACCOUNTING)
7235 if (READ_ONCE(rq->avg_irq.util_avg))
7236 return true;
7237#endif
7238
7239 return false;
7240}
7241
7623#ifdef CONFIG_FAIR_GROUP_SCHED 7242#ifdef CONFIG_FAIR_GROUP_SCHED
7624 7243
7625static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq) 7244static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq)
@@ -7679,6 +7298,12 @@ static void update_blocked_averages(int cpu)
7679 if (cfs_rq_has_blocked(cfs_rq)) 7298 if (cfs_rq_has_blocked(cfs_rq))
7680 done = false; 7299 done = false;
7681 } 7300 }
7301 update_rt_rq_load_avg(rq_clock_task(rq), rq, 0);
7302 update_dl_rq_load_avg(rq_clock_task(rq), rq, 0);
7303 update_irq_load_avg(rq, 0);
7304 /* Don't need periodic decay once load/util_avg are null */
7305 if (others_have_blocked(rq))
7306 done = false;
7682 7307
7683#ifdef CONFIG_NO_HZ_COMMON 7308#ifdef CONFIG_NO_HZ_COMMON
7684 rq->last_blocked_load_update_tick = jiffies; 7309 rq->last_blocked_load_update_tick = jiffies;
@@ -7744,9 +7369,12 @@ static inline void update_blocked_averages(int cpu)
7744 rq_lock_irqsave(rq, &rf); 7369 rq_lock_irqsave(rq, &rf);
7745 update_rq_clock(rq); 7370 update_rq_clock(rq);
7746 update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq); 7371 update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq);
7372 update_rt_rq_load_avg(rq_clock_task(rq), rq, 0);
7373 update_dl_rq_load_avg(rq_clock_task(rq), rq, 0);
7374 update_irq_load_avg(rq, 0);
7747#ifdef CONFIG_NO_HZ_COMMON 7375#ifdef CONFIG_NO_HZ_COMMON
7748 rq->last_blocked_load_update_tick = jiffies; 7376 rq->last_blocked_load_update_tick = jiffies;
7749 if (!cfs_rq_has_blocked(cfs_rq)) 7377 if (!cfs_rq_has_blocked(cfs_rq) && !others_have_blocked(rq))
7750 rq->has_blocked_load = 0; 7378 rq->has_blocked_load = 0;
7751#endif 7379#endif
7752 rq_unlock_irqrestore(rq, &rf); 7380 rq_unlock_irqrestore(rq, &rf);
@@ -7856,39 +7484,32 @@ static inline int get_sd_load_idx(struct sched_domain *sd,
7856static unsigned long scale_rt_capacity(int cpu) 7484static unsigned long scale_rt_capacity(int cpu)
7857{ 7485{
7858 struct rq *rq = cpu_rq(cpu); 7486 struct rq *rq = cpu_rq(cpu);
7859 u64 total, used, age_stamp, avg; 7487 unsigned long max = arch_scale_cpu_capacity(NULL, cpu);
7860 s64 delta; 7488 unsigned long used, free;
7489 unsigned long irq;
7861 7490
7862 /* 7491 irq = cpu_util_irq(rq);
7863 * Since we're reading these variables without serialization make sure
7864 * we read them once before doing sanity checks on them.
7865 */
7866 age_stamp = READ_ONCE(rq->age_stamp);
7867 avg = READ_ONCE(rq->rt_avg);
7868 delta = __rq_clock_broken(rq) - age_stamp;
7869 7492
7870 if (unlikely(delta < 0)) 7493 if (unlikely(irq >= max))
7871 delta = 0; 7494 return 1;
7872 7495
7873 total = sched_avg_period() + delta; 7496 used = READ_ONCE(rq->avg_rt.util_avg);
7497 used += READ_ONCE(rq->avg_dl.util_avg);
7874 7498
7875 used = div_u64(avg, total); 7499 if (unlikely(used >= max))
7500 return 1;
7876 7501
7877 if (likely(used < SCHED_CAPACITY_SCALE)) 7502 free = max - used;
7878 return SCHED_CAPACITY_SCALE - used;
7879 7503
7880 return 1; 7504 return scale_irq_capacity(free, irq, max);
7881} 7505}
7882 7506
7883static void update_cpu_capacity(struct sched_domain *sd, int cpu) 7507static void update_cpu_capacity(struct sched_domain *sd, int cpu)
7884{ 7508{
7885 unsigned long capacity = arch_scale_cpu_capacity(sd, cpu); 7509 unsigned long capacity = scale_rt_capacity(cpu);
7886 struct sched_group *sdg = sd->groups; 7510 struct sched_group *sdg = sd->groups;
7887 7511
7888 cpu_rq(cpu)->cpu_capacity_orig = capacity; 7512 cpu_rq(cpu)->cpu_capacity_orig = arch_scale_cpu_capacity(sd, cpu);
7889
7890 capacity *= scale_rt_capacity(cpu);
7891 capacity >>= SCHED_CAPACITY_SHIFT;
7892 7513
7893 if (!capacity) 7514 if (!capacity)
7894 capacity = 1; 7515 capacity = 1;
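Editor's note: to make the capacity change above concrete, here is a hedged worked example of the new arithmetic: rt and dl utilization are subtracted from the CPU's original capacity, and the remainder is scaled by the irq-free fraction. Values are made up; the helper mirrors the scale_irq_capacity() added to sched.h further down.

#include <stdio.h>

static unsigned long scale_irq_capacity(unsigned long util,
					unsigned long irq, unsigned long max)
{
	return util * (max - irq) / max;
}

int main(void)
{
	unsigned long max = 1024;		/* arch_scale_cpu_capacity() */
	unsigned long rt = 128, dl = 32;	/* rq->avg_rt / rq->avg_dl util_avg */
	unsigned long irq = 64;			/* rq->avg_irq util_avg */
	unsigned long free = max - (rt + dl);	/* 864 */

	/* Capacity left for CFS after rt, dl and irq pressure: 864*960/1024 = 810 */
	printf("cfs capacity = %lu\n", scale_irq_capacity(free, irq, max));
	return 0;
}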
diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c
new file mode 100644
index 000000000000..35475c0c5419
--- /dev/null
+++ b/kernel/sched/pelt.c
@@ -0,0 +1,399 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Per Entity Load Tracking
4 *
5 * Copyright (C) 2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
6 *
7 * Interactivity improvements by Mike Galbraith
8 * (C) 2007 Mike Galbraith <efault@gmx.de>
9 *
10 * Various enhancements by Dmitry Adamushko.
11 * (C) 2007 Dmitry Adamushko <dmitry.adamushko@gmail.com>
12 *
13 * Group scheduling enhancements by Srivatsa Vaddagiri
14 * Copyright IBM Corporation, 2007
15 * Author: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
16 *
17 * Scaled math optimizations by Thomas Gleixner
18 * Copyright (C) 2007, Thomas Gleixner <tglx@linutronix.de>
19 *
20 * Adaptive scheduling granularity, math enhancements by Peter Zijlstra
21 * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
22 *
23 * Move PELT related code from fair.c into this pelt.c file
24 * Author: Vincent Guittot <vincent.guittot@linaro.org>
25 */
26
27#include <linux/sched.h>
28#include "sched.h"
29#include "sched-pelt.h"
30#include "pelt.h"
31
32/*
33 * Approximate:
34 * val * y^n, where y^32 ~= 0.5 (~1 scheduling period)
35 */
36static u64 decay_load(u64 val, u64 n)
37{
38 unsigned int local_n;
39
40 if (unlikely(n > LOAD_AVG_PERIOD * 63))
41 return 0;
42
43 /* after bounds checking we can collapse to 32-bit */
44 local_n = n;
45
46 /*
47 * As y^PERIOD = 1/2, we can combine
48 * y^n = 1/2^(n/PERIOD) * y^(n%PERIOD)
49 * With a look-up table which covers y^n (n<PERIOD)
50 *
51 * To achieve constant time decay_load.
52 */
53 if (unlikely(local_n >= LOAD_AVG_PERIOD)) {
54 val >>= local_n / LOAD_AVG_PERIOD;
55 local_n %= LOAD_AVG_PERIOD;
56 }
57
58 val = mul_u64_u32_shr(val, runnable_avg_yN_inv[local_n], 32);
59 return val;
60}
61
62static u32 __accumulate_pelt_segments(u64 periods, u32 d1, u32 d3)
63{
64 u32 c1, c2, c3 = d3; /* y^0 == 1 */
65
66 /*
67 * c1 = d1 y^p
68 */
69 c1 = decay_load((u64)d1, periods);
70
71 /*
72 * p-1
73 * c2 = 1024 \Sum y^n
74 * n=1
75 *
76 * inf inf
77 * = 1024 ( \Sum y^n - \Sum y^n - y^0 )
78 * n=0 n=p
79 */
80 c2 = LOAD_AVG_MAX - decay_load(LOAD_AVG_MAX, periods) - 1024;
81
82 return c1 + c2 + c3;
83}
84
85#define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT)
86
87/*
88 * Accumulate the three separate parts of the sum; d1 the remainder
89 * of the last (incomplete) period, d2 the span of full periods and d3
90 * the remainder of the (incomplete) current period.
91 *
92 * d1 d2 d3
93 * ^ ^ ^
94 * | | |
95 * |<->|<----------------->|<--->|
96 * ... |---x---|------| ... |------|-----x (now)
97 *
98 * p-1
99 * u' = (u + d1) y^p + 1024 \Sum y^n + d3 y^0
100 * n=1
101 *
102 * = u y^p + (Step 1)
103 *
104 * p-1
105 * d1 y^p + 1024 \Sum y^n + d3 y^0 (Step 2)
106 * n=1
107 */
108static __always_inline u32
109accumulate_sum(u64 delta, int cpu, struct sched_avg *sa,
110 unsigned long load, unsigned long runnable, int running)
111{
112 unsigned long scale_freq, scale_cpu;
113 u32 contrib = (u32)delta; /* p == 0 -> delta < 1024 */
114 u64 periods;
115
116 scale_freq = arch_scale_freq_capacity(cpu);
117 scale_cpu = arch_scale_cpu_capacity(NULL, cpu);
118
119 delta += sa->period_contrib;
120 periods = delta / 1024; /* A period is 1024us (~1ms) */
121
122 /*
123 * Step 1: decay old *_sum if we crossed period boundaries.
124 */
125 if (periods) {
126 sa->load_sum = decay_load(sa->load_sum, periods);
127 sa->runnable_load_sum =
128 decay_load(sa->runnable_load_sum, periods);
129 sa->util_sum = decay_load((u64)(sa->util_sum), periods);
130
131 /*
132 * Step 2
133 */
134 delta %= 1024;
135 contrib = __accumulate_pelt_segments(periods,
136 1024 - sa->period_contrib, delta);
137 }
138 sa->period_contrib = delta;
139
140 contrib = cap_scale(contrib, scale_freq);
141 if (load)
142 sa->load_sum += load * contrib;
143 if (runnable)
144 sa->runnable_load_sum += runnable * contrib;
145 if (running)
146 sa->util_sum += contrib * scale_cpu;
147
148 return periods;
149}
150
151/*
152 * We can represent the historical contribution to runnable average as the
153 * coefficients of a geometric series. To do this we sub-divide our runnable
154 * history into segments of approximately 1ms (1024us); label the segment that
155 * occurred N-ms ago p_N, with p_0 corresponding to the current period, e.g.
156 *
157 * [<- 1024us ->|<- 1024us ->|<- 1024us ->| ...
158 * p0 p1 p2
159 * (now) (~1ms ago) (~2ms ago)
160 *
161 * Let u_i denote the fraction of p_i that the entity was runnable.
162 *
163 * We then designate the fractions u_i as our co-efficients, yielding the
164 * following representation of historical load:
165 * u_0 + u_1*y + u_2*y^2 + u_3*y^3 + ...
166 *
 167 * We choose y based on the width of a reasonable scheduling period, fixing:
168 * y^32 = 0.5
169 *
170 * This means that the contribution to load ~32ms ago (u_32) will be weighted
171 * approximately half as much as the contribution to load within the last ms
172 * (u_0).
173 *
174 * When a period "rolls over" and we have new u_0`, multiplying the previous
175 * sum again by y is sufficient to update:
176 * load_avg = u_0` + y*(u_0 + u_1*y + u_2*y^2 + ... )
177 * = u_0 + u_1*y + u_2*y^2 + ... [re-labeling u_i --> u_{i+1}]
178 */
179static __always_inline int
180___update_load_sum(u64 now, int cpu, struct sched_avg *sa,
181 unsigned long load, unsigned long runnable, int running)
182{
183 u64 delta;
184
185 delta = now - sa->last_update_time;
186 /*
187 * This should only happen when time goes backwards, which it
188 * unfortunately does during sched clock init when we swap over to TSC.
189 */
190 if ((s64)delta < 0) {
191 sa->last_update_time = now;
192 return 0;
193 }
194
195 /*
196 * Use 1024ns as the unit of measurement since it's a reasonable
197 * approximation of 1us and fast to compute.
198 */
199 delta >>= 10;
200 if (!delta)
201 return 0;
202
203 sa->last_update_time += delta << 10;
204
205 /*
206 * running is a subset of runnable (weight) so running can't be set if
207 * runnable is clear. But there are some corner cases where the current
208 * se has been already dequeued but cfs_rq->curr still points to it.
209 * This means that weight will be 0 but not running for a sched_entity
210 * but also for a cfs_rq if the latter becomes idle. As an example,
211 * this happens during idle_balance() which calls
212 * update_blocked_averages()
213 */
214 if (!load)
215 runnable = running = 0;
216
217 /*
218 * Now we know we crossed measurement unit boundaries. The *_avg
219 * accrues by two steps:
220 *
221 * Step 1: accumulate *_sum since last_update_time. If we haven't
222 * crossed period boundaries, finish.
223 */
224 if (!accumulate_sum(delta, cpu, sa, load, runnable, running))
225 return 0;
226
227 return 1;
228}
229
230static __always_inline void
231___update_load_avg(struct sched_avg *sa, unsigned long load, unsigned long runnable)
232{
233 u32 divider = LOAD_AVG_MAX - 1024 + sa->period_contrib;
234
235 /*
236 * Step 2: update *_avg.
237 */
238 sa->load_avg = div_u64(load * sa->load_sum, divider);
239 sa->runnable_load_avg = div_u64(runnable * sa->runnable_load_sum, divider);
240 WRITE_ONCE(sa->util_avg, sa->util_sum / divider);
241}
242
243/*
244 * sched_entity:
245 *
246 * task:
247 * se_runnable() == se_weight()
248 *
249 * group: [ see update_cfs_group() ]
250 * se_weight() = tg->weight * grq->load_avg / tg->load_avg
251 * se_runnable() = se_weight(se) * grq->runnable_load_avg / grq->load_avg
252 *
253 * load_sum := runnable_sum
254 * load_avg = se_weight(se) * runnable_avg
255 *
256 * runnable_load_sum := runnable_sum
257 * runnable_load_avg = se_runnable(se) * runnable_avg
258 *
259 * XXX collapse load_sum and runnable_load_sum
260 *
 261 * cfs_rq:
262 *
263 * load_sum = \Sum se_weight(se) * se->avg.load_sum
264 * load_avg = \Sum se->avg.load_avg
265 *
266 * runnable_load_sum = \Sum se_runnable(se) * se->avg.runnable_load_sum
 267 * runnable_load_avg = \Sum se->avg.runnable_load_avg
268 */
269
270int __update_load_avg_blocked_se(u64 now, int cpu, struct sched_entity *se)
271{
272 if (entity_is_task(se))
273 se->runnable_weight = se->load.weight;
274
275 if (___update_load_sum(now, cpu, &se->avg, 0, 0, 0)) {
276 ___update_load_avg(&se->avg, se_weight(se), se_runnable(se));
277 return 1;
278 }
279
280 return 0;
281}
282
283int __update_load_avg_se(u64 now, int cpu, struct cfs_rq *cfs_rq, struct sched_entity *se)
284{
285 if (entity_is_task(se))
286 se->runnable_weight = se->load.weight;
287
288 if (___update_load_sum(now, cpu, &se->avg, !!se->on_rq, !!se->on_rq,
289 cfs_rq->curr == se)) {
290
291 ___update_load_avg(&se->avg, se_weight(se), se_runnable(se));
292 cfs_se_util_change(&se->avg);
293 return 1;
294 }
295
296 return 0;
297}
298
299int __update_load_avg_cfs_rq(u64 now, int cpu, struct cfs_rq *cfs_rq)
300{
301 if (___update_load_sum(now, cpu, &cfs_rq->avg,
302 scale_load_down(cfs_rq->load.weight),
303 scale_load_down(cfs_rq->runnable_weight),
304 cfs_rq->curr != NULL)) {
305
306 ___update_load_avg(&cfs_rq->avg, 1, 1);
307 return 1;
308 }
309
310 return 0;
311}
312
313/*
314 * rt_rq:
315 *
316 * util_sum = \Sum se->avg.util_sum but se->avg.util_sum is not tracked
317 * util_sum = cpu_scale * load_sum
318 * runnable_load_sum = load_sum
319 *
320 * load_avg and runnable_load_avg are not supported and meaningless.
321 *
322 */
323
324int update_rt_rq_load_avg(u64 now, struct rq *rq, int running)
325{
326 if (___update_load_sum(now, rq->cpu, &rq->avg_rt,
327 running,
328 running,
329 running)) {
330
331 ___update_load_avg(&rq->avg_rt, 1, 1);
332 return 1;
333 }
334
335 return 0;
336}
337
338/*
339 * dl_rq:
340 *
341 * util_sum = \Sum se->avg.util_sum but se->avg.util_sum is not tracked
342 * util_sum = cpu_scale * load_sum
343 * runnable_load_sum = load_sum
344 *
345 */
346
347int update_dl_rq_load_avg(u64 now, struct rq *rq, int running)
348{
349 if (___update_load_sum(now, rq->cpu, &rq->avg_dl,
350 running,
351 running,
352 running)) {
353
354 ___update_load_avg(&rq->avg_dl, 1, 1);
355 return 1;
356 }
357
358 return 0;
359}
360
361#if defined(CONFIG_IRQ_TIME_ACCOUNTING) || defined(CONFIG_PARAVIRT_TIME_ACCOUNTING)
362/*
363 * irq:
364 *
365 * util_sum = \Sum se->avg.util_sum but se->avg.util_sum is not tracked
366 * util_sum = cpu_scale * load_sum
367 * runnable_load_sum = load_sum
368 *
369 */
370
371int update_irq_load_avg(struct rq *rq, u64 running)
372{
373 int ret = 0;
374 /*
375 * We know the time that has been used by interrupt since last update
376 * but we don't when. Let be pessimistic and assume that interrupt has
377 * happened just before the update. This is not so far from reality
378 * because interrupt will most probably wake up task and trig an update
379 * of rq clock during which the metric si updated.
380 * We start to decay with normal context time and then we add the
381 * interrupt context time.
382 * We can safely remove running from rq->clock because
383 * rq->clock += delta with delta >= running
384 */
385 ret = ___update_load_sum(rq->clock - running, rq->cpu, &rq->avg_irq,
386 0,
387 0,
388 0);
389 ret += ___update_load_sum(rq->clock, rq->cpu, &rq->avg_irq,
390 1,
391 1,
392 1);
393
394 if (ret)
395 ___update_load_avg(&rq->avg_irq, 1, 1);
396
397 return ret;
398}
399#endif
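Editor's note: the decay_load() comment above relies on the identity y^n = (1/2)^(n/PERIOD) * y^(n%PERIOD) with y^PERIOD = 1/2, which is what makes the decay constant-time. Below is a small floating-point sketch of that identity; the kernel uses the fixed-point runnable_avg_yN_inv[] look-up table instead of pow(), so this is illustrative only.

#include <math.h>
#include <stdio.h>

#define LOAD_AVG_PERIOD 32	/* y^32 == 0.5, ~one scheduling period */

static double decay(double val, unsigned int n)
{
	double y = pow(0.5, 1.0 / LOAD_AVG_PERIOD);

	/* Halve once per full period, then decay the remainder via y^(n%PERIOD). */
	val /= (double)(1UL << (n / LOAD_AVG_PERIOD));
	return val * pow(y, n % LOAD_AVG_PERIOD);
}

int main(void)
{
	double y = pow(0.5, 1.0 / LOAD_AVG_PERIOD);

	/* Both sides compute val * y^n; they agree up to rounding error. */
	printf("%f vs %f\n", decay(1024.0, 100), 1024.0 * pow(y, 100));
	return 0;
}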
diff --git a/kernel/sched/pelt.h b/kernel/sched/pelt.h
new file mode 100644
index 000000000000..d2894db28955
--- /dev/null
+++ b/kernel/sched/pelt.h
@@ -0,0 +1,72 @@
1#ifdef CONFIG_SMP
2
3int __update_load_avg_blocked_se(u64 now, int cpu, struct sched_entity *se);
4int __update_load_avg_se(u64 now, int cpu, struct cfs_rq *cfs_rq, struct sched_entity *se);
5int __update_load_avg_cfs_rq(u64 now, int cpu, struct cfs_rq *cfs_rq);
6int update_rt_rq_load_avg(u64 now, struct rq *rq, int running);
7int update_dl_rq_load_avg(u64 now, struct rq *rq, int running);
8
9#if defined(CONFIG_IRQ_TIME_ACCOUNTING) || defined(CONFIG_PARAVIRT_TIME_ACCOUNTING)
10int update_irq_load_avg(struct rq *rq, u64 running);
11#else
12static inline int
13update_irq_load_avg(struct rq *rq, u64 running)
14{
15 return 0;
16}
17#endif
18
19/*
 20 * When a task is dequeued, its estimated utilization should not be updated if
21 * its util_avg has not been updated at least once.
22 * This flag is used to synchronize util_avg updates with util_est updates.
23 * We map this information into the LSB bit of the utilization saved at
24 * dequeue time (i.e. util_est.dequeued).
25 */
26#define UTIL_AVG_UNCHANGED 0x1
27
28static inline void cfs_se_util_change(struct sched_avg *avg)
29{
30 unsigned int enqueued;
31
32 if (!sched_feat(UTIL_EST))
33 return;
34
35 /* Avoid store if the flag has been already set */
36 enqueued = avg->util_est.enqueued;
37 if (!(enqueued & UTIL_AVG_UNCHANGED))
38 return;
39
40 /* Reset flag to report util_avg has been updated */
41 enqueued &= ~UTIL_AVG_UNCHANGED;
42 WRITE_ONCE(avg->util_est.enqueued, enqueued);
43}
44
45#else
46
47static inline int
48update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
49{
50 return 0;
51}
52
53static inline int
54update_rt_rq_load_avg(u64 now, struct rq *rq, int running)
55{
56 return 0;
57}
58
59static inline int
60update_dl_rq_load_avg(u64 now, struct rq *rq, int running)
61{
62 return 0;
63}
64
65static inline int
66update_irq_load_avg(struct rq *rq, u64 running)
67{
68 return 0;
69}
70#endif
71
72
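Editor's note: the UTIL_AVG_UNCHANGED comment above describes borrowing the LSB of the saved utilization as a "util_avg not updated yet" marker that cfs_se_util_change() clears on the first update. A generic, simplified sketch of that LSB-flag pattern follows; it is illustrative only, and the kernel's precision handling differs.

#include <stdio.h>

#define UTIL_AVG_UNCHANGED 0x1

/* Clear the flag once util_avg has been updated; skip the store afterwards. */
static void first_util_update(unsigned int *enqueued)
{
	if (!(*enqueued & UTIL_AVG_UNCHANGED))
		return;			/* flag already cleared, avoid the store */
	*enqueued &= ~UTIL_AVG_UNCHANGED;
}

int main(void)
{
	unsigned int enqueued = 300 | UTIL_AVG_UNCHANGED;	/* value + "unchanged" bit */

	first_util_update(&enqueued);
	printf("util_est=%u flag=%u\n", enqueued, enqueued & UTIL_AVG_UNCHANGED);
	return 0;
}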
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index eaaec8364f96..2e2955a8cf8f 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -5,6 +5,8 @@
5 */ 5 */
6#include "sched.h" 6#include "sched.h"
7 7
8#include "pelt.h"
9
8int sched_rr_timeslice = RR_TIMESLICE; 10int sched_rr_timeslice = RR_TIMESLICE;
9int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE; 11int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE;
10 12
@@ -973,8 +975,6 @@ static void update_curr_rt(struct rq *rq)
973 curr->se.exec_start = now; 975 curr->se.exec_start = now;
974 cgroup_account_cputime(curr, delta_exec); 976 cgroup_account_cputime(curr, delta_exec);
975 977
976 sched_rt_avg_update(rq, delta_exec);
977
978 if (!rt_bandwidth_enabled()) 978 if (!rt_bandwidth_enabled())
979 return; 979 return;
980 980
@@ -1578,6 +1578,14 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
1578 1578
1579 rt_queue_push_tasks(rq); 1579 rt_queue_push_tasks(rq);
1580 1580
1581 /*
1582 * If prev task was rt, put_prev_task() has already updated the
 1583 * utilization. We only care about the case where we start to schedule an
 1584 * rt task.
1585 */
1586 if (rq->curr->sched_class != &rt_sched_class)
1587 update_rt_rq_load_avg(rq_clock_task(rq), rq, 0);
1588
1581 return p; 1589 return p;
1582} 1590}
1583 1591
@@ -1585,6 +1593,8 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
1585{ 1593{
1586 update_curr_rt(rq); 1594 update_curr_rt(rq);
1587 1595
1596 update_rt_rq_load_avg(rq_clock_task(rq), rq, 1);
1597
1588 /* 1598 /*
1589 * The previous task needs to be made eligible for pushing 1599 * The previous task needs to be made eligible for pushing
1590 * if it is still active 1600 * if it is still active
@@ -2314,6 +2324,7 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
2314 struct sched_rt_entity *rt_se = &p->rt; 2324 struct sched_rt_entity *rt_se = &p->rt;
2315 2325
2316 update_curr_rt(rq); 2326 update_curr_rt(rq);
2327 update_rt_rq_load_avg(rq_clock_task(rq), rq, 1);
2317 2328
2318 watchdog(rq, p); 2329 watchdog(rq, p);
2319 2330
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index c7742dcc136c..4a2e8cae63c4 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -594,6 +594,7 @@ struct rt_rq {
594 unsigned long rt_nr_total; 594 unsigned long rt_nr_total;
595 int overloaded; 595 int overloaded;
596 struct plist_head pushable_tasks; 596 struct plist_head pushable_tasks;
597
597#endif /* CONFIG_SMP */ 598#endif /* CONFIG_SMP */
598 int rt_queued; 599 int rt_queued;
599 600
@@ -673,7 +674,26 @@ struct dl_rq {
673 u64 bw_ratio; 674 u64 bw_ratio;
674}; 675};
675 676
677#ifdef CONFIG_FAIR_GROUP_SCHED
678/* An entity is a task if it doesn't "own" a runqueue */
679#define entity_is_task(se) (!se->my_q)
680#else
681#define entity_is_task(se) 1
682#endif
683
676#ifdef CONFIG_SMP 684#ifdef CONFIG_SMP
685/*
686 * XXX we want to get rid of these helpers and use the full load resolution.
687 */
688static inline long se_weight(struct sched_entity *se)
689{
690 return scale_load_down(se->load.weight);
691}
692
693static inline long se_runnable(struct sched_entity *se)
694{
695 return scale_load_down(se->runnable_weight);
696}
677 697
678static inline bool sched_asym_prefer(int a, int b) 698static inline bool sched_asym_prefer(int a, int b)
679{ 699{
@@ -833,8 +853,12 @@ struct rq {
833 853
834 struct list_head cfs_tasks; 854 struct list_head cfs_tasks;
835 855
836 u64 rt_avg; 856 struct sched_avg avg_rt;
837 u64 age_stamp; 857 struct sched_avg avg_dl;
858#if defined(CONFIG_IRQ_TIME_ACCOUNTING) || defined(CONFIG_PARAVIRT_TIME_ACCOUNTING)
859#define HAVE_SCHED_AVG_IRQ
860 struct sched_avg avg_irq;
861#endif
838 u64 idle_stamp; 862 u64 idle_stamp;
839 u64 avg_idle; 863 u64 avg_idle;
840 864
@@ -1075,7 +1099,8 @@ enum numa_faults_stats {
1075}; 1099};
1076extern void sched_setnuma(struct task_struct *p, int node); 1100extern void sched_setnuma(struct task_struct *p, int node);
1077extern int migrate_task_to(struct task_struct *p, int cpu); 1101extern int migrate_task_to(struct task_struct *p, int cpu);
1078extern int migrate_swap(struct task_struct *, struct task_struct *); 1102extern int migrate_swap(struct task_struct *p, struct task_struct *t,
1103 int cpu, int scpu);
1079extern void init_numa_balancing(unsigned long clone_flags, struct task_struct *p); 1104extern void init_numa_balancing(unsigned long clone_flags, struct task_struct *p);
1080#else 1105#else
1081static inline void 1106static inline void
@@ -1690,15 +1715,9 @@ extern void deactivate_task(struct rq *rq, struct task_struct *p, int flags);
1690 1715
1691extern void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags); 1716extern void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);
1692 1717
1693extern const_debug unsigned int sysctl_sched_time_avg;
1694extern const_debug unsigned int sysctl_sched_nr_migrate; 1718extern const_debug unsigned int sysctl_sched_nr_migrate;
1695extern const_debug unsigned int sysctl_sched_migration_cost; 1719extern const_debug unsigned int sysctl_sched_migration_cost;
1696 1720
1697static inline u64 sched_avg_period(void)
1698{
1699 return (u64)sysctl_sched_time_avg * NSEC_PER_MSEC / 2;
1700}
1701
1702#ifdef CONFIG_SCHED_HRTICK 1721#ifdef CONFIG_SCHED_HRTICK
1703 1722
1704/* 1723/*
@@ -1735,8 +1754,6 @@ unsigned long arch_scale_freq_capacity(int cpu)
1735#endif 1754#endif
1736 1755
1737#ifdef CONFIG_SMP 1756#ifdef CONFIG_SMP
1738extern void sched_avg_update(struct rq *rq);
1739
1740#ifndef arch_scale_cpu_capacity 1757#ifndef arch_scale_cpu_capacity
1741static __always_inline 1758static __always_inline
1742unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu) 1759unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu)
@@ -1747,12 +1764,6 @@ unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu)
1747 return SCHED_CAPACITY_SCALE; 1764 return SCHED_CAPACITY_SCALE;
1748} 1765}
1749#endif 1766#endif
1750
1751static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta)
1752{
1753 rq->rt_avg += rt_delta * arch_scale_freq_capacity(cpu_of(rq));
1754 sched_avg_update(rq);
1755}
1756#else 1767#else
1757#ifndef arch_scale_cpu_capacity 1768#ifndef arch_scale_cpu_capacity
1758static __always_inline 1769static __always_inline
@@ -1761,8 +1772,6 @@ unsigned long arch_scale_cpu_capacity(void __always_unused *sd, int cpu)
1761 return SCHED_CAPACITY_SCALE; 1772 return SCHED_CAPACITY_SCALE;
1762} 1773}
1763#endif 1774#endif
1764static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta) { }
1765static inline void sched_avg_update(struct rq *rq) { }
1766#endif 1775#endif
1767 1776
1768struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf) 1777struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf)
@@ -2177,11 +2186,16 @@ static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {}
2177#endif 2186#endif
2178 2187
2179#ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL 2188#ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL
2180static inline unsigned long cpu_util_dl(struct rq *rq) 2189static inline unsigned long cpu_bw_dl(struct rq *rq)
2181{ 2190{
2182 return (rq->dl.running_bw * SCHED_CAPACITY_SCALE) >> BW_SHIFT; 2191 return (rq->dl.running_bw * SCHED_CAPACITY_SCALE) >> BW_SHIFT;
2183} 2192}
2184 2193
2194static inline unsigned long cpu_util_dl(struct rq *rq)
2195{
2196 return READ_ONCE(rq->avg_dl.util_avg);
2197}
2198
2185static inline unsigned long cpu_util_cfs(struct rq *rq) 2199static inline unsigned long cpu_util_cfs(struct rq *rq)
2186{ 2200{
2187 unsigned long util = READ_ONCE(rq->cfs.avg.util_avg); 2201 unsigned long util = READ_ONCE(rq->cfs.avg.util_avg);
@@ -2193,4 +2207,37 @@ static inline unsigned long cpu_util_cfs(struct rq *rq)
2193 2207
2194 return util; 2208 return util;
2195} 2209}
2210
2211static inline unsigned long cpu_util_rt(struct rq *rq)
2212{
2213 return READ_ONCE(rq->avg_rt.util_avg);
2214}
2215#endif
2216
2217#ifdef HAVE_SCHED_AVG_IRQ
2218static inline unsigned long cpu_util_irq(struct rq *rq)
2219{
2220 return rq->avg_irq.util_avg;
2221}
2222
2223static inline
2224unsigned long scale_irq_capacity(unsigned long util, unsigned long irq, unsigned long max)
2225{
2226 util *= (max - irq);
2227 util /= max;
2228
2229 return util;
2230
2231}
2232#else
2233static inline unsigned long cpu_util_irq(struct rq *rq)
2234{
2235 return 0;
2236}
2237
2238static inline
2239unsigned long scale_irq_capacity(unsigned long util, unsigned long irq, unsigned long max)
2240{
2241 return util;
2242}
2196#endif 2243#endif
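Editor's note: the se_weight()/se_runnable() helpers that moved into sched.h above simply strip the extra fixed-point resolution that 64-bit kernels keep in load weights. A hedged sketch of that scaling; the shift value is illustrative (see SCHED_FIXEDPOINT_SHIFT in the kernel headers), and the macro names below are stand-ins, not the kernel's.

#include <stdio.h>

#define FIXEDPOINT_SHIFT 10	/* stand-in for SCHED_FIXEDPOINT_SHIFT */
#define scale_load(w)		((unsigned long)(w) << FIXEDPOINT_SHIFT)
#define scale_load_down(w)	((unsigned long)(w) >> FIXEDPOINT_SHIFT)

int main(void)
{
	unsigned long nice0 = 1024;			/* weight of a nice-0 task */
	unsigned long stored = scale_load(nice0);	/* what se->load.weight would hold */

	/* se_weight() would return the down-scaled value used by PELT. */
	printf("%lu -> %lu -> %lu\n", nice0, stored, scale_load_down(stored));
	return 0;
}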
diff --git a/kernel/sched/swait.c b/kernel/sched/swait.c
index b6fb2c3b3ff7..66b59ac77c22 100644
--- a/kernel/sched/swait.c
+++ b/kernel/sched/swait.c
@@ -32,7 +32,7 @@ void swake_up_locked(struct swait_queue_head *q)
32} 32}
33EXPORT_SYMBOL(swake_up_locked); 33EXPORT_SYMBOL(swake_up_locked);
34 34
35void swake_up(struct swait_queue_head *q) 35void swake_up_one(struct swait_queue_head *q)
36{ 36{
37 unsigned long flags; 37 unsigned long flags;
38 38
@@ -40,7 +40,7 @@ void swake_up(struct swait_queue_head *q)
40 swake_up_locked(q); 40 swake_up_locked(q);
41 raw_spin_unlock_irqrestore(&q->lock, flags); 41 raw_spin_unlock_irqrestore(&q->lock, flags);
42} 42}
43EXPORT_SYMBOL(swake_up); 43EXPORT_SYMBOL(swake_up_one);
44 44
45/* 45/*
46 * Does not allow usage from IRQ disabled, since we must be able to 46 * Does not allow usage from IRQ disabled, since we must be able to
@@ -69,14 +69,14 @@ void swake_up_all(struct swait_queue_head *q)
69} 69}
70EXPORT_SYMBOL(swake_up_all); 70EXPORT_SYMBOL(swake_up_all);
71 71
72void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait) 72static void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait)
73{ 73{
74 wait->task = current; 74 wait->task = current;
75 if (list_empty(&wait->task_list)) 75 if (list_empty(&wait->task_list))
76 list_add(&wait->task_list, &q->task_list); 76 list_add_tail(&wait->task_list, &q->task_list);
77} 77}
78 78
79void prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait, int state) 79void prepare_to_swait_exclusive(struct swait_queue_head *q, struct swait_queue *wait, int state)
80{ 80{
81 unsigned long flags; 81 unsigned long flags;
82 82
@@ -85,16 +85,28 @@ void prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait, int
85 set_current_state(state); 85 set_current_state(state);
86 raw_spin_unlock_irqrestore(&q->lock, flags); 86 raw_spin_unlock_irqrestore(&q->lock, flags);
87} 87}
88EXPORT_SYMBOL(prepare_to_swait); 88EXPORT_SYMBOL(prepare_to_swait_exclusive);
89 89
90long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queue *wait, int state) 90long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queue *wait, int state)
91{ 91{
92 if (signal_pending_state(state, current)) 92 unsigned long flags;
93 return -ERESTARTSYS; 93 long ret = 0;
94 94
95 prepare_to_swait(q, wait, state); 95 raw_spin_lock_irqsave(&q->lock, flags);
96 if (unlikely(signal_pending_state(state, current))) {
97 /*
98 * See prepare_to_wait_event(). TL;DR, subsequent swake_up_one()
99 * must not see us.
100 */
101 list_del_init(&wait->task_list);
102 ret = -ERESTARTSYS;
103 } else {
104 __prepare_to_swait(q, wait);
105 set_current_state(state);
106 }
107 raw_spin_unlock_irqrestore(&q->lock, flags);
96 108
97 return 0; 109 return ret;
98} 110}
99EXPORT_SYMBOL(prepare_to_swait_event); 111EXPORT_SYMBOL(prepare_to_swait_event);
100 112
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index 928be527477e..870f97b313e3 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -134,8 +134,8 @@ static void __wake_up_common_lock(struct wait_queue_head *wq_head, unsigned int
134 * @nr_exclusive: how many wake-one or wake-many threads to wake up 134 * @nr_exclusive: how many wake-one or wake-many threads to wake up
135 * @key: is directly passed to the wakeup function 135 * @key: is directly passed to the wakeup function
136 * 136 *
137 * It may be assumed that this function implies a write memory barrier before 137 * If this function wakes up a task, it executes a full memory barrier before
138 * changing the task state if and only if any tasks are woken up. 138 * accessing the task state.
139 */ 139 */
140void __wake_up(struct wait_queue_head *wq_head, unsigned int mode, 140void __wake_up(struct wait_queue_head *wq_head, unsigned int mode,
141 int nr_exclusive, void *key) 141 int nr_exclusive, void *key)
@@ -180,8 +180,8 @@ EXPORT_SYMBOL_GPL(__wake_up_locked_key_bookmark);
180 * 180 *
181 * On UP it can prevent extra preemption. 181 * On UP it can prevent extra preemption.
182 * 182 *
183 * It may be assumed that this function implies a write memory barrier before 183 * If this function wakes up a task, it executes a full memory barrier before
184 * changing the task state if and only if any tasks are woken up. 184 * accessing the task state.
185 */ 185 */
186void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode, 186void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode,
187 int nr_exclusive, void *key) 187 int nr_exclusive, void *key)
@@ -392,35 +392,36 @@ static inline bool is_kthread_should_stop(void)
392 * if (condition) 392 * if (condition)
393 * break; 393 * break;
394 * 394 *
395 * p->state = mode; condition = true; 395 * // in wait_woken() // in woken_wake_function()
396 * smp_mb(); // A smp_wmb(); // C
397 * if (!wq_entry->flags & WQ_FLAG_WOKEN) wq_entry->flags |= WQ_FLAG_WOKEN;
398 * schedule() try_to_wake_up();
399 * p->state = TASK_RUNNING; ~~~~~~~~~~~~~~~~~~
400 * wq_entry->flags &= ~WQ_FLAG_WOKEN; condition = true;
401 * smp_mb() // B smp_wmb(); // C
402 * wq_entry->flags |= WQ_FLAG_WOKEN;
403 * }
404 * remove_wait_queue(&wq_head, &wait);
405 * 396 *
397 * p->state = mode; wq_entry->flags |= WQ_FLAG_WOKEN;
398 * smp_mb(); // A try_to_wake_up():
399 * if (!(wq_entry->flags & WQ_FLAG_WOKEN)) <full barrier>
400 * schedule() if (p->state & mode)
401 * p->state = TASK_RUNNING; p->state = TASK_RUNNING;
402 * wq_entry->flags &= ~WQ_FLAG_WOKEN; ~~~~~~~~~~~~~~~~~~
403 * smp_mb(); // B condition = true;
404 * } smp_mb(); // C
405 * remove_wait_queue(&wq_head, &wait); wq_entry->flags |= WQ_FLAG_WOKEN;
406 */ 406 */
407long wait_woken(struct wait_queue_entry *wq_entry, unsigned mode, long timeout) 407long wait_woken(struct wait_queue_entry *wq_entry, unsigned mode, long timeout)
408{ 408{
409 set_current_state(mode); /* A */
410 /* 409 /*
411 * The above implies an smp_mb(), which matches with the smp_wmb() from 410 * The below executes an smp_mb(), which matches with the full barrier
412 * woken_wake_function() such that if we observe WQ_FLAG_WOKEN we must 411 * executed by the try_to_wake_up() in woken_wake_function() such that
413 * also observe all state before the wakeup. 412 * either we see the store to wq_entry->flags in woken_wake_function()
413 * or woken_wake_function() sees our store to current->state.
414 */ 414 */
415 set_current_state(mode); /* A */
415 if (!(wq_entry->flags & WQ_FLAG_WOKEN) && !is_kthread_should_stop()) 416 if (!(wq_entry->flags & WQ_FLAG_WOKEN) && !is_kthread_should_stop())
416 timeout = schedule_timeout(timeout); 417 timeout = schedule_timeout(timeout);
417 __set_current_state(TASK_RUNNING); 418 __set_current_state(TASK_RUNNING);
418 419
419 /* 420 /*
420 * The below implies an smp_mb(), it too pairs with the smp_wmb() from 421 * The below executes an smp_mb(), which matches with the smp_mb() (C)
421 * woken_wake_function() such that we must either observe the wait 422 * in woken_wake_function() such that either we see the wait condition
422 * condition being true _OR_ WQ_FLAG_WOKEN such that we will not miss 423 * being true or the store to wq_entry->flags in woken_wake_function()
423 * an event. 424 * follows ours in the coherence order.
424 */ 425 */
425 smp_store_mb(wq_entry->flags, wq_entry->flags & ~WQ_FLAG_WOKEN); /* B */ 426 smp_store_mb(wq_entry->flags, wq_entry->flags & ~WQ_FLAG_WOKEN); /* B */
426 427
@@ -430,14 +431,8 @@ EXPORT_SYMBOL(wait_woken);
430 431
431int woken_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key) 432int woken_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key)
432{ 433{
433 /* 434 /* Pairs with the smp_store_mb() in wait_woken(). */
434 * Although this function is called under waitqueue lock, LOCK 435 smp_mb(); /* C */
435 * doesn't imply write barrier and the users expects write
436 * barrier semantics on wakeup functions. The following
437 * smp_wmb() is equivalent to smp_wmb() in try_to_wake_up()
438 * and is paired with smp_store_mb() in wait_woken().
439 */
440 smp_wmb(); /* C */
441 wq_entry->flags |= WQ_FLAG_WOKEN; 436 wq_entry->flags |= WQ_FLAG_WOKEN;
442 437
443 return default_wake_function(wq_entry, mode, sync, key); 438 return default_wake_function(wq_entry, mode, sync, key);
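Editor's note: the rewritten wait_woken()/woken_wake_function() comments describe the classic store-buffering pattern: with a full barrier on each side, at least one side must observe the other's store, so the wakeup cannot be lost. A userspace sketch of that pairing using C11 atomics as stand-ins for the kernel primitives; the seq_cst fences and names are illustrative, not the kernel's exact barriers.

#include <stdatomic.h>
#include <stdbool.h>

static atomic_int task_state;	/* 0 == TASK_RUNNING, 1 == about to sleep */
static atomic_int woken_flag;	/* stand-in for WQ_FLAG_WOKEN */

/* wait_woken() side: publish the sleep state, then check the flag. */
static bool waiter_should_sleep(void)
{
	atomic_store(&task_state, 1);			/* p->state = mode */
	atomic_thread_fence(memory_order_seq_cst);	/* smp_mb(); // A */
	return atomic_load(&woken_flag) == 0;		/* sleep only if not woken */
}

/* woken_wake_function() side: set the flag, then the wakeup's full barrier
 * orders it before the p->state check in try_to_wake_up(). */
static void waker(void)
{
	atomic_store(&woken_flag, 1);			/* flags |= WQ_FLAG_WOKEN */
	atomic_thread_fence(memory_order_seq_cst);	/* full barrier in try_to_wake_up() */
	if (atomic_load(&task_state) == 1)
		atomic_store(&task_state, 0);		/* p->state = TASK_RUNNING */
}

int main(void)
{
	/* Single-threaded demonstration of the intended handshake order. */
	if (waiter_should_sleep())
		waker();
	return 0;
}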
diff --git a/kernel/smpboot.c b/kernel/smpboot.c
index 5043e7433f4b..c230c2dd48e1 100644
--- a/kernel/smpboot.c
+++ b/kernel/smpboot.c
@@ -238,8 +238,7 @@ int smpboot_unpark_threads(unsigned int cpu)
238 238
239 mutex_lock(&smpboot_threads_lock); 239 mutex_lock(&smpboot_threads_lock);
240 list_for_each_entry(cur, &hotplug_threads, list) 240 list_for_each_entry(cur, &hotplug_threads, list)
241 if (cpumask_test_cpu(cpu, cur->cpumask)) 241 smpboot_unpark_thread(cur, cpu);
242 smpboot_unpark_thread(cur, cpu);
243 mutex_unlock(&smpboot_threads_lock); 242 mutex_unlock(&smpboot_threads_lock);
244 return 0; 243 return 0;
245} 244}
@@ -280,34 +279,26 @@ static void smpboot_destroy_threads(struct smp_hotplug_thread *ht)
280} 279}
281 280
282/** 281/**
283 * smpboot_register_percpu_thread_cpumask - Register a per_cpu thread related 282 * smpboot_register_percpu_thread - Register a per_cpu thread related
284 * to hotplug 283 * to hotplug
285 * @plug_thread: Hotplug thread descriptor 284 * @plug_thread: Hotplug thread descriptor
286 * @cpumask: The cpumask where threads run
287 * 285 *
288 * Creates and starts the threads on all online cpus. 286 * Creates and starts the threads on all online cpus.
289 */ 287 */
290int smpboot_register_percpu_thread_cpumask(struct smp_hotplug_thread *plug_thread, 288int smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread)
291 const struct cpumask *cpumask)
292{ 289{
293 unsigned int cpu; 290 unsigned int cpu;
294 int ret = 0; 291 int ret = 0;
295 292
296 if (!alloc_cpumask_var(&plug_thread->cpumask, GFP_KERNEL))
297 return -ENOMEM;
298 cpumask_copy(plug_thread->cpumask, cpumask);
299
300 get_online_cpus(); 293 get_online_cpus();
301 mutex_lock(&smpboot_threads_lock); 294 mutex_lock(&smpboot_threads_lock);
302 for_each_online_cpu(cpu) { 295 for_each_online_cpu(cpu) {
303 ret = __smpboot_create_thread(plug_thread, cpu); 296 ret = __smpboot_create_thread(plug_thread, cpu);
304 if (ret) { 297 if (ret) {
305 smpboot_destroy_threads(plug_thread); 298 smpboot_destroy_threads(plug_thread);
306 free_cpumask_var(plug_thread->cpumask);
307 goto out; 299 goto out;
308 } 300 }
309 if (cpumask_test_cpu(cpu, cpumask)) 301 smpboot_unpark_thread(plug_thread, cpu);
310 smpboot_unpark_thread(plug_thread, cpu);
311 } 302 }
312 list_add(&plug_thread->list, &hotplug_threads); 303 list_add(&plug_thread->list, &hotplug_threads);
313out: 304out:
@@ -315,7 +306,7 @@ out:
315 put_online_cpus(); 306 put_online_cpus();
316 return ret; 307 return ret;
317} 308}
318EXPORT_SYMBOL_GPL(smpboot_register_percpu_thread_cpumask); 309EXPORT_SYMBOL_GPL(smpboot_register_percpu_thread);
319 310
320/** 311/**
321 * smpboot_unregister_percpu_thread - Unregister a per_cpu thread related to hotplug 312 * smpboot_unregister_percpu_thread - Unregister a per_cpu thread related to hotplug
@@ -331,44 +322,9 @@ void smpboot_unregister_percpu_thread(struct smp_hotplug_thread *plug_thread)
331 smpboot_destroy_threads(plug_thread); 322 smpboot_destroy_threads(plug_thread);
332 mutex_unlock(&smpboot_threads_lock); 323 mutex_unlock(&smpboot_threads_lock);
333 put_online_cpus(); 324 put_online_cpus();
334 free_cpumask_var(plug_thread->cpumask);
335} 325}
336EXPORT_SYMBOL_GPL(smpboot_unregister_percpu_thread); 326EXPORT_SYMBOL_GPL(smpboot_unregister_percpu_thread);
337 327
338/**
339 * smpboot_update_cpumask_percpu_thread - Adjust which per_cpu hotplug threads stay parked
340 * @plug_thread: Hotplug thread descriptor
341 * @new: Revised mask to use
342 *
343 * The cpumask field in the smp_hotplug_thread must not be updated directly
344 * by the client, but only by calling this function.
345 * This function can only be called on a registered smp_hotplug_thread.
346 */
347void smpboot_update_cpumask_percpu_thread(struct smp_hotplug_thread *plug_thread,
348 const struct cpumask *new)
349{
350 struct cpumask *old = plug_thread->cpumask;
351 static struct cpumask tmp;
352 unsigned int cpu;
353
354 lockdep_assert_cpus_held();
355 mutex_lock(&smpboot_threads_lock);
356
357 /* Park threads that were exclusively enabled on the old mask. */
358 cpumask_andnot(&tmp, old, new);
359 for_each_cpu_and(cpu, &tmp, cpu_online_mask)
360 smpboot_park_thread(plug_thread, cpu);
361
362 /* Unpark threads that are exclusively enabled on the new mask. */
363 cpumask_andnot(&tmp, new, old);
364 for_each_cpu_and(cpu, &tmp, cpu_online_mask)
365 smpboot_unpark_thread(plug_thread, cpu);
366
367 cpumask_copy(old, new);
368
369 mutex_unlock(&smpboot_threads_lock);
370}
371
372static DEFINE_PER_CPU(atomic_t, cpu_hotplug_state) = ATOMIC_INIT(CPU_POST_DEAD); 328static DEFINE_PER_CPU(atomic_t, cpu_hotplug_state) = ATOMIC_INIT(CPU_POST_DEAD);
373 329
374/* 330/*
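
With the cpumask variant folded away, callers return to the plain registration call and get their threads created on every online CPU. A minimal caller sketch of the remaining API (the demo_* names are illustrative, not part of this patch):

#include <linux/smpboot.h>
#include <linux/percpu-defs.h>

static DEFINE_PER_CPU(struct task_struct *, demo_task);

static int demo_should_run(unsigned int cpu)
{
	return 0;			/* nothing to do in this sketch */
}

static void demo_thread_fn(unsigned int cpu)
{
	/* per-CPU work would go here */
}

static struct smp_hotplug_thread demo_threads = {
	.store			= &demo_task,
	.thread_should_run	= demo_should_run,
	.thread_fn		= demo_thread_fn,
	.thread_comm		= "demo/%u",
};

/* in some init path: smpboot_register_percpu_thread(&demo_threads); */
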
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 75ffc1d1a2e0..6f584861d329 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -390,7 +390,7 @@ static inline void tick_irq_exit(void)
390 390
391 /* Make sure that timer wheel updates are propagated */ 391 /* Make sure that timer wheel updates are propagated */
392 if ((idle_cpu(cpu) && !need_resched()) || tick_nohz_full_cpu(cpu)) { 392 if ((idle_cpu(cpu) && !need_resched()) || tick_nohz_full_cpu(cpu)) {
393 if (!in_interrupt()) 393 if (!in_irq())
394 tick_nohz_irq_exit(); 394 tick_nohz_irq_exit();
395 } 395 }
396#endif 396#endif
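
The narrowing from in_interrupt() to in_irq() matters because the two predicates cover different contexts. A small sketch of the distinction (demo_show_context() is a made-up helper):

#include <linux/preempt.h>

static void demo_show_context(void)
{
	bool hardirq_only = in_irq();		/* true only in a hardirq handler */
	bool any_atomic   = in_interrupt();	/* also true in softirq context and
						 * under local_bh_disable() */

	(void)hardirq_only;
	(void)any_atomic;
}
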
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index e190d1ef3a23..067cb83f37ea 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -81,6 +81,7 @@ static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work)
81 unsigned long flags; 81 unsigned long flags;
82 bool enabled; 82 bool enabled;
83 83
84 preempt_disable();
84 raw_spin_lock_irqsave(&stopper->lock, flags); 85 raw_spin_lock_irqsave(&stopper->lock, flags);
85 enabled = stopper->enabled; 86 enabled = stopper->enabled;
86 if (enabled) 87 if (enabled)
@@ -90,6 +91,7 @@ static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work)
90 raw_spin_unlock_irqrestore(&stopper->lock, flags); 91 raw_spin_unlock_irqrestore(&stopper->lock, flags);
91 92
92 wake_up_q(&wakeq); 93 wake_up_q(&wakeq);
94 preempt_enable();
93 95
94 return enabled; 96 return enabled;
95} 97}
@@ -236,13 +238,24 @@ static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1,
236 struct cpu_stopper *stopper2 = per_cpu_ptr(&cpu_stopper, cpu2); 238 struct cpu_stopper *stopper2 = per_cpu_ptr(&cpu_stopper, cpu2);
237 DEFINE_WAKE_Q(wakeq); 239 DEFINE_WAKE_Q(wakeq);
238 int err; 240 int err;
241
239retry: 242retry:
243 /*
244 * The waking up of stopper threads has to happen in the same
245 * scheduling context as the queueing. Otherwise, there is a
246 * possibility of one of the above stoppers being woken up by another
247 * CPU, and preempting us. This will cause us to not wake up the other
248 * stopper forever.
249 */
250 preempt_disable();
240 raw_spin_lock_irq(&stopper1->lock); 251 raw_spin_lock_irq(&stopper1->lock);
241 raw_spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING); 252 raw_spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING);
242 253
243 err = -ENOENT; 254 if (!stopper1->enabled || !stopper2->enabled) {
244 if (!stopper1->enabled || !stopper2->enabled) 255 err = -ENOENT;
245 goto unlock; 256 goto unlock;
257 }
258
246 /* 259 /*
247 * Ensure that if we race with __stop_cpus() the stoppers won't get 260 * Ensure that if we race with __stop_cpus() the stoppers won't get
248 * queued up in reverse order leading to system deadlock. 261 * queued up in reverse order leading to system deadlock.
@@ -253,36 +266,30 @@ retry:
253 * It can be falsely true but it is safe to spin until it is cleared, 266 * It can be falsely true but it is safe to spin until it is cleared,
254 * queue_stop_cpus_work() does everything under preempt_disable(). 267 * queue_stop_cpus_work() does everything under preempt_disable().
255 */ 268 */
256 err = -EDEADLK; 269 if (unlikely(stop_cpus_in_progress)) {
257 if (unlikely(stop_cpus_in_progress)) 270 err = -EDEADLK;
258 goto unlock; 271 goto unlock;
272 }
259 273
260 err = 0; 274 err = 0;
261 __cpu_stop_queue_work(stopper1, work1, &wakeq); 275 __cpu_stop_queue_work(stopper1, work1, &wakeq);
262 __cpu_stop_queue_work(stopper2, work2, &wakeq); 276 __cpu_stop_queue_work(stopper2, work2, &wakeq);
263 /* 277
264 * The waking up of stopper threads has to happen
265 * in the same scheduling context as the queueing.
266 * Otherwise, there is a possibility of one of the
267 * above stoppers being woken up by another CPU,
268 * and preempting us. This will cause us to n ot
269 * wake up the other stopper forever.
270 */
271 preempt_disable();
272unlock: 278unlock:
273 raw_spin_unlock(&stopper2->lock); 279 raw_spin_unlock(&stopper2->lock);
274 raw_spin_unlock_irq(&stopper1->lock); 280 raw_spin_unlock_irq(&stopper1->lock);
275 281
276 if (unlikely(err == -EDEADLK)) { 282 if (unlikely(err == -EDEADLK)) {
283 preempt_enable();
284
277 while (stop_cpus_in_progress) 285 while (stop_cpus_in_progress)
278 cpu_relax(); 286 cpu_relax();
287
279 goto retry; 288 goto retry;
280 } 289 }
281 290
282 if (!err) { 291 wake_up_q(&wakeq);
283 wake_up_q(&wakeq); 292 preempt_enable();
284 preempt_enable();
285 }
286 293
287 return err; 294 return err;
288} 295}
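
The common thread in both stop_machine hunks is that queueing and waking must happen inside one preempt-disabled region, with the wakeup issued only after the stopper lock is dropped. A stripped-down sketch of that ordering, not the exact kernel code (demo_lock and demo_queue_and_wake() are illustrative):

#include <linux/sched/wake_q.h>
#include <linux/spinlock.h>
#include <linux/preempt.h>

static DEFINE_RAW_SPINLOCK(demo_lock);

static void demo_queue_and_wake(struct task_struct *stopper_task)
{
	DEFINE_WAKE_Q(wakeq);

	preempt_disable();			/* stay on this CPU ...        */
	raw_spin_lock_irq(&demo_lock);
	wake_q_add(&wakeq, stopper_task);	/* ... while queueing the work */
	raw_spin_unlock_irq(&demo_lock);

	wake_up_q(&wakeq);			/* wake with the lock dropped  */
	preempt_enable();			/* but before we can be preempted */
}
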
diff --git a/kernel/sys.c b/kernel/sys.c
index 38509dc1f77b..e27b51d3facd 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2512,11 +2512,11 @@ static int do_sysinfo(struct sysinfo *info)
2512{ 2512{
2513 unsigned long mem_total, sav_total; 2513 unsigned long mem_total, sav_total;
2514 unsigned int mem_unit, bitcount; 2514 unsigned int mem_unit, bitcount;
2515 struct timespec tp; 2515 struct timespec64 tp;
2516 2516
2517 memset(info, 0, sizeof(struct sysinfo)); 2517 memset(info, 0, sizeof(struct sysinfo));
2518 2518
2519 get_monotonic_boottime(&tp); 2519 ktime_get_boottime_ts64(&tp);
2520 info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0); 2520 info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
2521 2521
2522 get_avenrun(info->loads, 0, SI_LOAD_SHIFT - FSHIFT); 2522 get_avenrun(info->loads, 0, SI_LOAD_SHIFT - FSHIFT);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 2d9837c0aff4..f22f76b7a138 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -368,14 +368,6 @@ static struct ctl_table kern_table[] = {
368 .mode = 0644, 368 .mode = 0644,
369 .proc_handler = proc_dointvec, 369 .proc_handler = proc_dointvec,
370 }, 370 },
371 {
372 .procname = "sched_time_avg_ms",
373 .data = &sysctl_sched_time_avg,
374 .maxlen = sizeof(unsigned int),
375 .mode = 0644,
376 .proc_handler = proc_dointvec_minmax,
377 .extra1 = &one,
378 },
379#ifdef CONFIG_SCHEDSTATS 371#ifdef CONFIG_SCHEDSTATS
380 { 372 {
381 .procname = "sched_schedstats", 373 .procname = "sched_schedstats",
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 639321bf2e39..fa5de5e8de61 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -581,11 +581,11 @@ static void alarm_timer_rearm(struct k_itimer *timr)
581 * @timr: Pointer to the posixtimer data struct 581 * @timr: Pointer to the posixtimer data struct
582 * @now: Current time to forward the timer against 582 * @now: Current time to forward the timer against
583 */ 583 */
584static int alarm_timer_forward(struct k_itimer *timr, ktime_t now) 584static s64 alarm_timer_forward(struct k_itimer *timr, ktime_t now)
585{ 585{
586 struct alarm *alarm = &timr->it.alarm.alarmtimer; 586 struct alarm *alarm = &timr->it.alarm.alarmtimer;
587 587
588 return (int) alarm_forward(alarm, timr->it_interval, now); 588 return alarm_forward(alarm, timr->it_interval, now);
589} 589}
590 590
591/** 591/**
@@ -808,7 +808,8 @@ static int alarm_timer_nsleep(const clockid_t which_clock, int flags,
808 /* Convert (if necessary) to absolute time */ 808 /* Convert (if necessary) to absolute time */
809 if (flags != TIMER_ABSTIME) { 809 if (flags != TIMER_ABSTIME) {
810 ktime_t now = alarm_bases[type].gettime(); 810 ktime_t now = alarm_bases[type].gettime();
811 exp = ktime_add(now, exp); 811
812 exp = ktime_add_safe(now, exp);
812 } 813 }
813 814
814 ret = alarmtimer_do_nsleep(&alarm, exp, type); 815 ret = alarmtimer_do_nsleep(&alarm, exp, type);
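
ktime_add() can wrap when a huge user-supplied relative timeout is added to the current time; ktime_add_safe() saturates at KTIME_MAX instead. A hedged sketch of the difference (demo_abs_expiry() is illustrative; ktime_add_safe() is declared in linux/hrtimer.h):

#include <linux/ktime.h>
#include <linux/hrtimer.h>

static ktime_t demo_abs_expiry(ktime_t now, ktime_t rel)
{
	/* e.g. rel == ns_to_ktime(S64_MAX): plain ktime_add() would
	 * overflow, ktime_add_safe() clamps the result to KTIME_MAX. */
	return ktime_add_safe(now, rel);
}
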
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 16c027e9cc73..8c0e4092f661 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -463,6 +463,12 @@ void clockevents_register_device(struct clock_event_device *dev)
463 dev->cpumask = cpumask_of(smp_processor_id()); 463 dev->cpumask = cpumask_of(smp_processor_id());
464 } 464 }
465 465
466 if (dev->cpumask == cpu_all_mask) {
467 WARN(1, "%s cpumask == cpu_all_mask, using cpu_possible_mask instead\n",
468 dev->name);
469 dev->cpumask = cpu_possible_mask;
470 }
471
466 raw_spin_lock_irqsave(&clockevents_lock, flags); 472 raw_spin_lock_irqsave(&clockevents_lock, flags);
467 473
468 list_add(&dev->list, &clockevent_devices); 474 list_add(&dev->list, &clockevent_devices);
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index f89a78e2792b..f74fb00d8064 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -94,6 +94,8 @@ EXPORT_SYMBOL_GPL(clocks_calc_mult_shift);
94/*[Clocksource internal variables]--------- 94/*[Clocksource internal variables]---------
95 * curr_clocksource: 95 * curr_clocksource:
96 * currently selected clocksource. 96 * currently selected clocksource.
97 * suspend_clocksource:
98 * used to calculate the suspend time.
97 * clocksource_list: 99 * clocksource_list:
98 * linked list with the registered clocksources 100 * linked list with the registered clocksources
99 * clocksource_mutex: 101 * clocksource_mutex:
@@ -102,10 +104,12 @@ EXPORT_SYMBOL_GPL(clocks_calc_mult_shift);
102 * Name of the user-specified clocksource. 104 * Name of the user-specified clocksource.
103 */ 105 */
104static struct clocksource *curr_clocksource; 106static struct clocksource *curr_clocksource;
107static struct clocksource *suspend_clocksource;
105static LIST_HEAD(clocksource_list); 108static LIST_HEAD(clocksource_list);
106static DEFINE_MUTEX(clocksource_mutex); 109static DEFINE_MUTEX(clocksource_mutex);
107static char override_name[CS_NAME_LEN]; 110static char override_name[CS_NAME_LEN];
108static int finished_booting; 111static int finished_booting;
112static u64 suspend_start;
109 113
110#ifdef CONFIG_CLOCKSOURCE_WATCHDOG 114#ifdef CONFIG_CLOCKSOURCE_WATCHDOG
111static void clocksource_watchdog_work(struct work_struct *work); 115static void clocksource_watchdog_work(struct work_struct *work);
@@ -447,6 +451,140 @@ static inline void clocksource_watchdog_unlock(unsigned long *flags) { }
447 451
448#endif /* CONFIG_CLOCKSOURCE_WATCHDOG */ 452#endif /* CONFIG_CLOCKSOURCE_WATCHDOG */
449 453
454static bool clocksource_is_suspend(struct clocksource *cs)
455{
456 return cs == suspend_clocksource;
457}
458
459static void __clocksource_suspend_select(struct clocksource *cs)
460{
461 /*
462 * Skip the clocksource which will be stopped in suspend state.
463 */
464 if (!(cs->flags & CLOCK_SOURCE_SUSPEND_NONSTOP))
465 return;
466
467 /*
 468	 * The nonstop clocksource can be selected as the suspend clocksource to
 469	 * calculate the suspend time, so it should not supply suspend/resume
 470	 * interfaces that would stop it while the system is suspended.
471 */
472 if (cs->suspend || cs->resume) {
473 pr_warn("Nonstop clocksource %s should not supply suspend/resume interfaces\n",
474 cs->name);
475 }
476
477 /* Pick the best rating. */
478 if (!suspend_clocksource || cs->rating > suspend_clocksource->rating)
479 suspend_clocksource = cs;
480}
481
482/**
483 * clocksource_suspend_select - Select the best clocksource for suspend timing
 484 * @fallback: if true, select a fallback clocksource
485 */
486static void clocksource_suspend_select(bool fallback)
487{
488 struct clocksource *cs, *old_suspend;
489
490 old_suspend = suspend_clocksource;
491 if (fallback)
492 suspend_clocksource = NULL;
493
494 list_for_each_entry(cs, &clocksource_list, list) {
495 /* Skip current if we were requested for a fallback. */
496 if (fallback && cs == old_suspend)
497 continue;
498
499 __clocksource_suspend_select(cs);
500 }
501}
502
503/**
504 * clocksource_start_suspend_timing - Start measuring the suspend timing
505 * @cs: current clocksource from timekeeping
506 * @start_cycles: current cycles from timekeeping
507 *
508 * This function will save the start cycle values of suspend timer to calculate
509 * the suspend time when resuming system.
510 *
511 * This function is called late in the suspend process from timekeeping_suspend(),
 512 * which means processes are frozen, non-boot cpus and interrupts are disabled
513 * now. It is therefore possible to start the suspend timer without taking the
514 * clocksource mutex.
515 */
516void clocksource_start_suspend_timing(struct clocksource *cs, u64 start_cycles)
517{
518 if (!suspend_clocksource)
519 return;
520
521 /*
522 * If current clocksource is the suspend timer, we should use the
523 * tkr_mono.cycle_last value as suspend_start to avoid same reading
524 * from suspend timer.
525 */
526 if (clocksource_is_suspend(cs)) {
527 suspend_start = start_cycles;
528 return;
529 }
530
531 if (suspend_clocksource->enable &&
532 suspend_clocksource->enable(suspend_clocksource)) {
533 pr_warn_once("Failed to enable the non-suspend-able clocksource.\n");
534 return;
535 }
536
537 suspend_start = suspend_clocksource->read(suspend_clocksource);
538}
539
540/**
541 * clocksource_stop_suspend_timing - Stop measuring the suspend timing
542 * @cs: current clocksource from timekeeping
543 * @cycle_now: current cycles from timekeeping
544 *
545 * This function will calculate the suspend time from suspend timer.
546 *
547 * Returns nanoseconds since suspend started, 0 if no usable suspend clocksource.
548 *
549 * This function is called early in the resume process from timekeeping_resume(),
550 * that means there is only one cpu, no processes are running and the interrupts
551 * are disabled. It is therefore possible to stop the suspend timer without
552 * taking the clocksource mutex.
553 */
554u64 clocksource_stop_suspend_timing(struct clocksource *cs, u64 cycle_now)
555{
556 u64 now, delta, nsec = 0;
557
558 if (!suspend_clocksource)
559 return 0;
560
561 /*
562 * If current clocksource is the suspend timer, we should use the
563 * tkr_mono.cycle_last value from timekeeping as current cycle to
564 * avoid same reading from suspend timer.
565 */
566 if (clocksource_is_suspend(cs))
567 now = cycle_now;
568 else
569 now = suspend_clocksource->read(suspend_clocksource);
570
571 if (now > suspend_start) {
572 delta = clocksource_delta(now, suspend_start,
573 suspend_clocksource->mask);
574 nsec = mul_u64_u32_shr(delta, suspend_clocksource->mult,
575 suspend_clocksource->shift);
576 }
577
578 /*
579 * Disable the suspend timer to save power if current clocksource is
580 * not the suspend timer.
581 */
582 if (!clocksource_is_suspend(cs) && suspend_clocksource->disable)
583 suspend_clocksource->disable(suspend_clocksource);
584
585 return nsec;
586}
587
450/** 588/**
451 * clocksource_suspend - suspend the clocksource(s) 589 * clocksource_suspend - suspend the clocksource(s)
452 */ 590 */
@@ -792,6 +930,7 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
792 930
793 clocksource_select(); 931 clocksource_select();
794 clocksource_select_watchdog(false); 932 clocksource_select_watchdog(false);
933 __clocksource_suspend_select(cs);
795 mutex_unlock(&clocksource_mutex); 934 mutex_unlock(&clocksource_mutex);
796 return 0; 935 return 0;
797} 936}
@@ -820,6 +959,7 @@ void clocksource_change_rating(struct clocksource *cs, int rating)
820 959
821 clocksource_select(); 960 clocksource_select();
822 clocksource_select_watchdog(false); 961 clocksource_select_watchdog(false);
962 clocksource_suspend_select(false);
823 mutex_unlock(&clocksource_mutex); 963 mutex_unlock(&clocksource_mutex);
824} 964}
825EXPORT_SYMBOL(clocksource_change_rating); 965EXPORT_SYMBOL(clocksource_change_rating);
@@ -845,6 +985,15 @@ static int clocksource_unbind(struct clocksource *cs)
845 return -EBUSY; 985 return -EBUSY;
846 } 986 }
847 987
988 if (clocksource_is_suspend(cs)) {
989 /*
990 * Select and try to install a replacement suspend clocksource.
991 * If no replacement suspend clocksource, we will just let the
992 * clocksource go and have no suspend clocksource.
993 */
994 clocksource_suspend_select(true);
995 }
996
848 clocksource_watchdog_lock(&flags); 997 clocksource_watchdog_lock(&flags);
849 clocksource_dequeue_watchdog(cs); 998 clocksource_dequeue_watchdog(cs);
850 list_del_init(&cs->list); 999 list_del_init(&cs->list);
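
The new suspend-timing path reuses the usual mult/shift arithmetic to turn a cycle delta from the nonstop clocksource into nanoseconds. A worked sketch with illustrative values, assuming a 32768 Hz counter (real mult/shift come from clocksource registration):

#include <linux/math64.h>

static u64 demo_suspend_ns(u64 delta_cycles)
{
	/* For mult = 1000000000 and shift = 15 this computes
	 * delta * (10^9 / 32768) ns, i.e. about 30518 ns per cycle;
	 * one second of cycles (32768) yields exactly 10^9 ns. */
	return mul_u64_u32_shr(delta_cycles, 1000000000, 15);
}
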
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 3e93c54bd3a1..e1a549c9e399 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -718,8 +718,8 @@ static void hrtimer_switch_to_hres(void)
718 struct hrtimer_cpu_base *base = this_cpu_ptr(&hrtimer_bases); 718 struct hrtimer_cpu_base *base = this_cpu_ptr(&hrtimer_bases);
719 719
720 if (tick_init_highres()) { 720 if (tick_init_highres()) {
721 printk(KERN_WARNING "Could not switch to high resolution " 721 pr_warn("Could not switch to high resolution mode on CPU %u\n",
722 "mode on CPU %d\n", base->cpu); 722 base->cpu);
723 return; 723 return;
724 } 724 }
725 base->hres_active = 1; 725 base->hres_active = 1;
@@ -1573,8 +1573,7 @@ retry:
1573 else 1573 else
1574 expires_next = ktime_add(now, delta); 1574 expires_next = ktime_add(now, delta);
1575 tick_program_event(expires_next, 1); 1575 tick_program_event(expires_next, 1);
1576 printk_once(KERN_WARNING "hrtimer: interrupt took %llu ns\n", 1576 pr_warn_once("hrtimer: interrupt took %llu ns\n", ktime_to_ns(delta));
1577 ktime_to_ns(delta));
1578} 1577}
1579 1578
1580/* called with interrupts disabled */ 1579/* called with interrupts disabled */
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index a09ded765f6c..c5e0cba3b39c 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -502,7 +502,7 @@ static void sched_sync_hw_clock(struct timespec64 now,
502{ 502{
503 struct timespec64 next; 503 struct timespec64 next;
504 504
505 getnstimeofday64(&next); 505 ktime_get_real_ts64(&next);
506 if (!fail) 506 if (!fail)
507 next.tv_sec = 659; 507 next.tv_sec = 659;
508 else { 508 else {
@@ -537,7 +537,7 @@ static void sync_rtc_clock(void)
537 if (!IS_ENABLED(CONFIG_RTC_SYSTOHC)) 537 if (!IS_ENABLED(CONFIG_RTC_SYSTOHC))
538 return; 538 return;
539 539
540 getnstimeofday64(&now); 540 ktime_get_real_ts64(&now);
541 541
542 adjust = now; 542 adjust = now;
543 if (persistent_clock_is_local) 543 if (persistent_clock_is_local)
@@ -591,7 +591,7 @@ static bool sync_cmos_clock(void)
591 * Architectures are strongly encouraged to use rtclib and not 591 * Architectures are strongly encouraged to use rtclib and not
592 * implement this legacy API. 592 * implement this legacy API.
593 */ 593 */
594 getnstimeofday64(&now); 594 ktime_get_real_ts64(&now);
595 if (rtc_tv_nsec_ok(-1 * target_nsec, &adjust, &now)) { 595 if (rtc_tv_nsec_ok(-1 * target_nsec, &adjust, &now)) {
596 if (persistent_clock_is_local) 596 if (persistent_clock_is_local)
597 adjust.tv_sec -= (sys_tz.tz_minuteswest * 60); 597 adjust.tv_sec -= (sys_tz.tz_minuteswest * 60);
@@ -642,7 +642,7 @@ void ntp_notify_cmos_timer(void)
642/* 642/*
643 * Propagate a new txc->status value into the NTP state: 643 * Propagate a new txc->status value into the NTP state:
644 */ 644 */
645static inline void process_adj_status(struct timex *txc, struct timespec64 *ts) 645static inline void process_adj_status(const struct timex *txc)
646{ 646{
647 if ((time_status & STA_PLL) && !(txc->status & STA_PLL)) { 647 if ((time_status & STA_PLL) && !(txc->status & STA_PLL)) {
648 time_state = TIME_OK; 648 time_state = TIME_OK;
@@ -665,12 +665,10 @@ static inline void process_adj_status(struct timex *txc, struct timespec64 *ts)
665} 665}
666 666
667 667
668static inline void process_adjtimex_modes(struct timex *txc, 668static inline void process_adjtimex_modes(const struct timex *txc, s32 *time_tai)
669 struct timespec64 *ts,
670 s32 *time_tai)
671{ 669{
672 if (txc->modes & ADJ_STATUS) 670 if (txc->modes & ADJ_STATUS)
673 process_adj_status(txc, ts); 671 process_adj_status(txc);
674 672
675 if (txc->modes & ADJ_NANO) 673 if (txc->modes & ADJ_NANO)
676 time_status |= STA_NANO; 674 time_status |= STA_NANO;
@@ -718,7 +716,7 @@ static inline void process_adjtimex_modes(struct timex *txc,
718 * adjtimex mainly allows reading (and writing, if superuser) of 716 * adjtimex mainly allows reading (and writing, if superuser) of
719 * kernel time-keeping variables. used by xntpd. 717 * kernel time-keeping variables. used by xntpd.
720 */ 718 */
721int __do_adjtimex(struct timex *txc, struct timespec64 *ts, s32 *time_tai) 719int __do_adjtimex(struct timex *txc, const struct timespec64 *ts, s32 *time_tai)
722{ 720{
723 int result; 721 int result;
724 722
@@ -735,7 +733,7 @@ int __do_adjtimex(struct timex *txc, struct timespec64 *ts, s32 *time_tai)
735 733
736 /* If there are input parameters, then process them: */ 734 /* If there are input parameters, then process them: */
737 if (txc->modes) 735 if (txc->modes)
738 process_adjtimex_modes(txc, ts, time_tai); 736 process_adjtimex_modes(txc, time_tai);
739 737
740 txc->offset = shift_right(time_offset * NTP_INTERVAL_FREQ, 738 txc->offset = shift_right(time_offset * NTP_INTERVAL_FREQ,
741 NTP_SCALE_SHIFT); 739 NTP_SCALE_SHIFT);
@@ -1022,12 +1020,11 @@ void __hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_t
1022 1020
1023static int __init ntp_tick_adj_setup(char *str) 1021static int __init ntp_tick_adj_setup(char *str)
1024{ 1022{
1025 int rc = kstrtol(str, 0, (long *)&ntp_tick_adj); 1023 int rc = kstrtos64(str, 0, &ntp_tick_adj);
1026
1027 if (rc) 1024 if (rc)
1028 return rc; 1025 return rc;
1029 ntp_tick_adj <<= NTP_SCALE_SHIFT;
1030 1026
1027 ntp_tick_adj <<= NTP_SCALE_SHIFT;
1031 return 1; 1028 return 1;
1032} 1029}
1033 1030
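
The kstrtos64() switch fixes a latent 32-bit problem: ntp_tick_adj is an s64, and writing it through a (long *) cast only fills 32 of its 64 bits on those architectures. A small sketch of the difference (demo_* names are made up; kstrtos64() is declared in linux/kernel.h in trees of this vintage):

#include <linux/kernel.h>

static s64 demo_adj;

static int demo_parse_adj(const char *str)
{
	/* old: kstrtol(str, 0, (long *)&demo_adj) -- on 32-bit, long is
	 * 32 bits wide, so only half of the s64 would be written. */
	return kstrtos64(str, 0, &demo_adj);	/* writes the full 64 bits */
}
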
diff --git a/kernel/time/ntp_internal.h b/kernel/time/ntp_internal.h
index 909bd1f1bfb1..c24b0e13f011 100644
--- a/kernel/time/ntp_internal.h
+++ b/kernel/time/ntp_internal.h
@@ -8,6 +8,6 @@ extern void ntp_clear(void);
8extern u64 ntp_tick_length(void); 8extern u64 ntp_tick_length(void);
9extern ktime_t ntp_get_next_leap(void); 9extern ktime_t ntp_get_next_leap(void);
10extern int second_overflow(time64_t secs); 10extern int second_overflow(time64_t secs);
11extern int __do_adjtimex(struct timex *, struct timespec64 *, s32 *); 11extern int __do_adjtimex(struct timex *txc, const struct timespec64 *ts, s32 *time_tai);
12extern void __hardpps(const struct timespec64 *, const struct timespec64 *); 12extern void __hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_ts);
13#endif /* _LINUX_NTP_INTERNAL_H */ 13#endif /* _LINUX_NTP_INTERNAL_H */
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 9cdf54b04ca8..294d7b65af33 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -85,7 +85,7 @@ static void bump_cpu_timer(struct k_itimer *timer, u64 now)
85 continue; 85 continue;
86 86
87 timer->it.cpu.expires += incr; 87 timer->it.cpu.expires += incr;
88 timer->it_overrun += 1 << i; 88 timer->it_overrun += 1LL << i;
89 delta -= incr; 89 delta -= incr;
90 } 90 }
91} 91}
diff --git a/kernel/time/posix-stubs.c b/kernel/time/posix-stubs.c
index 26aa9569e24a..2c6847d5d69b 100644
--- a/kernel/time/posix-stubs.c
+++ b/kernel/time/posix-stubs.c
@@ -81,7 +81,7 @@ int do_clock_gettime(clockid_t which_clock, struct timespec64 *tp)
81 ktime_get_ts64(tp); 81 ktime_get_ts64(tp);
82 break; 82 break;
83 case CLOCK_BOOTTIME: 83 case CLOCK_BOOTTIME:
84 get_monotonic_boottime64(tp); 84 ktime_get_boottime_ts64(tp);
85 break; 85 break;
86 default: 86 default:
87 return -EINVAL; 87 return -EINVAL;
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index e08ce3f27447..3ac7295306dc 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -228,21 +228,21 @@ static int posix_ktime_get_ts(clockid_t which_clock, struct timespec64 *tp)
228 */ 228 */
229static int posix_get_monotonic_raw(clockid_t which_clock, struct timespec64 *tp) 229static int posix_get_monotonic_raw(clockid_t which_clock, struct timespec64 *tp)
230{ 230{
231 getrawmonotonic64(tp); 231 ktime_get_raw_ts64(tp);
232 return 0; 232 return 0;
233} 233}
234 234
235 235
236static int posix_get_realtime_coarse(clockid_t which_clock, struct timespec64 *tp) 236static int posix_get_realtime_coarse(clockid_t which_clock, struct timespec64 *tp)
237{ 237{
238 *tp = current_kernel_time64(); 238 ktime_get_coarse_real_ts64(tp);
239 return 0; 239 return 0;
240} 240}
241 241
242static int posix_get_monotonic_coarse(clockid_t which_clock, 242static int posix_get_monotonic_coarse(clockid_t which_clock,
243 struct timespec64 *tp) 243 struct timespec64 *tp)
244{ 244{
245 *tp = get_monotonic_coarse64(); 245 ktime_get_coarse_ts64(tp);
246 return 0; 246 return 0;
247} 247}
248 248
@@ -254,13 +254,13 @@ static int posix_get_coarse_res(const clockid_t which_clock, struct timespec64 *
254 254
255static int posix_get_boottime(const clockid_t which_clock, struct timespec64 *tp) 255static int posix_get_boottime(const clockid_t which_clock, struct timespec64 *tp)
256{ 256{
257 get_monotonic_boottime64(tp); 257 ktime_get_boottime_ts64(tp);
258 return 0; 258 return 0;
259} 259}
260 260
261static int posix_get_tai(clockid_t which_clock, struct timespec64 *tp) 261static int posix_get_tai(clockid_t which_clock, struct timespec64 *tp)
262{ 262{
263 timekeeping_clocktai64(tp); 263 ktime_get_clocktai_ts64(tp);
264 return 0; 264 return 0;
265} 265}
266 266
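
The renames in these hunks all follow the same pattern: the old ad-hoc getters give way to the ktime_get_*_ts64() family from linux/timekeeping.h. A quick reference sketch (the wrapper function is illustrative):

#include <linux/timekeeping.h>

static void demo_read_clocks(void)
{
	struct timespec64 ts;

	ktime_get_real_ts64(&ts);		/* was getnstimeofday64()         */
	ktime_get_raw_ts64(&ts);		/* was getrawmonotonic64()        */
	ktime_get_boottime_ts64(&ts);		/* was get_monotonic_boottime64() */
	ktime_get_coarse_real_ts64(&ts);	/* was current_kernel_time64()    */
	ktime_get_coarse_ts64(&ts);		/* was get_monotonic_coarse64()   */
	ktime_get_clocktai_ts64(&ts);		/* was timekeeping_clocktai64()   */
}
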
@@ -283,6 +283,17 @@ static __init int init_posix_timers(void)
283} 283}
284__initcall(init_posix_timers); 284__initcall(init_posix_timers);
285 285
286/*
287 * The siginfo si_overrun field and the return value of timer_getoverrun(2)
288 * are of type int. Clamp the overrun value to INT_MAX
289 */
290static inline int timer_overrun_to_int(struct k_itimer *timr, int baseval)
291{
292 s64 sum = timr->it_overrun_last + (s64)baseval;
293
294 return sum > (s64)INT_MAX ? INT_MAX : (int)sum;
295}
296
286static void common_hrtimer_rearm(struct k_itimer *timr) 297static void common_hrtimer_rearm(struct k_itimer *timr)
287{ 298{
288 struct hrtimer *timer = &timr->it.real.timer; 299 struct hrtimer *timer = &timr->it.real.timer;
@@ -290,9 +301,8 @@ static void common_hrtimer_rearm(struct k_itimer *timr)
290 if (!timr->it_interval) 301 if (!timr->it_interval)
291 return; 302 return;
292 303
293 timr->it_overrun += (unsigned int) hrtimer_forward(timer, 304 timr->it_overrun += hrtimer_forward(timer, timer->base->get_time(),
294 timer->base->get_time(), 305 timr->it_interval);
295 timr->it_interval);
296 hrtimer_restart(timer); 306 hrtimer_restart(timer);
297} 307}
298 308
@@ -321,10 +331,10 @@ void posixtimer_rearm(struct siginfo *info)
321 331
322 timr->it_active = 1; 332 timr->it_active = 1;
323 timr->it_overrun_last = timr->it_overrun; 333 timr->it_overrun_last = timr->it_overrun;
324 timr->it_overrun = -1; 334 timr->it_overrun = -1LL;
325 ++timr->it_requeue_pending; 335 ++timr->it_requeue_pending;
326 336
327 info->si_overrun += timr->it_overrun_last; 337 info->si_overrun = timer_overrun_to_int(timr, info->si_overrun);
328 } 338 }
329 339
330 unlock_timer(timr, flags); 340 unlock_timer(timr, flags);
@@ -418,9 +428,8 @@ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer)
418 now = ktime_add(now, kj); 428 now = ktime_add(now, kj);
419 } 429 }
420#endif 430#endif
421 timr->it_overrun += (unsigned int) 431 timr->it_overrun += hrtimer_forward(timer, now,
422 hrtimer_forward(timer, now, 432 timr->it_interval);
423 timr->it_interval);
424 ret = HRTIMER_RESTART; 433 ret = HRTIMER_RESTART;
425 ++timr->it_requeue_pending; 434 ++timr->it_requeue_pending;
426 timr->it_active = 1; 435 timr->it_active = 1;
@@ -524,7 +533,7 @@ static int do_timer_create(clockid_t which_clock, struct sigevent *event,
524 new_timer->it_id = (timer_t) new_timer_id; 533 new_timer->it_id = (timer_t) new_timer_id;
525 new_timer->it_clock = which_clock; 534 new_timer->it_clock = which_clock;
526 new_timer->kclock = kc; 535 new_timer->kclock = kc;
527 new_timer->it_overrun = -1; 536 new_timer->it_overrun = -1LL;
528 537
529 if (event) { 538 if (event) {
530 rcu_read_lock(); 539 rcu_read_lock();
@@ -645,11 +654,11 @@ static ktime_t common_hrtimer_remaining(struct k_itimer *timr, ktime_t now)
645 return __hrtimer_expires_remaining_adjusted(timer, now); 654 return __hrtimer_expires_remaining_adjusted(timer, now);
646} 655}
647 656
648static int common_hrtimer_forward(struct k_itimer *timr, ktime_t now) 657static s64 common_hrtimer_forward(struct k_itimer *timr, ktime_t now)
649{ 658{
650 struct hrtimer *timer = &timr->it.real.timer; 659 struct hrtimer *timer = &timr->it.real.timer;
651 660
652 return (int)hrtimer_forward(timer, now, timr->it_interval); 661 return hrtimer_forward(timer, now, timr->it_interval);
653} 662}
654 663
655/* 664/*
@@ -743,7 +752,7 @@ static int do_timer_gettime(timer_t timer_id, struct itimerspec64 *setting)
743 752
744/* Get the time remaining on a POSIX.1b interval timer. */ 753/* Get the time remaining on a POSIX.1b interval timer. */
745SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id, 754SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id,
746 struct itimerspec __user *, setting) 755 struct __kernel_itimerspec __user *, setting)
747{ 756{
748 struct itimerspec64 cur_setting; 757 struct itimerspec64 cur_setting;
749 758
@@ -755,7 +764,8 @@ SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id,
755 return ret; 764 return ret;
756} 765}
757 766
758#ifdef CONFIG_COMPAT 767#ifdef CONFIG_COMPAT_32BIT_TIME
768
759COMPAT_SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id, 769COMPAT_SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id,
760 struct compat_itimerspec __user *, setting) 770 struct compat_itimerspec __user *, setting)
761{ 771{
@@ -768,6 +778,7 @@ COMPAT_SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id,
768 } 778 }
769 return ret; 779 return ret;
770} 780}
781
771#endif 782#endif
772 783
773/* 784/*
@@ -789,7 +800,7 @@ SYSCALL_DEFINE1(timer_getoverrun, timer_t, timer_id)
789 if (!timr) 800 if (!timr)
790 return -EINVAL; 801 return -EINVAL;
791 802
792 overrun = timr->it_overrun_last; 803 overrun = timer_overrun_to_int(timr, 0);
793 unlock_timer(timr, flags); 804 unlock_timer(timr, flags);
794 805
795 return overrun; 806 return overrun;
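
Keeping it_overrun as s64 and clamping only at the user-visible edge avoids the old truncation: a tiny interval timer left pending long enough can accumulate more than INT_MAX overruns, and the former (int) cast could wrap negative. A small illustration (values are arbitrary):

#include <linux/kernel.h>	/* INT_MAX */

static int demo_overrun_to_int(s64 overrun)
{
	/* e.g. overrun == 3000000000: a plain (int) cast wraps to
	 * -1294967296, while the clamp reports INT_MAX instead. */
	return overrun > (s64)INT_MAX ? INT_MAX : (int)overrun;
}
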
@@ -906,8 +917,8 @@ retry:
906 917
907/* Set a POSIX.1b interval timer */ 918/* Set a POSIX.1b interval timer */
908SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags, 919SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags,
909 const struct itimerspec __user *, new_setting, 920 const struct __kernel_itimerspec __user *, new_setting,
910 struct itimerspec __user *, old_setting) 921 struct __kernel_itimerspec __user *, old_setting)
911{ 922{
912 struct itimerspec64 new_spec, old_spec; 923 struct itimerspec64 new_spec, old_spec;
913 struct itimerspec64 *rtn = old_setting ? &old_spec : NULL; 924 struct itimerspec64 *rtn = old_setting ? &old_spec : NULL;
@@ -927,7 +938,7 @@ SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags,
927 return error; 938 return error;
928} 939}
929 940
930#ifdef CONFIG_COMPAT 941#ifdef CONFIG_COMPAT_32BIT_TIME
931COMPAT_SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags, 942COMPAT_SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags,
932 struct compat_itimerspec __user *, new, 943 struct compat_itimerspec __user *, new,
933 struct compat_itimerspec __user *, old) 944 struct compat_itimerspec __user *, old)
diff --git a/kernel/time/posix-timers.h b/kernel/time/posix-timers.h
index 151e28f5bf30..ddb21145211a 100644
--- a/kernel/time/posix-timers.h
+++ b/kernel/time/posix-timers.h
@@ -19,7 +19,7 @@ struct k_clock {
19 void (*timer_get)(struct k_itimer *timr, 19 void (*timer_get)(struct k_itimer *timr,
20 struct itimerspec64 *cur_setting); 20 struct itimerspec64 *cur_setting);
21 void (*timer_rearm)(struct k_itimer *timr); 21 void (*timer_rearm)(struct k_itimer *timr);
22 int (*timer_forward)(struct k_itimer *timr, ktime_t now); 22 s64 (*timer_forward)(struct k_itimer *timr, ktime_t now);
23 ktime_t (*timer_remaining)(struct k_itimer *timr, ktime_t now); 23 ktime_t (*timer_remaining)(struct k_itimer *timr, ktime_t now);
24 int (*timer_try_to_cancel)(struct k_itimer *timr); 24 int (*timer_try_to_cancel)(struct k_itimer *timr);
25 void (*timer_arm)(struct k_itimer *timr, ktime_t expires, 25 void (*timer_arm)(struct k_itimer *timr, ktime_t expires,
diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index 2d8f05aad442..cbc72c2c1fca 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -237,7 +237,7 @@ sched_clock_register(u64 (*read)(void), int bits, unsigned long rate)
237 pr_debug("Registered %pF as sched_clock source\n", read); 237 pr_debug("Registered %pF as sched_clock source\n", read);
238} 238}
239 239
240void __init sched_clock_postinit(void) 240void __init generic_sched_clock_init(void)
241{ 241{
242 /* 242 /*
243 * If no sched_clock() function has been provided at that point, 243 * If no sched_clock() function has been provided at that point,
diff --git a/kernel/time/tick-broadcast-hrtimer.c b/kernel/time/tick-broadcast-hrtimer.c
index 58045eb976c3..a59641fb88b6 100644
--- a/kernel/time/tick-broadcast-hrtimer.c
+++ b/kernel/time/tick-broadcast-hrtimer.c
@@ -90,7 +90,7 @@ static struct clock_event_device ce_broadcast_hrtimer = {
90 .max_delta_ticks = ULONG_MAX, 90 .max_delta_ticks = ULONG_MAX,
91 .mult = 1, 91 .mult = 1,
92 .shift = 0, 92 .shift = 0,
93 .cpumask = cpu_all_mask, 93 .cpumask = cpu_possible_mask,
94}; 94};
95 95
96static enum hrtimer_restart bc_handler(struct hrtimer *t) 96static enum hrtimer_restart bc_handler(struct hrtimer *t)
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index da9455a6b42b..5b33e2f5c0ed 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -642,7 +642,7 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
642 642
643static inline bool local_timer_softirq_pending(void) 643static inline bool local_timer_softirq_pending(void)
644{ 644{
645 return local_softirq_pending() & TIMER_SOFTIRQ; 645 return local_softirq_pending() & BIT(TIMER_SOFTIRQ);
646} 646}
647 647
648static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu) 648static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
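
The one-character change here is a real bug fix: softirq numbers are bit indices, not masks, and TIMER_SOFTIRQ happens to be 1, so the old test actually checked the HI_SOFTIRQ bit. A minimal sketch (demo_* helpers are illustrative):

#include <linux/interrupt.h>	/* HI_SOFTIRQ == 0, TIMER_SOFTIRQ == 1 */
#include <linux/bitops.h>

static bool demo_timer_pending_buggy(u32 pending)
{
	return pending & TIMER_SOFTIRQ;		/* == pending & 1: HI_SOFTIRQ bit */
}

static bool demo_timer_pending_fixed(u32 pending)
{
	return pending & BIT(TIMER_SOFTIRQ);	/* bit 1: the timer softirq */
}
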
diff --git a/kernel/time/time.c b/kernel/time/time.c
index 2b41e8e2d31d..ccdb351277ee 100644
--- a/kernel/time/time.c
+++ b/kernel/time/time.c
@@ -64,7 +64,7 @@ EXPORT_SYMBOL(sys_tz);
64 */ 64 */
65SYSCALL_DEFINE1(time, time_t __user *, tloc) 65SYSCALL_DEFINE1(time, time_t __user *, tloc)
66{ 66{
67 time_t i = get_seconds(); 67 time_t i = (time_t)ktime_get_real_seconds();
68 68
69 if (tloc) { 69 if (tloc) {
70 if (put_user(i,tloc)) 70 if (put_user(i,tloc))
@@ -107,11 +107,9 @@ SYSCALL_DEFINE1(stime, time_t __user *, tptr)
107/* compat_time_t is a 32 bit "long" and needs to get converted. */ 107/* compat_time_t is a 32 bit "long" and needs to get converted. */
108COMPAT_SYSCALL_DEFINE1(time, compat_time_t __user *, tloc) 108COMPAT_SYSCALL_DEFINE1(time, compat_time_t __user *, tloc)
109{ 109{
110 struct timeval tv;
111 compat_time_t i; 110 compat_time_t i;
112 111
113 do_gettimeofday(&tv); 112 i = (compat_time_t)ktime_get_real_seconds();
114 i = tv.tv_sec;
115 113
116 if (tloc) { 114 if (tloc) {
117 if (put_user(i,tloc)) 115 if (put_user(i,tloc))
@@ -931,7 +929,7 @@ int compat_put_timespec64(const struct timespec64 *ts, void __user *uts)
931EXPORT_SYMBOL_GPL(compat_put_timespec64); 929EXPORT_SYMBOL_GPL(compat_put_timespec64);
932 930
933int get_itimerspec64(struct itimerspec64 *it, 931int get_itimerspec64(struct itimerspec64 *it,
934 const struct itimerspec __user *uit) 932 const struct __kernel_itimerspec __user *uit)
935{ 933{
936 int ret; 934 int ret;
937 935
@@ -946,7 +944,7 @@ int get_itimerspec64(struct itimerspec64 *it,
946EXPORT_SYMBOL_GPL(get_itimerspec64); 944EXPORT_SYMBOL_GPL(get_itimerspec64);
947 945
948int put_itimerspec64(const struct itimerspec64 *it, 946int put_itimerspec64(const struct itimerspec64 *it,
949 struct itimerspec __user *uit) 947 struct __kernel_itimerspec __user *uit)
950{ 948{
951 int ret; 949 int ret;
952 950
@@ -959,3 +957,24 @@ int put_itimerspec64(const struct itimerspec64 *it,
959 return ret; 957 return ret;
960} 958}
961EXPORT_SYMBOL_GPL(put_itimerspec64); 959EXPORT_SYMBOL_GPL(put_itimerspec64);
960
961int get_compat_itimerspec64(struct itimerspec64 *its,
962 const struct compat_itimerspec __user *uits)
963{
964
965 if (__compat_get_timespec64(&its->it_interval, &uits->it_interval) ||
966 __compat_get_timespec64(&its->it_value, &uits->it_value))
967 return -EFAULT;
968 return 0;
969}
970EXPORT_SYMBOL_GPL(get_compat_itimerspec64);
971
972int put_compat_itimerspec64(const struct itimerspec64 *its,
973 struct compat_itimerspec __user *uits)
974{
975 if (__compat_put_timespec64(&its->it_interval, &uits->it_interval) ||
976 __compat_put_timespec64(&its->it_value, &uits->it_value))
977 return -EFAULT;
978 return 0;
979}
980EXPORT_SYMBOL_GPL(put_compat_itimerspec64);
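
get_itimerspec64()/put_itimerspec64() now speak __kernel_itimerspec, the y2038-proof user ABI type. For reference, its layout is roughly the following; see the uapi time headers for the authoritative definition:

struct __kernel_timespec {
	__kernel_time64_t	tv_sec;		/* 64-bit seconds on all ABIs */
	long long		tv_nsec;
};

struct __kernel_itimerspec {
	struct __kernel_timespec it_interval;	/* timer period     */
	struct __kernel_timespec it_value;	/* timer expiration */
};
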
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 4786df904c22..f3b22f456fac 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -17,6 +17,7 @@
17#include <linux/nmi.h> 17#include <linux/nmi.h>
18#include <linux/sched.h> 18#include <linux/sched.h>
19#include <linux/sched/loadavg.h> 19#include <linux/sched/loadavg.h>
20#include <linux/sched/clock.h>
20#include <linux/syscore_ops.h> 21#include <linux/syscore_ops.h>
21#include <linux/clocksource.h> 22#include <linux/clocksource.h>
22#include <linux/jiffies.h> 23#include <linux/jiffies.h>
@@ -34,6 +35,14 @@
34#define TK_MIRROR (1 << 1) 35#define TK_MIRROR (1 << 1)
35#define TK_CLOCK_WAS_SET (1 << 2) 36#define TK_CLOCK_WAS_SET (1 << 2)
36 37
38enum timekeeping_adv_mode {
39 /* Update timekeeper when a tick has passed */
40 TK_ADV_TICK,
41
42 /* Update timekeeper on a direct frequency change */
43 TK_ADV_FREQ
44};
45
37/* 46/*
38 * The most important data for readout fits into a single 64 byte 47 * The most important data for readout fits into a single 64 byte
39 * cache line. 48 * cache line.
@@ -97,7 +106,7 @@ static inline void tk_normalize_xtime(struct timekeeper *tk)
97 } 106 }
98} 107}
99 108
100static inline struct timespec64 tk_xtime(struct timekeeper *tk) 109static inline struct timespec64 tk_xtime(const struct timekeeper *tk)
101{ 110{
102 struct timespec64 ts; 111 struct timespec64 ts;
103 112
@@ -154,7 +163,7 @@ static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
154 * a read of the fast-timekeeper tkrs (which is protected by its own locking 163 * a read of the fast-timekeeper tkrs (which is protected by its own locking
155 * and update logic). 164 * and update logic).
156 */ 165 */
157static inline u64 tk_clock_read(struct tk_read_base *tkr) 166static inline u64 tk_clock_read(const struct tk_read_base *tkr)
158{ 167{
159 struct clocksource *clock = READ_ONCE(tkr->clock); 168 struct clocksource *clock = READ_ONCE(tkr->clock);
160 169
@@ -203,7 +212,7 @@ static void timekeeping_check_update(struct timekeeper *tk, u64 offset)
203 } 212 }
204} 213}
205 214
206static inline u64 timekeeping_get_delta(struct tk_read_base *tkr) 215static inline u64 timekeeping_get_delta(const struct tk_read_base *tkr)
207{ 216{
208 struct timekeeper *tk = &tk_core.timekeeper; 217 struct timekeeper *tk = &tk_core.timekeeper;
209 u64 now, last, mask, max, delta; 218 u64 now, last, mask, max, delta;
@@ -247,7 +256,7 @@ static inline u64 timekeeping_get_delta(struct tk_read_base *tkr)
247static inline void timekeeping_check_update(struct timekeeper *tk, u64 offset) 256static inline void timekeeping_check_update(struct timekeeper *tk, u64 offset)
248{ 257{
249} 258}
250static inline u64 timekeeping_get_delta(struct tk_read_base *tkr) 259static inline u64 timekeeping_get_delta(const struct tk_read_base *tkr)
251{ 260{
252 u64 cycle_now, delta; 261 u64 cycle_now, delta;
253 262
@@ -344,7 +353,7 @@ u32 (*arch_gettimeoffset)(void) = default_arch_gettimeoffset;
344static inline u32 arch_gettimeoffset(void) { return 0; } 353static inline u32 arch_gettimeoffset(void) { return 0; }
345#endif 354#endif
346 355
347static inline u64 timekeeping_delta_to_ns(struct tk_read_base *tkr, u64 delta) 356static inline u64 timekeeping_delta_to_ns(const struct tk_read_base *tkr, u64 delta)
348{ 357{
349 u64 nsec; 358 u64 nsec;
350 359
@@ -355,7 +364,7 @@ static inline u64 timekeeping_delta_to_ns(struct tk_read_base *tkr, u64 delta)
355 return nsec + arch_gettimeoffset(); 364 return nsec + arch_gettimeoffset();
356} 365}
357 366
358static inline u64 timekeeping_get_ns(struct tk_read_base *tkr) 367static inline u64 timekeeping_get_ns(const struct tk_read_base *tkr)
359{ 368{
360 u64 delta; 369 u64 delta;
361 370
@@ -363,7 +372,7 @@ static inline u64 timekeeping_get_ns(struct tk_read_base *tkr)
363 return timekeeping_delta_to_ns(tkr, delta); 372 return timekeeping_delta_to_ns(tkr, delta);
364} 373}
365 374
366static inline u64 timekeeping_cycles_to_ns(struct tk_read_base *tkr, u64 cycles) 375static inline u64 timekeeping_cycles_to_ns(const struct tk_read_base *tkr, u64 cycles)
367{ 376{
368 u64 delta; 377 u64 delta;
369 378
@@ -386,7 +395,8 @@ static inline u64 timekeeping_cycles_to_ns(struct tk_read_base *tkr, u64 cycles)
386 * slightly wrong timestamp (a few nanoseconds). See 395 * slightly wrong timestamp (a few nanoseconds). See
387 * @ktime_get_mono_fast_ns. 396 * @ktime_get_mono_fast_ns.
388 */ 397 */
389static void update_fast_timekeeper(struct tk_read_base *tkr, struct tk_fast *tkf) 398static void update_fast_timekeeper(const struct tk_read_base *tkr,
399 struct tk_fast *tkf)
390{ 400{
391 struct tk_read_base *base = tkf->base; 401 struct tk_read_base *base = tkf->base;
392 402
@@ -541,10 +551,10 @@ EXPORT_SYMBOL_GPL(ktime_get_real_fast_ns);
541 * number of cycles every time until timekeeping is resumed at which time the 551 * number of cycles every time until timekeeping is resumed at which time the
542 * proper readout base for the fast timekeeper will be restored automatically. 552 * proper readout base for the fast timekeeper will be restored automatically.
543 */ 553 */
544static void halt_fast_timekeeper(struct timekeeper *tk) 554static void halt_fast_timekeeper(const struct timekeeper *tk)
545{ 555{
546 static struct tk_read_base tkr_dummy; 556 static struct tk_read_base tkr_dummy;
547 struct tk_read_base *tkr = &tk->tkr_mono; 557 const struct tk_read_base *tkr = &tk->tkr_mono;
548 558
549 memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy)); 559 memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
550 cycles_at_suspend = tk_clock_read(tkr); 560 cycles_at_suspend = tk_clock_read(tkr);
@@ -1269,7 +1279,7 @@ EXPORT_SYMBOL(do_settimeofday64);
1269 * 1279 *
1270 * Adds or subtracts an offset value from the current time. 1280 * Adds or subtracts an offset value from the current time.
1271 */ 1281 */
1272static int timekeeping_inject_offset(struct timespec64 *ts) 1282static int timekeeping_inject_offset(const struct timespec64 *ts)
1273{ 1283{
1274 struct timekeeper *tk = &tk_core.timekeeper; 1284 struct timekeeper *tk = &tk_core.timekeeper;
1275 unsigned long flags; 1285 unsigned long flags;
@@ -1496,22 +1506,39 @@ void __weak read_persistent_clock64(struct timespec64 *ts64)
1496} 1506}
1497 1507
1498/** 1508/**
1499 * read_boot_clock64 - Return time of the system start. 1509 * read_persistent_wall_and_boot_offset - Read persistent clock, and also offset
1510 * from the boot.
1500 * 1511 *
1501 * Weak dummy function for arches that do not yet support it. 1512 * Weak dummy function for arches that do not yet support it.
1502 * Function to read the exact time the system has been started. 1513 * wall_time - current time as returned by persistent clock
1503 * Returns a timespec64 with tv_sec=0 and tv_nsec=0 if unsupported. 1514 * boot_offset - offset that is defined as wall_time - boot_time
1504 * 1515 * The default function calculates offset based on the current value of
1505 * XXX - Do be sure to remove it once all arches implement it. 1516 * local_clock(). This way architectures that support sched_clock() but don't
1517 * support dedicated boot time clock will provide the best estimate of the
1518 * boot time.
1506 */ 1519 */
1507void __weak read_boot_clock64(struct timespec64 *ts) 1520void __weak __init
1521read_persistent_wall_and_boot_offset(struct timespec64 *wall_time,
1522 struct timespec64 *boot_offset)
1508{ 1523{
1509 ts->tv_sec = 0; 1524 read_persistent_clock64(wall_time);
1510 ts->tv_nsec = 0; 1525 *boot_offset = ns_to_timespec64(local_clock());
1511} 1526}
1512 1527
1513/* Flag for if timekeeping_resume() has injected sleeptime */ 1528/*
1514static bool sleeptime_injected; 1529 * Flag reflecting whether timekeeping_resume() has injected sleeptime.
1530 *
 1531 * The flag starts off false and is only set when a suspend reaches
 1532 * timekeeping_suspend(); timekeeping_resume() sets it back to false when
 1533 * the timekeeper clocksource is not stopping across suspend and has been
 1534 * used to update the sleep time. If the timekeeper clocksource has stopped
 1535 * then the flag stays true and is used by the RTC resume code to decide
 1536 * whether sleeptime must be injected; if so, the flag is cleared there.
1537 *
1538 * If a suspend fails before reaching timekeeping_resume() then the flag
1539 * stays false and prevents erroneous sleeptime injection.
1540 */
1541static bool suspend_timing_needed;
1515 1542
1516/* Flag for if there is a persistent clock on this platform */ 1543/* Flag for if there is a persistent clock on this platform */
1517static bool persistent_clock_exists; 1544static bool persistent_clock_exists;
@@ -1521,28 +1548,29 @@ static bool persistent_clock_exists;
1521 */ 1548 */
1522void __init timekeeping_init(void) 1549void __init timekeeping_init(void)
1523{ 1550{
1551 struct timespec64 wall_time, boot_offset, wall_to_mono;
1524 struct timekeeper *tk = &tk_core.timekeeper; 1552 struct timekeeper *tk = &tk_core.timekeeper;
1525 struct clocksource *clock; 1553 struct clocksource *clock;
1526 unsigned long flags; 1554 unsigned long flags;
1527 struct timespec64 now, boot, tmp;
1528
1529 read_persistent_clock64(&now);
1530 if (!timespec64_valid_strict(&now)) {
1531 pr_warn("WARNING: Persistent clock returned invalid value!\n"
1532 " Check your CMOS/BIOS settings.\n");
1533 now.tv_sec = 0;
1534 now.tv_nsec = 0;
1535 } else if (now.tv_sec || now.tv_nsec)
1536 persistent_clock_exists = true;
1537 1555
1538 read_boot_clock64(&boot); 1556 read_persistent_wall_and_boot_offset(&wall_time, &boot_offset);
1539 if (!timespec64_valid_strict(&boot)) { 1557 if (timespec64_valid_strict(&wall_time) &&
1540 pr_warn("WARNING: Boot clock returned invalid value!\n" 1558 timespec64_to_ns(&wall_time) > 0) {
1541 " Check your CMOS/BIOS settings.\n"); 1559 persistent_clock_exists = true;
1542 boot.tv_sec = 0; 1560 } else if (timespec64_to_ns(&wall_time) != 0) {
1543 boot.tv_nsec = 0; 1561 pr_warn("Persistent clock returned invalid value");
1562 wall_time = (struct timespec64){0};
1544 } 1563 }
1545 1564
1565 if (timespec64_compare(&wall_time, &boot_offset) < 0)
1566 boot_offset = (struct timespec64){0};
1567
1568 /*
 1569	 * We want to set wall_to_mono, so the following is true:
1570 * wall time + wall_to_mono = boot time
1571 */
1572 wall_to_mono = timespec64_sub(boot_offset, wall_time);
1573
1546 raw_spin_lock_irqsave(&timekeeper_lock, flags); 1574 raw_spin_lock_irqsave(&timekeeper_lock, flags);
1547 write_seqcount_begin(&tk_core.seq); 1575 write_seqcount_begin(&tk_core.seq);
1548 ntp_init(); 1576 ntp_init();
@@ -1552,13 +1580,10 @@ void __init timekeeping_init(void)
1552 clock->enable(clock); 1580 clock->enable(clock);
1553 tk_setup_internals(tk, clock); 1581 tk_setup_internals(tk, clock);
1554 1582
1555 tk_set_xtime(tk, &now); 1583 tk_set_xtime(tk, &wall_time);
1556 tk->raw_sec = 0; 1584 tk->raw_sec = 0;
1557 if (boot.tv_sec == 0 && boot.tv_nsec == 0)
1558 boot = tk_xtime(tk);
1559 1585
1560 set_normalized_timespec64(&tmp, -boot.tv_sec, -boot.tv_nsec); 1586 tk_set_wall_to_mono(tk, wall_to_mono);
1561 tk_set_wall_to_mono(tk, tmp);
1562 1587
1563 timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET); 1588 timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
1564 1589
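
Architectures that have a real boot-time counter can now override this weak helper instead of the removed read_boot_clock64(). A hypothetical override sketch mirroring the weak default above (my_boot_counter_ns() does not exist; it stands in for whatever the platform provides):

void __init read_persistent_wall_and_boot_offset(struct timespec64 *wall_time,
						 struct timespec64 *boot_offset)
{
	read_persistent_clock64(wall_time);
	/* wall_time - boot_time, taken from a dedicated platform boot counter */
	*boot_offset = ns_to_timespec64(my_boot_counter_ns());
}
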
@@ -1577,7 +1602,7 @@ static struct timespec64 timekeeping_suspend_time;
1577 * adds the sleep offset to the timekeeping variables. 1602 * adds the sleep offset to the timekeeping variables.
1578 */ 1603 */
1579static void __timekeeping_inject_sleeptime(struct timekeeper *tk, 1604static void __timekeeping_inject_sleeptime(struct timekeeper *tk,
1580 struct timespec64 *delta) 1605 const struct timespec64 *delta)
1581{ 1606{
1582 if (!timespec64_valid_strict(delta)) { 1607 if (!timespec64_valid_strict(delta)) {
1583 printk_deferred(KERN_WARNING 1608 printk_deferred(KERN_WARNING
@@ -1610,7 +1635,7 @@ static void __timekeeping_inject_sleeptime(struct timekeeper *tk,
1610 */ 1635 */
1611bool timekeeping_rtc_skipresume(void) 1636bool timekeeping_rtc_skipresume(void)
1612{ 1637{
1613 return sleeptime_injected; 1638 return !suspend_timing_needed;
1614} 1639}
1615 1640
1616/** 1641/**
@@ -1638,7 +1663,7 @@ bool timekeeping_rtc_skipsuspend(void)
1638 * This function should only be called by rtc_resume(), and allows 1663 * This function should only be called by rtc_resume(), and allows
1639 * a suspend offset to be injected into the timekeeping values. 1664 * a suspend offset to be injected into the timekeeping values.
1640 */ 1665 */
1641void timekeeping_inject_sleeptime64(struct timespec64 *delta) 1666void timekeeping_inject_sleeptime64(const struct timespec64 *delta)
1642{ 1667{
1643 struct timekeeper *tk = &tk_core.timekeeper; 1668 struct timekeeper *tk = &tk_core.timekeeper;
1644 unsigned long flags; 1669 unsigned long flags;
@@ -1646,6 +1671,8 @@ void timekeeping_inject_sleeptime64(struct timespec64 *delta)
1646 raw_spin_lock_irqsave(&timekeeper_lock, flags); 1671 raw_spin_lock_irqsave(&timekeeper_lock, flags);
1647 write_seqcount_begin(&tk_core.seq); 1672 write_seqcount_begin(&tk_core.seq);
1648 1673
1674 suspend_timing_needed = false;
1675
1649 timekeeping_forward_now(tk); 1676 timekeeping_forward_now(tk);
1650 1677
1651 __timekeeping_inject_sleeptime(tk, delta); 1678 __timekeeping_inject_sleeptime(tk, delta);
@@ -1669,9 +1696,9 @@ void timekeeping_resume(void)
1669 struct clocksource *clock = tk->tkr_mono.clock; 1696 struct clocksource *clock = tk->tkr_mono.clock;
1670 unsigned long flags; 1697 unsigned long flags;
1671 struct timespec64 ts_new, ts_delta; 1698 struct timespec64 ts_new, ts_delta;
1672 u64 cycle_now; 1699 u64 cycle_now, nsec;
1700 bool inject_sleeptime = false;
1673 1701
1674 sleeptime_injected = false;
1675 read_persistent_clock64(&ts_new); 1702 read_persistent_clock64(&ts_new);
1676 1703
1677 clockevents_resume(); 1704 clockevents_resume();
@@ -1693,22 +1720,19 @@ void timekeeping_resume(void)
1693 * usable source. The rtc part is handled separately in rtc core code. 1720 * usable source. The rtc part is handled separately in rtc core code.
1694 */ 1721 */
1695 cycle_now = tk_clock_read(&tk->tkr_mono); 1722 cycle_now = tk_clock_read(&tk->tkr_mono);
1696 if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) && 1723 nsec = clocksource_stop_suspend_timing(clock, cycle_now);
1697 cycle_now > tk->tkr_mono.cycle_last) { 1724 if (nsec > 0) {
1698 u64 nsec, cyc_delta;
1699
1700 cyc_delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last,
1701 tk->tkr_mono.mask);
1702 nsec = mul_u64_u32_shr(cyc_delta, clock->mult, clock->shift);
1703 ts_delta = ns_to_timespec64(nsec); 1725 ts_delta = ns_to_timespec64(nsec);
1704 sleeptime_injected = true; 1726 inject_sleeptime = true;
1705 } else if (timespec64_compare(&ts_new, &timekeeping_suspend_time) > 0) { 1727 } else if (timespec64_compare(&ts_new, &timekeeping_suspend_time) > 0) {
1706 ts_delta = timespec64_sub(ts_new, timekeeping_suspend_time); 1728 ts_delta = timespec64_sub(ts_new, timekeeping_suspend_time);
1707 sleeptime_injected = true; 1729 inject_sleeptime = true;
1708 } 1730 }
1709 1731
1710 if (sleeptime_injected) 1732 if (inject_sleeptime) {
1733 suspend_timing_needed = false;
1711 __timekeeping_inject_sleeptime(tk, &ts_delta); 1734 __timekeeping_inject_sleeptime(tk, &ts_delta);
1735 }
1712 1736
1713 /* Re-base the last cycle value */ 1737 /* Re-base the last cycle value */
1714 tk->tkr_mono.cycle_last = cycle_now; 1738 tk->tkr_mono.cycle_last = cycle_now;
@@ -1732,6 +1756,8 @@ int timekeeping_suspend(void)
1732 unsigned long flags; 1756 unsigned long flags;
1733 struct timespec64 delta, delta_delta; 1757 struct timespec64 delta, delta_delta;
1734 static struct timespec64 old_delta; 1758 static struct timespec64 old_delta;
1759 struct clocksource *curr_clock;
1760 u64 cycle_now;
1735 1761
1736 read_persistent_clock64(&timekeeping_suspend_time); 1762 read_persistent_clock64(&timekeeping_suspend_time);
1737 1763
@@ -1743,11 +1769,22 @@ int timekeeping_suspend(void)
1743 if (timekeeping_suspend_time.tv_sec || timekeeping_suspend_time.tv_nsec) 1769 if (timekeeping_suspend_time.tv_sec || timekeeping_suspend_time.tv_nsec)
1744 persistent_clock_exists = true; 1770 persistent_clock_exists = true;
1745 1771
1772 suspend_timing_needed = true;
1773
1746 raw_spin_lock_irqsave(&timekeeper_lock, flags); 1774 raw_spin_lock_irqsave(&timekeeper_lock, flags);
1747 write_seqcount_begin(&tk_core.seq); 1775 write_seqcount_begin(&tk_core.seq);
1748 timekeeping_forward_now(tk); 1776 timekeeping_forward_now(tk);
1749 timekeeping_suspended = 1; 1777 timekeeping_suspended = 1;
1750 1778
1779 /*
1780 * Since we've called forward_now, cycle_last stores the value
1781 * just read from the current clocksource. Save this to potentially
1782 * use in suspend timing.
1783 */
1784 curr_clock = tk->tkr_mono.clock;
1785 cycle_now = tk->tkr_mono.cycle_last;
1786 clocksource_start_suspend_timing(curr_clock, cycle_now);
1787
1751 if (persistent_clock_exists) { 1788 if (persistent_clock_exists) {
1752 /* 1789 /*
1753 * To avoid drift caused by repeated suspend/resumes, 1790 * To avoid drift caused by repeated suspend/resumes,
@@ -2021,11 +2058,11 @@ static u64 logarithmic_accumulation(struct timekeeper *tk, u64 offset,
2021 return offset; 2058 return offset;
2022} 2059}
2023 2060
2024/** 2061/*
2025 * update_wall_time - Uses the current clocksource to increment the wall time 2062 * timekeeping_advance - Updates the timekeeper to the current time and
2026 * 2063 * current NTP tick length
2027 */ 2064 */
2028void update_wall_time(void) 2065static void timekeeping_advance(enum timekeeping_adv_mode mode)
2029{ 2066{
2030 struct timekeeper *real_tk = &tk_core.timekeeper; 2067 struct timekeeper *real_tk = &tk_core.timekeeper;
2031 struct timekeeper *tk = &shadow_timekeeper; 2068 struct timekeeper *tk = &shadow_timekeeper;
@@ -2042,14 +2079,17 @@ void update_wall_time(void)
2042 2079
2043#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET 2080#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
2044 offset = real_tk->cycle_interval; 2081 offset = real_tk->cycle_interval;
2082
2083 if (mode != TK_ADV_TICK)
2084 goto out;
2045#else 2085#else
2046 offset = clocksource_delta(tk_clock_read(&tk->tkr_mono), 2086 offset = clocksource_delta(tk_clock_read(&tk->tkr_mono),
2047 tk->tkr_mono.cycle_last, tk->tkr_mono.mask); 2087 tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
2048#endif
2049 2088
2050 /* Check if there's really nothing to do */ 2089 /* Check if there's really nothing to do */
2051 if (offset < real_tk->cycle_interval) 2090 if (offset < real_tk->cycle_interval && mode == TK_ADV_TICK)
2052 goto out; 2091 goto out;
2092#endif
2053 2093
2054 /* Do some additional sanity checking */ 2094 /* Do some additional sanity checking */
2055 timekeeping_check_update(tk, offset); 2095 timekeeping_check_update(tk, offset);
@@ -2106,6 +2146,15 @@ out:
2106} 2146}
2107 2147
2108/** 2148/**
2149 * update_wall_time - Uses the current clocksource to increment the wall time
2150 *
2151 */
2152void update_wall_time(void)
2153{
2154 timekeeping_advance(TK_ADV_TICK);
2155}
2156
2157/**
2109 * getboottime64 - Return the real time of system boot. 2158 * getboottime64 - Return the real time of system boot.
2110 * @ts: pointer to the timespec64 to be set 2159 * @ts: pointer to the timespec64 to be set
2111 * 2160 *
@@ -2220,7 +2269,7 @@ ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq, ktime_t *offs_real,
2220/** 2269/**
2221 * timekeeping_validate_timex - Ensures the timex is ok for use in do_adjtimex 2270 * timekeeping_validate_timex - Ensures the timex is ok for use in do_adjtimex
2222 */ 2271 */
2223static int timekeeping_validate_timex(struct timex *txc) 2272static int timekeeping_validate_timex(const struct timex *txc)
2224{ 2273{
2225 if (txc->modes & ADJ_ADJTIME) { 2274 if (txc->modes & ADJ_ADJTIME) {
2226 /* singleshot must not be used with any other mode bits */ 2275 /* singleshot must not be used with any other mode bits */
@@ -2310,7 +2359,7 @@ int do_adjtimex(struct timex *txc)
2310 return ret; 2359 return ret;
2311 } 2360 }
2312 2361
2313 getnstimeofday64(&ts); 2362 ktime_get_real_ts64(&ts);
2314 2363
2315 raw_spin_lock_irqsave(&timekeeper_lock, flags); 2364 raw_spin_lock_irqsave(&timekeeper_lock, flags);
2316 write_seqcount_begin(&tk_core.seq); 2365 write_seqcount_begin(&tk_core.seq);
@@ -2327,6 +2376,10 @@ int do_adjtimex(struct timex *txc)
2327 write_seqcount_end(&tk_core.seq); 2376 write_seqcount_end(&tk_core.seq);
2328 raw_spin_unlock_irqrestore(&timekeeper_lock, flags); 2377 raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
2329 2378
2379 /* Update the multiplier immediately if frequency was set directly */
2380 if (txc->modes & (ADJ_FREQUENCY | ADJ_TICK))
2381 timekeeping_advance(TK_ADV_FREQ);
2382
2330 if (tai != orig_tai) 2383 if (tai != orig_tai)
2331 clock_was_set(); 2384 clock_was_set();
2332 2385
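The timekeeping.c hunks above split update_wall_time() into a thin wrapper around a new timekeeping_advance(mode) helper, so that do_adjtimex() can force an immediate accumulation and multiplier update with TK_ADV_FREQ instead of waiting for the next tick. Below is a minimal userspace sketch of that control flow, not the kernel code: the enum values and the gated "nothing to do" check mirror the diff, while the offset/interval arithmetic and the cycle_interval value are made up for illustration.

	#include <stdio.h>

	enum tk_adv_mode { TK_ADV_TICK, TK_ADV_FREQ };	/* mirrors the patch */

	static unsigned long cycle_interval = 1000;	/* cycles per tick (made up) */

	/* Returns 1 if the timekeeper was actually advanced. */
	static int timekeeping_advance(enum tk_adv_mode mode, unsigned long offset)
	{
		/*
		 * On a normal tick there may be nothing to do yet; a frequency
		 * update from adjtimex must not be skipped, which is why the
		 * early-out is gated on mode == TK_ADV_TICK in the patch.
		 */
		if (offset < cycle_interval && mode == TK_ADV_TICK)
			return 0;

		/* ... accumulate 'offset' cycles and apply the new NTP mult ... */
		return 1;
	}

	static void update_wall_time(unsigned long offset)
	{
		timekeeping_advance(TK_ADV_TICK, offset);	/* old entry point, now a wrapper */
	}

	int main(void)
	{
		update_wall_time(10);				/* early tick: skipped */
		printf("freq update advanced: %d\n",
		       timekeeping_advance(TK_ADV_FREQ, 10));	/* adjtimex path: not skipped */
		return 0;
	}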
diff --git a/kernel/time/timekeeping_debug.c b/kernel/time/timekeeping_debug.c
index 0754cadfa9e6..238e4be60229 100644
--- a/kernel/time/timekeeping_debug.c
+++ b/kernel/time/timekeeping_debug.c
@@ -70,7 +70,7 @@ static int __init tk_debug_sleep_time_init(void)
70} 70}
71late_initcall(tk_debug_sleep_time_init); 71late_initcall(tk_debug_sleep_time_init);
72 72
73void tk_debug_account_sleep_time(struct timespec64 *t) 73void tk_debug_account_sleep_time(const struct timespec64 *t)
74{ 74{
75 /* Cap bin index so we don't overflow the array */ 75 /* Cap bin index so we don't overflow the array */
76 int bin = min(fls(t->tv_sec), NUM_BINS-1); 76 int bin = min(fls(t->tv_sec), NUM_BINS-1);
diff --git a/kernel/time/timekeeping_internal.h b/kernel/time/timekeeping_internal.h
index cf5c0828ee31..bcbb52db2256 100644
--- a/kernel/time/timekeeping_internal.h
+++ b/kernel/time/timekeeping_internal.h
@@ -8,7 +8,7 @@
8#include <linux/time.h> 8#include <linux/time.h>
9 9
10#ifdef CONFIG_DEBUG_FS 10#ifdef CONFIG_DEBUG_FS
11extern void tk_debug_account_sleep_time(struct timespec64 *t); 11extern void tk_debug_account_sleep_time(const struct timespec64 *t);
12#else 12#else
13#define tk_debug_account_sleep_time(x) 13#define tk_debug_account_sleep_time(x)
14#endif 14#endif
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index cc2d23e6ff61..fa49cd753dea 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -581,7 +581,7 @@ trigger_dyntick_cpu(struct timer_base *base, struct timer_list *timer)
581 * wheel: 581 * wheel:
582 */ 582 */
583 base->next_expiry = timer->expires; 583 base->next_expiry = timer->expires;
584 wake_up_nohz_cpu(base->cpu); 584 wake_up_nohz_cpu(base->cpu);
585} 585}
586 586
587static void 587static void
@@ -1657,6 +1657,22 @@ static inline void __run_timers(struct timer_base *base)
1657 1657
1658 raw_spin_lock_irq(&base->lock); 1658 raw_spin_lock_irq(&base->lock);
1659 1659
1660 /*
1661 * timer_base::must_forward_clk must be cleared before running
1662 * timers so that any timer functions that call mod_timer() will
1663 * not try to forward the base. Idle tracking / clock forwarding
1664 * logic is only used with BASE_STD timers.
1665 *
1666 * The must_forward_clk flag is cleared unconditionally also for
1667 * the deferrable base. The deferrable base is not affected by idle
1668 * tracking and never forwarded, so clearing the flag is a NOOP.
1669 *
1670 * The fact that the deferrable base is never forwarded can cause
1671 * large variations in granularity for deferrable timers, but they
1672 * can be deferred for long periods due to idle anyway.
1673 */
1674 base->must_forward_clk = false;
1675
1660 while (time_after_eq(jiffies, base->clk)) { 1676 while (time_after_eq(jiffies, base->clk)) {
1661 1677
1662 levels = collect_expired_timers(base, heads); 1678 levels = collect_expired_timers(base, heads);
@@ -1676,19 +1692,6 @@ static __latent_entropy void run_timer_softirq(struct softirq_action *h)
1676{ 1692{
1677 struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); 1693 struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
1678 1694
1679 /*
1680 * must_forward_clk must be cleared before running timers so that any
1681 * timer functions that call mod_timer will not try to forward the
1682 * base. idle trcking / clock forwarding logic is only used with
1683 * BASE_STD timers.
1684 *
1685 * The deferrable base does not do idle tracking at all, so we do
1686 * not forward it. This can result in very large variations in
1687 * granularity for deferrable timers, but they can be deferred for
1688 * long periods due to idle.
1689 */
1690 base->must_forward_clk = false;
1691
1692 __run_timers(base); 1695 __run_timers(base);
1693 if (IS_ENABLED(CONFIG_NO_HZ_COMMON)) 1696 if (IS_ENABLED(CONFIG_NO_HZ_COMMON))
1694 __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF])); 1697 __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF]));
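The timer.c hunks move the must_forward_clk clearing from run_timer_softirq() into __run_timers(), under base->lock and for every base that gets run. Since the softirq handler runs both BASE_STD and BASE_DEF, clearing the flag only in the caller affected just the standard base. The rough userspace model below shows the new shape only; the lock, the timer wheel, and expiry are elided, and the struct fields are reduced to what the sketch needs.

	#include <stdbool.h>
	#include <stdio.h>

	struct timer_base {
		bool must_forward_clk;
		unsigned long clk;
		const char *name;
	};

	/* Models __run_timers(): in the kernel this runs with base->lock held. */
	static void run_timers(struct timer_base *base, unsigned long jiffies)
	{
		/*
		 * Clear the flag before expiring timers so a callback that re-arms
		 * a timer does not try to forward the base; doing it here covers
		 * every base the softirq handler runs, not just BASE_STD.
		 */
		base->must_forward_clk = false;

		while (jiffies >= base->clk) {
			/* collect_expired_timers() + expire_timers() elided */
			base->clk++;
		}
		printf("%s ran up to clk=%lu, forward=%d\n",
		       base->name, base->clk, base->must_forward_clk);
	}

	int main(void)
	{
		struct timer_base std = { true, 0, "BASE_STD" };
		struct timer_base def = { true, 0, "BASE_DEF" };

		run_timers(&std, 3);	/* softirq always runs the standard base ... */
		run_timers(&def, 3);	/* ... and, with NO_HZ_COMMON, the deferrable one */
		return 0;
	}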
diff --git a/kernel/torture.c b/kernel/torture.c
index 3de1efbecd6a..1ac24a826589 100644
--- a/kernel/torture.c
+++ b/kernel/torture.c
@@ -20,6 +20,9 @@
20 * Author: Paul E. McKenney <paulmck@us.ibm.com> 20 * Author: Paul E. McKenney <paulmck@us.ibm.com>
21 * Based on kernel/rcu/torture.c. 21 * Based on kernel/rcu/torture.c.
22 */ 22 */
23
24#define pr_fmt(fmt) fmt
25
23#include <linux/types.h> 26#include <linux/types.h>
24#include <linux/kernel.h> 27#include <linux/kernel.h>
25#include <linux/init.h> 28#include <linux/init.h>
@@ -53,7 +56,7 @@ MODULE_LICENSE("GPL");
53MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com>"); 56MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com>");
54 57
55static char *torture_type; 58static char *torture_type;
56static bool verbose; 59static int verbose;
57 60
58/* Mediate rmmod and system shutdown. Concurrent rmmod & shutdown illegal! */ 61/* Mediate rmmod and system shutdown. Concurrent rmmod & shutdown illegal! */
59#define FULLSTOP_DONTSTOP 0 /* Normal operation. */ 62#define FULLSTOP_DONTSTOP 0 /* Normal operation. */
@@ -98,7 +101,7 @@ bool torture_offline(int cpu, long *n_offl_attempts, long *n_offl_successes,
98 if (!cpu_online(cpu) || !cpu_is_hotpluggable(cpu)) 101 if (!cpu_online(cpu) || !cpu_is_hotpluggable(cpu))
99 return false; 102 return false;
100 103
101 if (verbose) 104 if (verbose > 1)
102 pr_alert("%s" TORTURE_FLAG 105 pr_alert("%s" TORTURE_FLAG
103 "torture_onoff task: offlining %d\n", 106 "torture_onoff task: offlining %d\n",
104 torture_type, cpu); 107 torture_type, cpu);
@@ -111,7 +114,7 @@ bool torture_offline(int cpu, long *n_offl_attempts, long *n_offl_successes,
111 "torture_onoff task: offline %d failed: errno %d\n", 114 "torture_onoff task: offline %d failed: errno %d\n",
112 torture_type, cpu, ret); 115 torture_type, cpu, ret);
113 } else { 116 } else {
114 if (verbose) 117 if (verbose > 1)
115 pr_alert("%s" TORTURE_FLAG 118 pr_alert("%s" TORTURE_FLAG
116 "torture_onoff task: offlined %d\n", 119 "torture_onoff task: offlined %d\n",
117 torture_type, cpu); 120 torture_type, cpu);
@@ -147,7 +150,7 @@ bool torture_online(int cpu, long *n_onl_attempts, long *n_onl_successes,
147 if (cpu_online(cpu) || !cpu_is_hotpluggable(cpu)) 150 if (cpu_online(cpu) || !cpu_is_hotpluggable(cpu))
148 return false; 151 return false;
149 152
150 if (verbose) 153 if (verbose > 1)
151 pr_alert("%s" TORTURE_FLAG 154 pr_alert("%s" TORTURE_FLAG
152 "torture_onoff task: onlining %d\n", 155 "torture_onoff task: onlining %d\n",
153 torture_type, cpu); 156 torture_type, cpu);
@@ -160,7 +163,7 @@ bool torture_online(int cpu, long *n_onl_attempts, long *n_onl_successes,
160 "torture_onoff task: online %d failed: errno %d\n", 163 "torture_onoff task: online %d failed: errno %d\n",
161 torture_type, cpu, ret); 164 torture_type, cpu, ret);
162 } else { 165 } else {
163 if (verbose) 166 if (verbose > 1)
164 pr_alert("%s" TORTURE_FLAG 167 pr_alert("%s" TORTURE_FLAG
165 "torture_onoff task: onlined %d\n", 168 "torture_onoff task: onlined %d\n",
166 torture_type, cpu); 169 torture_type, cpu);
@@ -647,7 +650,7 @@ static void torture_stutter_cleanup(void)
647 * The runnable parameter points to a flag that controls whether or not 650 * The runnable parameter points to a flag that controls whether or not
648 * the test is currently runnable. If there is no such flag, pass in NULL. 651 * the test is currently runnable. If there is no such flag, pass in NULL.
649 */ 652 */
650bool torture_init_begin(char *ttype, bool v) 653bool torture_init_begin(char *ttype, int v)
651{ 654{
652 mutex_lock(&fullstop_mutex); 655 mutex_lock(&fullstop_mutex);
653 if (torture_type != NULL) { 656 if (torture_type != NULL) {
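The torture.c hunks change the module's verbose knob from bool to int so the very chatty per-CPU online/offline messages can be demoted to level 2 while level 1 keeps the coarser progress output. A small sketch of that gating follows; the tlog() helper and the level assignments are invented stand-ins for pr_alert() and the module parameter.

	#include <stdarg.h>
	#include <stdio.h>

	static int verbose = 1;	/* 0 = quiet, 1 = progress, 2 = per-CPU chatter */

	static void tlog(int level, const char *fmt, ...)
	{
		va_list ap;

		if (verbose < level)
			return;
		va_start(ap, fmt);
		vprintf(fmt, ap);
		va_end(ap);
	}

	int main(void)
	{
		tlog(1, "torture: starting onoff task\n");	/* printed at verbose >= 1 */
		tlog(2, "torture: offlining CPU %d\n", 3);	/* only at verbose > 1 */
		return 0;
	}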
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 576d18045811..5470dce212c0 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -18,18 +18,14 @@
18#include <linux/init.h> 18#include <linux/init.h>
19#include <linux/module.h> 19#include <linux/module.h>
20#include <linux/sysctl.h> 20#include <linux/sysctl.h>
21#include <linux/smpboot.h>
22#include <linux/sched/rt.h>
23#include <uapi/linux/sched/types.h>
24#include <linux/tick.h> 21#include <linux/tick.h>
25#include <linux/workqueue.h>
26#include <linux/sched/clock.h> 22#include <linux/sched/clock.h>
27#include <linux/sched/debug.h> 23#include <linux/sched/debug.h>
28#include <linux/sched/isolation.h> 24#include <linux/sched/isolation.h>
25#include <linux/stop_machine.h>
29 26
30#include <asm/irq_regs.h> 27#include <asm/irq_regs.h>
31#include <linux/kvm_para.h> 28#include <linux/kvm_para.h>
32#include <linux/kthread.h>
33 29
34static DEFINE_MUTEX(watchdog_mutex); 30static DEFINE_MUTEX(watchdog_mutex);
35 31
@@ -169,11 +165,10 @@ static void lockup_detector_update_enable(void)
169unsigned int __read_mostly softlockup_panic = 165unsigned int __read_mostly softlockup_panic =
170 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE; 166 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
171 167
172static bool softlockup_threads_initialized __read_mostly; 168static bool softlockup_initialized __read_mostly;
173static u64 __read_mostly sample_period; 169static u64 __read_mostly sample_period;
174 170
175static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts); 171static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
176static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
177static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer); 172static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer);
178static DEFINE_PER_CPU(bool, softlockup_touch_sync); 173static DEFINE_PER_CPU(bool, softlockup_touch_sync);
179static DEFINE_PER_CPU(bool, soft_watchdog_warn); 174static DEFINE_PER_CPU(bool, soft_watchdog_warn);
@@ -335,6 +330,27 @@ static void watchdog_interrupt_count(void)
335 __this_cpu_inc(hrtimer_interrupts); 330 __this_cpu_inc(hrtimer_interrupts);
336} 331}
337 332
333static DEFINE_PER_CPU(struct completion, softlockup_completion);
334static DEFINE_PER_CPU(struct cpu_stop_work, softlockup_stop_work);
335
336/*
337 * The watchdog thread function - touches the timestamp.
338 *
339 * It only runs once every sample_period seconds (4 seconds by
340 * default) to reset the softlockup timestamp. If this gets delayed
341 * for more than 2*watchdog_thresh seconds then the debug-printout
342 * triggers in watchdog_timer_fn().
343 */
344static int softlockup_fn(void *data)
345{
346 __this_cpu_write(soft_lockup_hrtimer_cnt,
347 __this_cpu_read(hrtimer_interrupts));
348 __touch_watchdog();
349 complete(this_cpu_ptr(&softlockup_completion));
350
351 return 0;
352}
353
338/* watchdog kicker functions */ 354/* watchdog kicker functions */
339static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) 355static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
340{ 356{
@@ -350,7 +366,12 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
350 watchdog_interrupt_count(); 366 watchdog_interrupt_count();
351 367
352 /* kick the softlockup detector */ 368 /* kick the softlockup detector */
353 wake_up_process(__this_cpu_read(softlockup_watchdog)); 369 if (completion_done(this_cpu_ptr(&softlockup_completion))) {
370 reinit_completion(this_cpu_ptr(&softlockup_completion));
371 stop_one_cpu_nowait(smp_processor_id(),
372 softlockup_fn, NULL,
373 this_cpu_ptr(&softlockup_stop_work));
374 }
354 375
355 /* .. and repeat */ 376 /* .. and repeat */
356 hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period)); 377 hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period));
@@ -448,16 +469,15 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
448 return HRTIMER_RESTART; 469 return HRTIMER_RESTART;
449} 470}
450 471
451static void watchdog_set_prio(unsigned int policy, unsigned int prio)
452{
453 struct sched_param param = { .sched_priority = prio };
454
455 sched_setscheduler(current, policy, &param);
456}
457
458static void watchdog_enable(unsigned int cpu) 472static void watchdog_enable(unsigned int cpu)
459{ 473{
460 struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer); 474 struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer);
475 struct completion *done = this_cpu_ptr(&softlockup_completion);
476
477 WARN_ON_ONCE(cpu != smp_processor_id());
478
479 init_completion(done);
480 complete(done);
461 481
462 /* 482 /*
463 * Start the timer first to prevent the NMI watchdog triggering 483 * Start the timer first to prevent the NMI watchdog triggering
@@ -473,15 +493,14 @@ static void watchdog_enable(unsigned int cpu)
473 /* Enable the perf event */ 493 /* Enable the perf event */
474 if (watchdog_enabled & NMI_WATCHDOG_ENABLED) 494 if (watchdog_enabled & NMI_WATCHDOG_ENABLED)
475 watchdog_nmi_enable(cpu); 495 watchdog_nmi_enable(cpu);
476
477 watchdog_set_prio(SCHED_FIFO, MAX_RT_PRIO - 1);
478} 496}
479 497
480static void watchdog_disable(unsigned int cpu) 498static void watchdog_disable(unsigned int cpu)
481{ 499{
482 struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer); 500 struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer);
483 501
484 watchdog_set_prio(SCHED_NORMAL, 0); 502 WARN_ON_ONCE(cpu != smp_processor_id());
503
485 /* 504 /*
486 * Disable the perf event first. That prevents that a large delay 505 * Disable the perf event first. That prevents that a large delay
487 * between disabling the timer and disabling the perf event causes 506 * between disabling the timer and disabling the perf event causes
@@ -489,79 +508,66 @@ static void watchdog_disable(unsigned int cpu)
489 */ 508 */
490 watchdog_nmi_disable(cpu); 509 watchdog_nmi_disable(cpu);
491 hrtimer_cancel(hrtimer); 510 hrtimer_cancel(hrtimer);
511 wait_for_completion(this_cpu_ptr(&softlockup_completion));
492} 512}
493 513
494static void watchdog_cleanup(unsigned int cpu, bool online) 514static int softlockup_stop_fn(void *data)
495{ 515{
496 watchdog_disable(cpu); 516 watchdog_disable(smp_processor_id());
517 return 0;
497} 518}
498 519
499static int watchdog_should_run(unsigned int cpu) 520static void softlockup_stop_all(void)
500{ 521{
501 return __this_cpu_read(hrtimer_interrupts) != 522 int cpu;
502 __this_cpu_read(soft_lockup_hrtimer_cnt); 523
524 if (!softlockup_initialized)
525 return;
526
527 for_each_cpu(cpu, &watchdog_allowed_mask)
528 smp_call_on_cpu(cpu, softlockup_stop_fn, NULL, false);
529
530 cpumask_clear(&watchdog_allowed_mask);
503} 531}
504 532
505/* 533static int softlockup_start_fn(void *data)
506 * The watchdog thread function - touches the timestamp.
507 *
508 * It only runs once every sample_period seconds (4 seconds by
509 * default) to reset the softlockup timestamp. If this gets delayed
510 * for more than 2*watchdog_thresh seconds then the debug-printout
511 * triggers in watchdog_timer_fn().
512 */
513static void watchdog(unsigned int cpu)
514{ 534{
515 __this_cpu_write(soft_lockup_hrtimer_cnt, 535 watchdog_enable(smp_processor_id());
516 __this_cpu_read(hrtimer_interrupts)); 536 return 0;
517 __touch_watchdog();
518} 537}
519 538
520static struct smp_hotplug_thread watchdog_threads = { 539static void softlockup_start_all(void)
521 .store = &softlockup_watchdog,
522 .thread_should_run = watchdog_should_run,
523 .thread_fn = watchdog,
524 .thread_comm = "watchdog/%u",
525 .setup = watchdog_enable,
526 .cleanup = watchdog_cleanup,
527 .park = watchdog_disable,
528 .unpark = watchdog_enable,
529};
530
531static void softlockup_update_smpboot_threads(void)
532{ 540{
533 lockdep_assert_held(&watchdog_mutex); 541 int cpu;
534
535 if (!softlockup_threads_initialized)
536 return;
537 542
538 smpboot_update_cpumask_percpu_thread(&watchdog_threads, 543 cpumask_copy(&watchdog_allowed_mask, &watchdog_cpumask);
539 &watchdog_allowed_mask); 544 for_each_cpu(cpu, &watchdog_allowed_mask)
545 smp_call_on_cpu(cpu, softlockup_start_fn, NULL, false);
540} 546}
541 547
542/* Temporarily park all watchdog threads */ 548int lockup_detector_online_cpu(unsigned int cpu)
543static void softlockup_park_all_threads(void)
544{ 549{
545 cpumask_clear(&watchdog_allowed_mask); 550 watchdog_enable(cpu);
546 softlockup_update_smpboot_threads(); 551 return 0;
547} 552}
548 553
549/* Unpark enabled threads */ 554int lockup_detector_offline_cpu(unsigned int cpu)
550static void softlockup_unpark_threads(void)
551{ 555{
552 cpumask_copy(&watchdog_allowed_mask, &watchdog_cpumask); 556 watchdog_disable(cpu);
553 softlockup_update_smpboot_threads(); 557 return 0;
554} 558}
555 559
556static void lockup_detector_reconfigure(void) 560static void lockup_detector_reconfigure(void)
557{ 561{
558 cpus_read_lock(); 562 cpus_read_lock();
559 watchdog_nmi_stop(); 563 watchdog_nmi_stop();
560 softlockup_park_all_threads(); 564
565 softlockup_stop_all();
561 set_sample_period(); 566 set_sample_period();
562 lockup_detector_update_enable(); 567 lockup_detector_update_enable();
563 if (watchdog_enabled && watchdog_thresh) 568 if (watchdog_enabled && watchdog_thresh)
564 softlockup_unpark_threads(); 569 softlockup_start_all();
570
565 watchdog_nmi_start(); 571 watchdog_nmi_start();
566 cpus_read_unlock(); 572 cpus_read_unlock();
567 /* 573 /*
@@ -580,8 +586,6 @@ static void lockup_detector_reconfigure(void)
580 */ 586 */
581static __init void lockup_detector_setup(void) 587static __init void lockup_detector_setup(void)
582{ 588{
583 int ret;
584
585 /* 589 /*
586 * If sysctl is off and watchdog got disabled on the command line, 590 * If sysctl is off and watchdog got disabled on the command line,
587 * nothing to do here. 591 * nothing to do here.
@@ -592,24 +596,13 @@ static __init void lockup_detector_setup(void)
592 !(watchdog_enabled && watchdog_thresh)) 596 !(watchdog_enabled && watchdog_thresh))
593 return; 597 return;
594 598
595 ret = smpboot_register_percpu_thread_cpumask(&watchdog_threads,
596 &watchdog_allowed_mask);
597 if (ret) {
598 pr_err("Failed to initialize soft lockup detector threads\n");
599 return;
600 }
601
602 mutex_lock(&watchdog_mutex); 599 mutex_lock(&watchdog_mutex);
603 softlockup_threads_initialized = true;
604 lockup_detector_reconfigure(); 600 lockup_detector_reconfigure();
601 softlockup_initialized = true;
605 mutex_unlock(&watchdog_mutex); 602 mutex_unlock(&watchdog_mutex);
606} 603}
607 604
608#else /* CONFIG_SOFTLOCKUP_DETECTOR */ 605#else /* CONFIG_SOFTLOCKUP_DETECTOR */
609static inline int watchdog_park_threads(void) { return 0; }
610static inline void watchdog_unpark_threads(void) { }
611static inline int watchdog_enable_all_cpus(void) { return 0; }
612static inline void watchdog_disable_all_cpus(void) { }
613static void lockup_detector_reconfigure(void) 606static void lockup_detector_reconfigure(void)
614{ 607{
615 cpus_read_lock(); 608 cpus_read_lock();
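The watchdog.c rework drops the smpboot per-CPU kthreads: the hrtimer callback now kicks the softlockup touch function through stop_one_cpu_nowait(), and a per-CPU completion keeps at most one such work item in flight while letting watchdog_disable() wait for a pending one. The sketch below is a deliberately simplified, single-threaded model of that bookkeeping: a plain bool stands in for the completion, a direct call stands in for the CPU-stop machinery, and the timestamp is a bare counter.

	#include <stdbool.h>
	#include <stdio.h>

	static bool softlockup_done = true;	/* models the per-CPU completion */
	static unsigned long touch_ts;

	static void softlockup_fn(void)
	{
		touch_ts++;			/* __touch_watchdog() equivalent */
		softlockup_done = true;		/* complete(&softlockup_completion) */
	}

	/* hrtimer callback: only queue new work when the previous run finished */
	static void watchdog_timer_fn(void)
	{
		if (softlockup_done) {
			softlockup_done = false;	/* reinit_completion() */
			softlockup_fn();		/* stop_one_cpu_nowait(softlockup_fn) */
		}
	}

	static void watchdog_disable(void)
	{
		while (!softlockup_done)
			;				/* wait_for_completion() */
		printf("disabled after touch_ts=%lu\n", touch_ts);
	}

	int main(void)
	{
		watchdog_timer_fn();
		watchdog_timer_fn();
		watchdog_disable();
		return 0;
	}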
diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c
index e449a23e9d59..1f7020d65d0a 100644
--- a/kernel/watchdog_hld.c
+++ b/kernel/watchdog_hld.c
@@ -175,8 +175,8 @@ static int hardlockup_detector_event_create(void)
175 evt = perf_event_create_kernel_counter(wd_attr, cpu, NULL, 175 evt = perf_event_create_kernel_counter(wd_attr, cpu, NULL,
176 watchdog_overflow_callback, NULL); 176 watchdog_overflow_callback, NULL);
177 if (IS_ERR(evt)) { 177 if (IS_ERR(evt)) {
178 pr_info("Perf event create on CPU %d failed with %ld\n", cpu, 178 pr_debug("Perf event create on CPU %d failed with %ld\n", cpu,
179 PTR_ERR(evt)); 179 PTR_ERR(evt));
180 return PTR_ERR(evt); 180 return PTR_ERR(evt);
181 } 181 }
182 this_cpu_write(watchdog_ev, evt); 182 this_cpu_write(watchdog_ev, evt);
diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan
index 19d42ea75ec2..98fa559ebd80 100644
--- a/lib/Kconfig.ubsan
+++ b/lib/Kconfig.ubsan
@@ -1,9 +1,6 @@
1config ARCH_HAS_UBSAN_SANITIZE_ALL 1config ARCH_HAS_UBSAN_SANITIZE_ALL
2 bool 2 bool
3 3
4config ARCH_WANTS_UBSAN_NO_NULL
5 def_bool n
6
7config UBSAN 4config UBSAN
8 bool "Undefined behaviour sanity checker" 5 bool "Undefined behaviour sanity checker"
9 help 6 help
@@ -39,14 +36,6 @@ config UBSAN_ALIGNMENT
39 Enabling this option on architectures that support unaligned 36 Enabling this option on architectures that support unaligned
40 accesses may produce a lot of false positives. 37 accesses may produce a lot of false positives.
41 38
42config UBSAN_NULL
43 bool "Enable checking of null pointers"
44 depends on UBSAN
45 default y if !ARCH_WANTS_UBSAN_NO_NULL
46 help
47 This option enables detection of memory accesses via a
48 null pointer.
49
50config TEST_UBSAN 39config TEST_UBSAN
51 tristate "Module for testing for undefined behavior detection" 40 tristate "Module for testing for undefined behavior detection"
52 depends on m && UBSAN 41 depends on m && UBSAN
diff --git a/lib/atomic64.c b/lib/atomic64.c
index 53c2d5edc826..1d91e31eceec 100644
--- a/lib/atomic64.c
+++ b/lib/atomic64.c
@@ -178,18 +178,18 @@ long long atomic64_xchg(atomic64_t *v, long long new)
178} 178}
179EXPORT_SYMBOL(atomic64_xchg); 179EXPORT_SYMBOL(atomic64_xchg);
180 180
181int atomic64_add_unless(atomic64_t *v, long long a, long long u) 181long long atomic64_fetch_add_unless(atomic64_t *v, long long a, long long u)
182{ 182{
183 unsigned long flags; 183 unsigned long flags;
184 raw_spinlock_t *lock = lock_addr(v); 184 raw_spinlock_t *lock = lock_addr(v);
185 int ret = 0; 185 long long val;
186 186
187 raw_spin_lock_irqsave(lock, flags); 187 raw_spin_lock_irqsave(lock, flags);
188 if (v->counter != u) { 188 val = v->counter;
189 if (val != u)
189 v->counter += a; 190 v->counter += a;
190 ret = 1;
191 }
192 raw_spin_unlock_irqrestore(lock, flags); 191 raw_spin_unlock_irqrestore(lock, flags);
193 return ret; 192
193 return val;
194} 194}
195EXPORT_SYMBOL(atomic64_add_unless); 195EXPORT_SYMBOL(atomic64_fetch_add_unless);
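lib/atomic64.c now provides the fetch form, returning the old value instead of a bool, and the boolean atomic64_add_unless() can then be recovered as old != u. The userspace sketch below mirrors the generic spinlock-protected fallback, with a pthread mutex standing in for the hashed raw spinlock; the wrapper at the end is one plausible way callers get the boolean form back, not a quote of the kernel header.

	#include <pthread.h>
	#include <stdio.h>

	struct atomic64 {
		long long counter;
		pthread_mutex_t lock;
	};

	/* Add 'a' unless the counter is 'u'; return the old value (fetch variant). */
	static long long atomic64_fetch_add_unless(struct atomic64 *v, long long a, long long u)
	{
		long long val;

		pthread_mutex_lock(&v->lock);
		val = v->counter;
		if (val != u)
			v->counter += a;
		pthread_mutex_unlock(&v->lock);

		return val;
	}

	/* The old boolean API becomes a thin wrapper around the fetch variant. */
	static int atomic64_add_unless(struct atomic64 *v, long long a, long long u)
	{
		return atomic64_fetch_add_unless(v, a, u) != u;
	}

	int main(void)
	{
		struct atomic64 v = { 5, PTHREAD_MUTEX_INITIALIZER };

		printf("%d %lld\n", atomic64_add_unless(&v, 1, 5), v.counter);	/* 0 5 */
		printf("%d %lld\n", atomic64_add_unless(&v, 1, 7), v.counter);	/* 1 6 */
		return 0;
	}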
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index 994be4805cec..70935ed91125 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -360,9 +360,12 @@ static void debug_object_is_on_stack(void *addr, int onstack)
360 360
361 limit++; 361 limit++;
362 if (is_on_stack) 362 if (is_on_stack)
363 pr_warn("object is on stack, but not annotated\n"); 363 pr_warn("object %p is on stack %p, but NOT annotated.\n", addr,
364 task_stack_page(current));
364 else 365 else
365 pr_warn("object is not on stack, but annotated\n"); 366 pr_warn("object %p is NOT on stack %p, but annotated.\n", addr,
367 task_stack_page(current));
368
366 WARN_ON(1); 369 WARN_ON(1);
367} 370}
368 371
@@ -1185,8 +1188,7 @@ void __init debug_objects_mem_init(void)
1185 1188
1186 if (!obj_cache || debug_objects_replace_static_objects()) { 1189 if (!obj_cache || debug_objects_replace_static_objects()) {
1187 debug_objects_enabled = 0; 1190 debug_objects_enabled = 0;
1188 if (obj_cache) 1191 kmem_cache_destroy(obj_cache);
1189 kmem_cache_destroy(obj_cache);
1190 pr_warn("out of memory.\n"); 1192 pr_warn("out of memory.\n");
1191 } else 1193 } else
1192 debug_objects_selftest(); 1194 debug_objects_selftest();
diff --git a/lib/ioremap.c b/lib/ioremap.c
index 54e5bbaa3200..517f5853ffed 100644
--- a/lib/ioremap.c
+++ b/lib/ioremap.c
@@ -92,7 +92,7 @@ static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr,
92 if (ioremap_pmd_enabled() && 92 if (ioremap_pmd_enabled() &&
93 ((next - addr) == PMD_SIZE) && 93 ((next - addr) == PMD_SIZE) &&
94 IS_ALIGNED(phys_addr + addr, PMD_SIZE) && 94 IS_ALIGNED(phys_addr + addr, PMD_SIZE) &&
95 pmd_free_pte_page(pmd)) { 95 pmd_free_pte_page(pmd, addr)) {
96 if (pmd_set_huge(pmd, phys_addr + addr, prot)) 96 if (pmd_set_huge(pmd, phys_addr + addr, prot))
97 continue; 97 continue;
98 } 98 }
@@ -119,7 +119,7 @@ static inline int ioremap_pud_range(p4d_t *p4d, unsigned long addr,
119 if (ioremap_pud_enabled() && 119 if (ioremap_pud_enabled() &&
120 ((next - addr) == PUD_SIZE) && 120 ((next - addr) == PUD_SIZE) &&
121 IS_ALIGNED(phys_addr + addr, PUD_SIZE) && 121 IS_ALIGNED(phys_addr + addr, PUD_SIZE) &&
122 pud_free_pmd_page(pud)) { 122 pud_free_pmd_page(pud, addr)) {
123 if (pud_set_huge(pud, phys_addr + addr, prot)) 123 if (pud_set_huge(pud, phys_addr + addr, prot))
124 continue; 124 continue;
125 } 125 }
diff --git a/lib/refcount.c b/lib/refcount.c
index d3b81cefce91..ebcf8cd49e05 100644
--- a/lib/refcount.c
+++ b/lib/refcount.c
@@ -35,13 +35,13 @@
35 * 35 *
36 */ 36 */
37 37
38#include <linux/mutex.h>
38#include <linux/refcount.h> 39#include <linux/refcount.h>
40#include <linux/spinlock.h>
39#include <linux/bug.h> 41#include <linux/bug.h>
40 42
41#ifdef CONFIG_REFCOUNT_FULL
42
43/** 43/**
44 * refcount_add_not_zero - add a value to a refcount unless it is 0 44 * refcount_add_not_zero_checked - add a value to a refcount unless it is 0
45 * @i: the value to add to the refcount 45 * @i: the value to add to the refcount
46 * @r: the refcount 46 * @r: the refcount
47 * 47 *
@@ -58,7 +58,7 @@
58 * 58 *
59 * Return: false if the passed refcount is 0, true otherwise 59 * Return: false if the passed refcount is 0, true otherwise
60 */ 60 */
61bool refcount_add_not_zero(unsigned int i, refcount_t *r) 61bool refcount_add_not_zero_checked(unsigned int i, refcount_t *r)
62{ 62{
63 unsigned int new, val = atomic_read(&r->refs); 63 unsigned int new, val = atomic_read(&r->refs);
64 64
@@ -79,10 +79,10 @@ bool refcount_add_not_zero(unsigned int i, refcount_t *r)
79 79
80 return true; 80 return true;
81} 81}
82EXPORT_SYMBOL(refcount_add_not_zero); 82EXPORT_SYMBOL(refcount_add_not_zero_checked);
83 83
84/** 84/**
85 * refcount_add - add a value to a refcount 85 * refcount_add_checked - add a value to a refcount
86 * @i: the value to add to the refcount 86 * @i: the value to add to the refcount
87 * @r: the refcount 87 * @r: the refcount
88 * 88 *
@@ -97,14 +97,14 @@ EXPORT_SYMBOL(refcount_add_not_zero);
97 * cases, refcount_inc(), or one of its variants, should instead be used to 97 * cases, refcount_inc(), or one of its variants, should instead be used to
98 * increment a reference count. 98 * increment a reference count.
99 */ 99 */
100void refcount_add(unsigned int i, refcount_t *r) 100void refcount_add_checked(unsigned int i, refcount_t *r)
101{ 101{
102 WARN_ONCE(!refcount_add_not_zero(i, r), "refcount_t: addition on 0; use-after-free.\n"); 102 WARN_ONCE(!refcount_add_not_zero_checked(i, r), "refcount_t: addition on 0; use-after-free.\n");
103} 103}
104EXPORT_SYMBOL(refcount_add); 104EXPORT_SYMBOL(refcount_add_checked);
105 105
106/** 106/**
107 * refcount_inc_not_zero - increment a refcount unless it is 0 107 * refcount_inc_not_zero_checked - increment a refcount unless it is 0
108 * @r: the refcount to increment 108 * @r: the refcount to increment
109 * 109 *
110 * Similar to atomic_inc_not_zero(), but will saturate at UINT_MAX and WARN. 110 * Similar to atomic_inc_not_zero(), but will saturate at UINT_MAX and WARN.
@@ -115,7 +115,7 @@ EXPORT_SYMBOL(refcount_add);
115 * 115 *
116 * Return: true if the increment was successful, false otherwise 116 * Return: true if the increment was successful, false otherwise
117 */ 117 */
118bool refcount_inc_not_zero(refcount_t *r) 118bool refcount_inc_not_zero_checked(refcount_t *r)
119{ 119{
120 unsigned int new, val = atomic_read(&r->refs); 120 unsigned int new, val = atomic_read(&r->refs);
121 121
@@ -134,10 +134,10 @@ bool refcount_inc_not_zero(refcount_t *r)
134 134
135 return true; 135 return true;
136} 136}
137EXPORT_SYMBOL(refcount_inc_not_zero); 137EXPORT_SYMBOL(refcount_inc_not_zero_checked);
138 138
139/** 139/**
140 * refcount_inc - increment a refcount 140 * refcount_inc_checked - increment a refcount
141 * @r: the refcount to increment 141 * @r: the refcount to increment
142 * 142 *
143 * Similar to atomic_inc(), but will saturate at UINT_MAX and WARN. 143 * Similar to atomic_inc(), but will saturate at UINT_MAX and WARN.
@@ -148,14 +148,14 @@ EXPORT_SYMBOL(refcount_inc_not_zero);
148 * Will WARN if the refcount is 0, as this represents a possible use-after-free 148 * Will WARN if the refcount is 0, as this represents a possible use-after-free
149 * condition. 149 * condition.
150 */ 150 */
151void refcount_inc(refcount_t *r) 151void refcount_inc_checked(refcount_t *r)
152{ 152{
153 WARN_ONCE(!refcount_inc_not_zero(r), "refcount_t: increment on 0; use-after-free.\n"); 153 WARN_ONCE(!refcount_inc_not_zero_checked(r), "refcount_t: increment on 0; use-after-free.\n");
154} 154}
155EXPORT_SYMBOL(refcount_inc); 155EXPORT_SYMBOL(refcount_inc_checked);
156 156
157/** 157/**
158 * refcount_sub_and_test - subtract from a refcount and test if it is 0 158 * refcount_sub_and_test_checked - subtract from a refcount and test if it is 0
159 * @i: amount to subtract from the refcount 159 * @i: amount to subtract from the refcount
160 * @r: the refcount 160 * @r: the refcount
161 * 161 *
@@ -174,7 +174,7 @@ EXPORT_SYMBOL(refcount_inc);
174 * 174 *
175 * Return: true if the resulting refcount is 0, false otherwise 175 * Return: true if the resulting refcount is 0, false otherwise
176 */ 176 */
177bool refcount_sub_and_test(unsigned int i, refcount_t *r) 177bool refcount_sub_and_test_checked(unsigned int i, refcount_t *r)
178{ 178{
179 unsigned int new, val = atomic_read(&r->refs); 179 unsigned int new, val = atomic_read(&r->refs);
180 180
@@ -192,10 +192,10 @@ bool refcount_sub_and_test(unsigned int i, refcount_t *r)
192 192
193 return !new; 193 return !new;
194} 194}
195EXPORT_SYMBOL(refcount_sub_and_test); 195EXPORT_SYMBOL(refcount_sub_and_test_checked);
196 196
197/** 197/**
198 * refcount_dec_and_test - decrement a refcount and test if it is 0 198 * refcount_dec_and_test_checked - decrement a refcount and test if it is 0
199 * @r: the refcount 199 * @r: the refcount
200 * 200 *
201 * Similar to atomic_dec_and_test(), it will WARN on underflow and fail to 201 * Similar to atomic_dec_and_test(), it will WARN on underflow and fail to
@@ -207,14 +207,14 @@ EXPORT_SYMBOL(refcount_sub_and_test);
207 * 207 *
208 * Return: true if the resulting refcount is 0, false otherwise 208 * Return: true if the resulting refcount is 0, false otherwise
209 */ 209 */
210bool refcount_dec_and_test(refcount_t *r) 210bool refcount_dec_and_test_checked(refcount_t *r)
211{ 211{
212 return refcount_sub_and_test(1, r); 212 return refcount_sub_and_test_checked(1, r);
213} 213}
214EXPORT_SYMBOL(refcount_dec_and_test); 214EXPORT_SYMBOL(refcount_dec_and_test_checked);
215 215
216/** 216/**
217 * refcount_dec - decrement a refcount 217 * refcount_dec_checked - decrement a refcount
218 * @r: the refcount 218 * @r: the refcount
219 * 219 *
220 * Similar to atomic_dec(), it will WARN on underflow and fail to decrement 220 * Similar to atomic_dec(), it will WARN on underflow and fail to decrement
@@ -223,12 +223,11 @@ EXPORT_SYMBOL(refcount_dec_and_test);
223 * Provides release memory ordering, such that prior loads and stores are done 223 * Provides release memory ordering, such that prior loads and stores are done
224 * before. 224 * before.
225 */ 225 */
226void refcount_dec(refcount_t *r) 226void refcount_dec_checked(refcount_t *r)
227{ 227{
228 WARN_ONCE(refcount_dec_and_test(r), "refcount_t: decrement hit 0; leaking memory.\n"); 228 WARN_ONCE(refcount_dec_and_test_checked(r), "refcount_t: decrement hit 0; leaking memory.\n");
229} 229}
230EXPORT_SYMBOL(refcount_dec); 230EXPORT_SYMBOL(refcount_dec_checked);
231#endif /* CONFIG_REFCOUNT_FULL */
232 231
233/** 232/**
234 * refcount_dec_if_one - decrement a refcount if it is 1 233 * refcount_dec_if_one - decrement a refcount if it is 1
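The refcount.c hunks only rename the slow-path helpers to *_checked so they can coexist with faster architecture implementations; the underlying logic stays the familiar compare-exchange loop that refuses to resurrect a zero count and saturates at UINT_MAX instead of overflowing. The compilable userspace sketch below reproduces that loop with C11 atomics; the saturation warning is reduced to a printf, and the memory-ordering choices are the relaxed ones the comment block in the file describes.

	#include <limits.h>
	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdio.h>

	typedef struct { atomic_uint refs; } refcount_t;

	static bool refcount_inc_not_zero_checked(refcount_t *r)
	{
		unsigned int new, val = atomic_load_explicit(&r->refs, memory_order_relaxed);

		do {
			new = val + 1;
			if (!val)		/* 0 means possible use-after-free: refuse */
				return false;
			if (!new)		/* wrapped: stay saturated at UINT_MAX */
				return true;
		} while (!atomic_compare_exchange_weak_explicit(&r->refs, &val, new,
								memory_order_relaxed,
								memory_order_relaxed));

		if (new == UINT_MAX)
			printf("refcount_t: saturated; leaking memory.\n");
		return true;
	}

	int main(void)
	{
		refcount_t r = { 1 };

		printf("%d\n", refcount_inc_not_zero_checked(&r));	/* 1: went 1 -> 2 */
		atomic_store(&r.refs, 0);
		printf("%d\n", refcount_inc_not_zero_checked(&r));	/* 0: stays at 0 */
		return 0;
	}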
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 039ddbc574e9..3103099f64fd 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3167,6 +3167,13 @@ static vm_fault_t hugetlb_vm_op_fault(struct vm_fault *vmf)
3167 return 0; 3167 return 0;
3168} 3168}
3169 3169
3170/*
3171 * When a new function is introduced to vm_operations_struct and added
3172 * to hugetlb_vm_ops, please consider adding the function to shm_vm_ops.
3173 * This is because under System V memory model, mappings created via
3174 * shmget/shmat with "huge page" specified are backed by hugetlbfs files,
3175 * their original vm_ops are overwritten with shm_vm_ops.
3176 */
3170const struct vm_operations_struct hugetlb_vm_ops = { 3177const struct vm_operations_struct hugetlb_vm_ops = {
3171 .fault = hugetlb_vm_op_fault, 3178 .fault = hugetlb_vm_op_fault,
3172 .open = hugetlb_vm_op_open, 3179 .open = hugetlb_vm_op_open,
diff --git a/mm/init-mm.c b/mm/init-mm.c
index f0179c9c04c2..a787a319211e 100644
--- a/mm/init-mm.c
+++ b/mm/init-mm.c
@@ -15,6 +15,16 @@
15#define INIT_MM_CONTEXT(name) 15#define INIT_MM_CONTEXT(name)
16#endif 16#endif
17 17
18/*
19 * For dynamically allocated mm_structs, there is a dynamically sized cpumask
20 * at the end of the structure, the size of which depends on the maximum CPU
21 * number the system can see. That way we allocate only as much memory for
22 * mm_cpumask() as needed for the hundreds, or thousands of processes that
23 * a system typically runs.
24 *
25 * Since there is only one init_mm in the entire system, keep it simple
26 * and size this cpu_bitmask to NR_CPUS.
27 */
18struct mm_struct init_mm = { 28struct mm_struct init_mm = {
19 .mm_rb = RB_ROOT, 29 .mm_rb = RB_ROOT,
20 .pgd = swapper_pg_dir, 30 .pgd = swapper_pg_dir,
@@ -25,5 +35,6 @@ struct mm_struct init_mm = {
25 .arg_lock = __SPIN_LOCK_UNLOCKED(init_mm.arg_lock), 35 .arg_lock = __SPIN_LOCK_UNLOCKED(init_mm.arg_lock),
26 .mmlist = LIST_HEAD_INIT(init_mm.mmlist), 36 .mmlist = LIST_HEAD_INIT(init_mm.mmlist),
27 .user_ns = &init_user_ns, 37 .user_ns = &init_user_ns,
38 .cpu_bitmap = { [BITS_TO_LONGS(NR_CPUS)] = 0},
28 INIT_MM_CONTEXT(init_mm) 39 INIT_MM_CONTEXT(init_mm)
29}; 40};
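The init-mm.c hunk documents the new trailing cpu_bitmap in mm_struct: dynamically allocated mm_structs size the bitmap for the number of possible CPUs at boot, while the single static init_mm is sized to NR_CPUS via a GCC designated-initializer extension on the flexible array. The sketch below shows only the dynamic side of that idiom in portable userspace C; the struct, the constants, and mm_alloc() are made up for illustration.

	#include <stdio.h>
	#include <stdlib.h>

	#define NR_CPUS			64
	#define BITS_PER_LONG		(8 * sizeof(long))
	#define BITS_TO_LONGS(n)	(((n) + BITS_PER_LONG - 1) / BITS_PER_LONG)

	struct mm_like {
		void *pgd;
		/* Flexible trailing bitmap, sized at allocation for the real CPU count. */
		unsigned long cpu_bitmap[];
	};

	/* Dynamic case: allocate only as many longs as the running system needs. */
	static struct mm_like *mm_alloc(unsigned int nr_cpus)
	{
		size_t sz = sizeof(struct mm_like) +
			    BITS_TO_LONGS(nr_cpus) * sizeof(long);

		return calloc(1, sz);
	}

	int main(void)
	{
		unsigned int nr_cpus = 4;	/* pretend only 4 CPUs are possible */
		struct mm_like *mm = mm_alloc(nr_cpus);

		if (!mm)
			return 1;
		/* mark CPU 3 in the mask */
		mm->cpu_bitmap[3 / BITS_PER_LONG] |= 1UL << (3 % BITS_PER_LONG);
		printf("allocated %zu bitmap longs\n", BITS_TO_LONGS(nr_cpus));
		free(mm);
		return 0;
	}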
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 8c0280b3143e..b2173f7e5164 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4037,6 +4037,14 @@ static struct cftype mem_cgroup_legacy_files[] = {
4037 4037
4038static DEFINE_IDR(mem_cgroup_idr); 4038static DEFINE_IDR(mem_cgroup_idr);
4039 4039
4040static void mem_cgroup_id_remove(struct mem_cgroup *memcg)
4041{
4042 if (memcg->id.id > 0) {
4043 idr_remove(&mem_cgroup_idr, memcg->id.id);
4044 memcg->id.id = 0;
4045 }
4046}
4047
4040static void mem_cgroup_id_get_many(struct mem_cgroup *memcg, unsigned int n) 4048static void mem_cgroup_id_get_many(struct mem_cgroup *memcg, unsigned int n)
4041{ 4049{
4042 VM_BUG_ON(atomic_read(&memcg->id.ref) <= 0); 4050 VM_BUG_ON(atomic_read(&memcg->id.ref) <= 0);
@@ -4047,8 +4055,7 @@ static void mem_cgroup_id_put_many(struct mem_cgroup *memcg, unsigned int n)
4047{ 4055{
4048 VM_BUG_ON(atomic_read(&memcg->id.ref) < n); 4056 VM_BUG_ON(atomic_read(&memcg->id.ref) < n);
4049 if (atomic_sub_and_test(n, &memcg->id.ref)) { 4057 if (atomic_sub_and_test(n, &memcg->id.ref)) {
4050 idr_remove(&mem_cgroup_idr, memcg->id.id); 4058 mem_cgroup_id_remove(memcg);
4051 memcg->id.id = 0;
4052 4059
4053 /* Memcg ID pins CSS */ 4060 /* Memcg ID pins CSS */
4054 css_put(&memcg->css); 4061 css_put(&memcg->css);
@@ -4185,8 +4192,7 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
4185 idr_replace(&mem_cgroup_idr, memcg, memcg->id.id); 4192 idr_replace(&mem_cgroup_idr, memcg, memcg->id.id);
4186 return memcg; 4193 return memcg;
4187fail: 4194fail:
4188 if (memcg->id.id > 0) 4195 mem_cgroup_id_remove(memcg);
4189 idr_remove(&mem_cgroup_idr, memcg->id.id);
4190 __mem_cgroup_free(memcg); 4196 __mem_cgroup_free(memcg);
4191 return NULL; 4197 return NULL;
4192} 4198}
@@ -4245,6 +4251,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
4245 4251
4246 return &memcg->css; 4252 return &memcg->css;
4247fail: 4253fail:
4254 mem_cgroup_id_remove(memcg);
4248 mem_cgroup_free(memcg); 4255 mem_cgroup_free(memcg);
4249 return ERR_PTR(-ENOMEM); 4256 return ERR_PTR(-ENOMEM);
4250} 4257}
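The memcontrol.c hunks factor the "remove from the IDR and zero the id" sequence into mem_cgroup_id_remove(), so every error path and the normal release path can call it without double-remove worries. Reduced to a userspace sketch with an invented registry array in place of the IDR, the pattern looks like this:

	#include <stdio.h>

	struct obj {
		int id;	/* 0 means "not registered" */
	};

	static int registry[16];	/* stand-in for the IDR */

	static void obj_id_remove(struct obj *o)
	{
		/* Safe to call on any path, any number of times. */
		if (o->id > 0) {
			registry[o->id] = 0;
			o->id = 0;
		}
	}

	int main(void)
	{
		struct obj o = { .id = 3 };

		registry[3] = 1;
		obj_id_remove(&o);	/* error path ... */
		obj_id_remove(&o);	/* ... and a later release path: harmless */
		printf("id=%d slot=%d\n", o.id, registry[3]);
		return 0;
	}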
diff --git a/mm/memory.c b/mm/memory.c
index 7206a634270b..3d0a74ab70f2 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -326,16 +326,20 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_
326 326
327#ifdef CONFIG_HAVE_RCU_TABLE_FREE 327#ifdef CONFIG_HAVE_RCU_TABLE_FREE
328 328
329/*
330 * See the comment near struct mmu_table_batch.
331 */
332
333static void tlb_remove_table_smp_sync(void *arg) 329static void tlb_remove_table_smp_sync(void *arg)
334{ 330{
335 /* Simply deliver the interrupt */ 331 struct mm_struct __maybe_unused *mm = arg;
332 /*
333 * On most architectures this does nothing. Simply delivering the
334 * interrupt is enough to prevent races with software page table
335 * walking like that done in get_user_pages_fast.
336 *
337 * See the comment near struct mmu_table_batch.
338 */
339 tlb_flush_remove_tables_local(mm);
336} 340}
337 341
338static void tlb_remove_table_one(void *table) 342static void tlb_remove_table_one(void *table, struct mmu_gather *tlb)
339{ 343{
340 /* 344 /*
341 * This isn't an RCU grace period and hence the page-tables cannot be 345 * This isn't an RCU grace period and hence the page-tables cannot be
@@ -344,7 +348,7 @@ static void tlb_remove_table_one(void *table)
344 * It is however sufficient for software page-table walkers that rely on 348 * It is however sufficient for software page-table walkers that rely on
345 * IRQ disabling. See the comment near struct mmu_table_batch. 349 * IRQ disabling. See the comment near struct mmu_table_batch.
346 */ 350 */
347 smp_call_function(tlb_remove_table_smp_sync, NULL, 1); 351 smp_call_function(tlb_remove_table_smp_sync, tlb->mm, 1);
348 __tlb_remove_table(table); 352 __tlb_remove_table(table);
349} 353}
350 354
@@ -365,6 +369,8 @@ void tlb_table_flush(struct mmu_gather *tlb)
365{ 369{
366 struct mmu_table_batch **batch = &tlb->batch; 370 struct mmu_table_batch **batch = &tlb->batch;
367 371
372 tlb_flush_remove_tables(tlb->mm);
373
368 if (*batch) { 374 if (*batch) {
369 call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu); 375 call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu);
370 *batch = NULL; 376 *batch = NULL;
@@ -387,7 +393,7 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table)
387 if (*batch == NULL) { 393 if (*batch == NULL) {
388 *batch = (struct mmu_table_batch *)__get_free_page(GFP_NOWAIT | __GFP_NOWARN); 394 *batch = (struct mmu_table_batch *)__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
389 if (*batch == NULL) { 395 if (*batch == NULL) {
390 tlb_remove_table_one(table); 396 tlb_remove_table_one(table, tlb);
391 return; 397 return;
392 } 398 }
393 (*batch)->nr = 0; 399 (*batch)->nr = 0;
@@ -1417,11 +1423,9 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
1417 do { 1423 do {
1418 next = pmd_addr_end(addr, end); 1424 next = pmd_addr_end(addr, end);
1419 if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) { 1425 if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) {
1420 if (next - addr != HPAGE_PMD_SIZE) { 1426 if (next - addr != HPAGE_PMD_SIZE)
1421 VM_BUG_ON_VMA(vma_is_anonymous(vma) &&
1422 !rwsem_is_locked(&tlb->mm->mmap_sem), vma);
1423 __split_huge_pmd(vma, pmd, addr, false, NULL); 1427 __split_huge_pmd(vma, pmd, addr, false, NULL);
1424 } else if (zap_huge_pmd(tlb, vma, pmd, addr)) 1428 else if (zap_huge_pmd(tlb, vma, pmd, addr))
1425 goto next; 1429 goto next;
1426 /* fall through */ 1430 /* fall through */
1427 } 1431 }
@@ -4397,6 +4401,9 @@ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
4397 return -EINVAL; 4401 return -EINVAL;
4398 4402
4399 maddr = ioremap_prot(phys_addr, PAGE_ALIGN(len + offset), prot); 4403 maddr = ioremap_prot(phys_addr, PAGE_ALIGN(len + offset), prot);
4404 if (!maddr)
4405 return -ENOMEM;
4406
4400 if (write) 4407 if (write)
4401 memcpy_toio(maddr + offset, buf, len); 4408 memcpy_toio(maddr + offset, buf, len);
4402 else 4409 else
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a790ef4be74e..3222193c46c6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6939,9 +6939,21 @@ unsigned long free_reserved_area(void *start, void *end, int poison, char *s)
6939 start = (void *)PAGE_ALIGN((unsigned long)start); 6939 start = (void *)PAGE_ALIGN((unsigned long)start);
6940 end = (void *)((unsigned long)end & PAGE_MASK); 6940 end = (void *)((unsigned long)end & PAGE_MASK);
6941 for (pos = start; pos < end; pos += PAGE_SIZE, pages++) { 6941 for (pos = start; pos < end; pos += PAGE_SIZE, pages++) {
6942 struct page *page = virt_to_page(pos);
6943 void *direct_map_addr;
6944
6945 /*
6946 * 'direct_map_addr' might be different from 'pos'
6947 * because some architectures' virt_to_page()
6948 * work with aliases. Getting the direct map
6949 * address ensures that we get a _writeable_
6950 * alias for the memset().
6951 */
6952 direct_map_addr = page_address(page);
6942 if ((unsigned int)poison <= 0xFF) 6953 if ((unsigned int)poison <= 0xFF)
6943 memset(pos, poison, PAGE_SIZE); 6954 memset(direct_map_addr, poison, PAGE_SIZE);
6944 free_reserved_page(virt_to_page(pos)); 6955
6956 free_reserved_page(page);
6945 } 6957 }
6946 6958
6947 if (pages && s) 6959 if (pages && s)
diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c
index af8c4b38b746..d84227d75717 100644
--- a/net/atm/pppoatm.c
+++ b/net/atm/pppoatm.c
@@ -244,7 +244,7 @@ static int pppoatm_may_send(struct pppoatm_vcc *pvcc, int size)
244 * the packet count limit, so... 244 * the packet count limit, so...
245 */ 245 */
246 if (atm_may_send(pvcc->atmvcc, size) && 246 if (atm_may_send(pvcc->atmvcc, size) &&
247 atomic_inc_not_zero_hint(&pvcc->inflight, NONE_INFLIGHT)) 247 atomic_inc_not_zero(&pvcc->inflight))
248 return 1; 248 return 1;
249 249
250 /* 250 /*
diff --git a/net/core/dev.c b/net/core/dev.c
index a5aa1c7444e6..559a91271f82 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -7149,16 +7149,19 @@ int dev_change_tx_queue_len(struct net_device *dev, unsigned long new_len)
7149 dev->tx_queue_len = new_len; 7149 dev->tx_queue_len = new_len;
7150 res = call_netdevice_notifiers(NETDEV_CHANGE_TX_QUEUE_LEN, dev); 7150 res = call_netdevice_notifiers(NETDEV_CHANGE_TX_QUEUE_LEN, dev);
7151 res = notifier_to_errno(res); 7151 res = notifier_to_errno(res);
7152 if (res) { 7152 if (res)
7153 netdev_err(dev, 7153 goto err_rollback;
7154 "refused to change device tx_queue_len\n"); 7154 res = dev_qdisc_change_tx_queue_len(dev);
7155 dev->tx_queue_len = orig_len; 7155 if (res)
7156 return res; 7156 goto err_rollback;
7157 }
7158 return dev_qdisc_change_tx_queue_len(dev);
7159 } 7157 }
7160 7158
7161 return 0; 7159 return 0;
7160
7161err_rollback:
7162 netdev_err(dev, "refused to change device tx_queue_len\n");
7163 dev->tx_queue_len = orig_len;
7164 return res;
7162} 7165}
7163 7166
7164/** 7167/**
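The dev.c hunk consolidates the two failure cases in dev_change_tx_queue_len(), a notifier veto and a qdisc resize failure, into one err_rollback label that restores the original tx_queue_len and logs once. A generic userspace sketch of the same unwind shape follows; the device struct and both helpers are invented, and the second helper is hard-wired to fail so the rollback path is exercised.

	#include <stdio.h>

	struct dev {
		unsigned long tx_queue_len;
	};

	static int notify_change(struct dev *d)	{ (void)d; return 0; }	/* pretend OK */
	static int qdisc_resize(struct dev *d)	{ (void)d; return -1; }	/* pretend this fails */

	static int change_tx_queue_len(struct dev *d, unsigned long new_len)
	{
		unsigned long orig_len = d->tx_queue_len;
		int res;

		if (new_len == orig_len)
			return 0;

		d->tx_queue_len = new_len;
		res = notify_change(d);
		if (res)
			goto err_rollback;
		res = qdisc_resize(d);
		if (res)
			goto err_rollback;

		return 0;

	err_rollback:
		fprintf(stderr, "refused to change device tx_queue_len\n");
		d->tx_queue_len = orig_len;
		return res;
	}

	int main(void)
	{
		struct dev d = { .tx_queue_len = 1000 };

		printf("res=%d len=%lu\n", change_tx_queue_len(&d, 2000), d.tx_queue_len);
		return 0;
	}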
diff --git a/net/core/filter.c b/net/core/filter.c
index 06da770f543f..9dfd145eedcc 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1712,24 +1712,26 @@ static const struct bpf_func_proto bpf_skb_load_bytes_proto = {
1712BPF_CALL_5(bpf_skb_load_bytes_relative, const struct sk_buff *, skb, 1712BPF_CALL_5(bpf_skb_load_bytes_relative, const struct sk_buff *, skb,
1713 u32, offset, void *, to, u32, len, u32, start_header) 1713 u32, offset, void *, to, u32, len, u32, start_header)
1714{ 1714{
1715 u8 *end = skb_tail_pointer(skb);
1716 u8 *net = skb_network_header(skb);
1717 u8 *mac = skb_mac_header(skb);
1715 u8 *ptr; 1718 u8 *ptr;
1716 1719
1717 if (unlikely(offset > 0xffff || len > skb_headlen(skb))) 1720 if (unlikely(offset > 0xffff || len > (end - mac)))
1718 goto err_clear; 1721 goto err_clear;
1719 1722
1720 switch (start_header) { 1723 switch (start_header) {
1721 case BPF_HDR_START_MAC: 1724 case BPF_HDR_START_MAC:
1722 ptr = skb_mac_header(skb) + offset; 1725 ptr = mac + offset;
1723 break; 1726 break;
1724 case BPF_HDR_START_NET: 1727 case BPF_HDR_START_NET:
1725 ptr = skb_network_header(skb) + offset; 1728 ptr = net + offset;
1726 break; 1729 break;
1727 default: 1730 default:
1728 goto err_clear; 1731 goto err_clear;
1729 } 1732 }
1730 1733
1731 if (likely(ptr >= skb_mac_header(skb) && 1734 if (likely(ptr >= mac && ptr + len <= end)) {
1732 ptr + len <= skb_tail_pointer(skb))) {
1733 memcpy(to, ptr, len); 1735 memcpy(to, ptr, len);
1734 return 0; 1736 return 0;
1735 } 1737 }
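The filter.c hunk tightens the length check in bpf_skb_load_bytes_relative(): the old len > skb_headlen(skb) test could reject reads that were actually within the linear area once the MAC header starts before skb->data, so the new code bounds len against the span from the MAC header to the tail and then validates the final pointer window. The userspace model below reproduces that bounds logic over a plain buffer; the skb itself is faked, and the header offsets are arbitrary.

	#include <stdio.h>
	#include <string.h>

	/* Copy 'len' bytes starting 'offset' past 'start', staying inside [mac, end). */
	static int load_bytes_relative(const unsigned char *mac, const unsigned char *end,
				       const unsigned char *start, unsigned int offset,
				       void *to, unsigned int len)
	{
		const unsigned char *ptr;

		if (offset > 0xffff || len > (unsigned int)(end - mac))
			return -1;

		ptr = start + offset;
		if (ptr >= mac && ptr + len <= end) {
			memcpy(to, ptr, len);
			return 0;
		}
		return -1;
	}

	int main(void)
	{
		unsigned char pkt[64] = "headers...payload";
		unsigned char out[8];
		const unsigned char *mac = pkt;		/* MAC header start */
		const unsigned char *net = pkt + 14;	/* pretend network header */
		const unsigned char *end = pkt + sizeof(pkt);

		printf("%d\n", load_bytes_relative(mac, end, net, 0, out, sizeof(out)));	/* 0 */
		printf("%d\n", load_bytes_relative(mac, end, net, 60, out, sizeof(out)));	/* -1 */
		return 0;
	}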
diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
index e7e626fb87bb..e45098593dc0 100644
--- a/net/core/lwt_bpf.c
+++ b/net/core/lwt_bpf.c
@@ -217,7 +217,7 @@ static int bpf_parse_prog(struct nlattr *attr, struct bpf_lwt_prog *prog,
217 if (!tb[LWT_BPF_PROG_FD] || !tb[LWT_BPF_PROG_NAME]) 217 if (!tb[LWT_BPF_PROG_FD] || !tb[LWT_BPF_PROG_NAME])
218 return -EINVAL; 218 return -EINVAL;
219 219
220 prog->name = nla_memdup(tb[LWT_BPF_PROG_NAME], GFP_KERNEL); 220 prog->name = nla_memdup(tb[LWT_BPF_PROG_NAME], GFP_ATOMIC);
221 if (!prog->name) 221 if (!prog->name)
222 return -ENOMEM; 222 return -ENOMEM;
223 223
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 9d1f22072d5d..6771f1855b96 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -345,7 +345,8 @@ static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
345 rcu_read_lock(); 345 rcu_read_lock();
346 /* mem->id is valid, checked in xdp_rxq_info_reg_mem_model() */ 346 /* mem->id is valid, checked in xdp_rxq_info_reg_mem_model() */
347 xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params); 347 xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
348 xa->zc_alloc->free(xa->zc_alloc, handle); 348 if (!WARN_ON_ONCE(!xa))
349 xa->zc_alloc->free(xa->zc_alloc, handle);
349 rcu_read_unlock(); 350 rcu_read_unlock();
350 default: 351 default:
351 /* Not possible, checked in xdp_rxq_info_reg_mem_model() */ 352 /* Not possible, checked in xdp_rxq_info_reg_mem_model() */
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 2b75df469220..842a9c7c73a3 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -229,14 +229,16 @@ static void ccid2_cwnd_restart(struct sock *sk, const u32 now)
229 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); 229 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
230 u32 cwnd = hc->tx_cwnd, restart_cwnd, 230 u32 cwnd = hc->tx_cwnd, restart_cwnd,
231 iwnd = rfc3390_bytes_to_packets(dccp_sk(sk)->dccps_mss_cache); 231 iwnd = rfc3390_bytes_to_packets(dccp_sk(sk)->dccps_mss_cache);
232 s32 delta = now - hc->tx_lsndtime;
232 233
233 hc->tx_ssthresh = max(hc->tx_ssthresh, (cwnd >> 1) + (cwnd >> 2)); 234 hc->tx_ssthresh = max(hc->tx_ssthresh, (cwnd >> 1) + (cwnd >> 2));
234 235
235 /* don't reduce cwnd below the initial window (IW) */ 236 /* don't reduce cwnd below the initial window (IW) */
236 restart_cwnd = min(cwnd, iwnd); 237 restart_cwnd = min(cwnd, iwnd);
237 cwnd >>= (now - hc->tx_lsndtime) / hc->tx_rto;
238 hc->tx_cwnd = max(cwnd, restart_cwnd);
239 238
239 while ((delta -= hc->tx_rto) >= 0 && cwnd > restart_cwnd)
240 cwnd >>= 1;
241 hc->tx_cwnd = max(cwnd, restart_cwnd);
240 hc->tx_cwnd_stamp = now; 242 hc->tx_cwnd_stamp = now;
241 hc->tx_cwnd_used = 0; 243 hc->tx_cwnd_used = 0;
242 244
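The ccid2 hunk fixes an undefined shift: cwnd >>= (now - lsndtime) / rto can produce a shift count of 32 or more on a u32 after a long idle period, which is undefined behaviour in C. The replacement halves the window once per elapsed RTO and stops as soon as it reaches the restart window. A standalone sketch of the corrected arithmetic, with plain integers standing in for the socket fields:

	#include <stdint.h>
	#include <stdio.h>

	static uint32_t cwnd_restart(uint32_t cwnd, uint32_t iwnd,
				     int32_t delta, int32_t rto)
	{
		uint32_t restart_cwnd = cwnd < iwnd ? cwnd : iwnd;	/* never below IW */

		/*
		 * Halve once per elapsed RTO; the loop form can never shift by
		 * more than the value's width, unlike cwnd >>= delta / rto.
		 */
		while ((delta -= rto) >= 0 && cwnd > restart_cwnd)
			cwnd >>= 1;

		return cwnd > restart_cwnd ? cwnd : restart_cwnd;
	}

	int main(void)
	{
		/* 100 RTOs of idle time: the old code would shift a u32 by 100. */
		printf("cwnd=%u\n", cwnd_restart(128, 10, 100 * 200, 200));	/* clamps to 10 */
		printf("cwnd=%u\n", cwnd_restart(128, 10, 2 * 200, 200));	/* two halvings: 32 */
		return 0;
	}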
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 1e3b6a6d8a40..9864bcd3d317 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -639,7 +639,7 @@ static int dsa_slave_set_eee(struct net_device *dev, struct ethtool_eee *e)
639 int ret; 639 int ret;
640 640
641 /* Port's PHY and MAC both need to be EEE capable */ 641 /* Port's PHY and MAC both need to be EEE capable */
642 if (!dev->phydev) 642 if (!dev->phydev && !dp->pl)
643 return -ENODEV; 643 return -ENODEV;
644 644
645 if (!ds->ops->set_mac_eee) 645 if (!ds->ops->set_mac_eee)
@@ -659,7 +659,7 @@ static int dsa_slave_get_eee(struct net_device *dev, struct ethtool_eee *e)
659 int ret; 659 int ret;
660 660
661 /* Port's PHY and MAC both need to be EEE capable */ 661 /* Port's PHY and MAC both need to be EEE capable */
662 if (!dev->phydev) 662 if (!dev->phydev && !dp->pl)
663 return -ENODEV; 663 return -ENODEV;
664 664
665 if (!ds->ops->get_mac_eee) 665 if (!ds->ops->get_mac_eee)
@@ -1248,6 +1248,9 @@ int dsa_slave_suspend(struct net_device *slave_dev)
1248{ 1248{
1249 struct dsa_port *dp = dsa_slave_to_port(slave_dev); 1249 struct dsa_port *dp = dsa_slave_to_port(slave_dev);
1250 1250
1251 if (!netif_running(slave_dev))
1252 return 0;
1253
1251 netif_device_detach(slave_dev); 1254 netif_device_detach(slave_dev);
1252 1255
1253 rtnl_lock(); 1256 rtnl_lock();
@@ -1261,6 +1264,9 @@ int dsa_slave_resume(struct net_device *slave_dev)
1261{ 1264{
1262 struct dsa_port *dp = dsa_slave_to_port(slave_dev); 1265 struct dsa_port *dp = dsa_slave_to_port(slave_dev);
1263 1266
1267 if (!netif_running(slave_dev))
1268 return 0;
1269
1264 netif_device_attach(slave_dev); 1270 netif_device_attach(slave_dev);
1265 1271
1266 rtnl_lock(); 1272 rtnl_lock();
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index e46cdd310e5f..2998b0e47d4b 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -292,19 +292,19 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb)
292 return ip_hdr(skb)->daddr; 292 return ip_hdr(skb)->daddr;
293 293
294 in_dev = __in_dev_get_rcu(dev); 294 in_dev = __in_dev_get_rcu(dev);
295 BUG_ON(!in_dev);
296 295
297 net = dev_net(dev); 296 net = dev_net(dev);
298 297
299 scope = RT_SCOPE_UNIVERSE; 298 scope = RT_SCOPE_UNIVERSE;
300 if (!ipv4_is_zeronet(ip_hdr(skb)->saddr)) { 299 if (!ipv4_is_zeronet(ip_hdr(skb)->saddr)) {
300 bool vmark = in_dev && IN_DEV_SRC_VMARK(in_dev);
301 struct flowi4 fl4 = { 301 struct flowi4 fl4 = {
302 .flowi4_iif = LOOPBACK_IFINDEX, 302 .flowi4_iif = LOOPBACK_IFINDEX,
303 .flowi4_oif = l3mdev_master_ifindex_rcu(dev), 303 .flowi4_oif = l3mdev_master_ifindex_rcu(dev),
304 .daddr = ip_hdr(skb)->saddr, 304 .daddr = ip_hdr(skb)->saddr,
305 .flowi4_tos = RT_TOS(ip_hdr(skb)->tos), 305 .flowi4_tos = RT_TOS(ip_hdr(skb)->tos),
306 .flowi4_scope = scope, 306 .flowi4_scope = scope,
307 .flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0, 307 .flowi4_mark = vmark ? skb->mark : 0,
308 }; 308 };
309 if (!fib_lookup(net, &fl4, &res, 0)) 309 if (!fib_lookup(net, &fl4, &res, 0))
310 return FIB_RES_PREFSRC(net, res); 310 return FIB_RES_PREFSRC(net, res);
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 28fef7d15959..75151be21413 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1387,7 +1387,8 @@ static void ip_mc_hash_remove(struct in_device *in_dev,
1387/* 1387/*
1388 * A socket has joined a multicast group on device dev. 1388 * A socket has joined a multicast group on device dev.
1389 */ 1389 */
1390void __ip_mc_inc_group(struct in_device *in_dev, __be32 addr, unsigned int mode) 1390static void __ip_mc_inc_group(struct in_device *in_dev, __be32 addr,
1391 unsigned int mode)
1391{ 1392{
1392 struct ip_mc_list *im; 1393 struct ip_mc_list *im;
1393#ifdef CONFIG_IP_MULTICAST 1394#ifdef CONFIG_IP_MULTICAST
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 1e4cf3ab560f..0d70608cc2e1 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -157,9 +157,6 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
157{ 157{
158 struct inet_frag_queue *q; 158 struct inet_frag_queue *q;
159 159
160 if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh)
161 return NULL;
162
163 q = kmem_cache_zalloc(f->frags_cachep, GFP_ATOMIC); 160 q = kmem_cache_zalloc(f->frags_cachep, GFP_ATOMIC);
164 if (!q) 161 if (!q)
165 return NULL; 162 return NULL;
@@ -204,6 +201,9 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key)
204{ 201{
205 struct inet_frag_queue *fq; 202 struct inet_frag_queue *fq;
206 203
204 if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh)
205 return NULL;
206
207 rcu_read_lock(); 207 rcu_read_lock();
208 208
209 fq = rhashtable_lookup(&nf->rhashtable, key, nf->f->rhash_params); 209 fq = rhashtable_lookup(&nf->rhashtable, key, nf->f->rhash_params);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 8e9528ebaa8e..d14d741fb05e 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -383,11 +383,16 @@ found:
383 int i = end - next->ip_defrag_offset; /* overlap is 'i' bytes */ 383 int i = end - next->ip_defrag_offset; /* overlap is 'i' bytes */
384 384
385 if (i < next->len) { 385 if (i < next->len) {
386 int delta = -next->truesize;
387
386 /* Eat head of the next overlapped fragment 388 /* Eat head of the next overlapped fragment
387 * and leave the loop. The next ones cannot overlap. 389 * and leave the loop. The next ones cannot overlap.
388 */ 390 */
389 if (!pskb_pull(next, i)) 391 if (!pskb_pull(next, i))
390 goto err; 392 goto err;
393 delta += next->truesize;
394 if (delta)
395 add_frag_mem_limit(qp->q.net, delta);
391 next->ip_defrag_offset += i; 396 next->ip_defrag_offset += i;
392 qp->q.meat -= i; 397 qp->q.meat -= i;
393 if (next->ip_summed != CHECKSUM_UNNECESSARY) 398 if (next->ip_summed != CHECKSUM_UNNECESSARY)
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 58e2f479ffb4..4bfff3c87e8e 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -354,6 +354,10 @@ static u32 bbr_target_cwnd(struct sock *sk, u32 bw, int gain)
354 /* Reduce delayed ACKs by rounding up cwnd to the next even number. */ 354 /* Reduce delayed ACKs by rounding up cwnd to the next even number. */
355 cwnd = (cwnd + 1) & ~1U; 355 cwnd = (cwnd + 1) & ~1U;
356 356
357 /* Ensure gain cycling gets inflight above BDP even for small BDPs. */
358 if (bbr->mode == BBR_PROBE_BW && gain > BBR_UNIT)
359 cwnd += 2;
360
357 return cwnd; 361 return cwnd;
358} 362}
359 363
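The tcp_bbr.c hunk adds two packets of headroom to the target cwnd while PROBE_BW runs a gain cycle above 1, so even a very small BDP leaves room to put extra data in flight and actually probe for more bandwidth. The toy sketch below keeps only that shape of the calculation: the BDP is passed in as a packet count, the gain is a plain fraction, and BBR_UNIT scaling and the ACK-aggregation offsets of the real bbr_target_cwnd() are omitted.

	#include <stdio.h>

	enum bbr_mode { BBR_STARTUP, BBR_PROBE_BW };

	static unsigned int target_cwnd(enum bbr_mode mode, unsigned int bdp_pkts,
					unsigned int gain_num, unsigned int gain_den)
	{
		unsigned int cwnd = bdp_pkts * gain_num / gain_den;

		cwnd = (cwnd + 1) & ~1U;	/* round up to an even number */

		/* Ensure gain cycling gets inflight above the BDP even for tiny BDPs. */
		if (mode == BBR_PROBE_BW && gain_num > gain_den)
			cwnd += 2;

		return cwnd;
	}

	int main(void)
	{
		/* BDP of 4 packets, probing with a 5/4 gain: 5 -> 6 (even) -> 8. */
		printf("%u\n", target_cwnd(BBR_PROBE_BW, 4, 5, 4));
		/* Same BDP, unity gain: no headroom added. */
		printf("%u\n", target_cwnd(BBR_PROBE_BW, 4, 1, 1));
		return 0;
	}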
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3bcd30a2ba06..f9dcb29be12d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -246,8 +246,15 @@ static void tcp_ecn_queue_cwr(struct tcp_sock *tp)
246 246
247static void tcp_ecn_accept_cwr(struct tcp_sock *tp, const struct sk_buff *skb) 247static void tcp_ecn_accept_cwr(struct tcp_sock *tp, const struct sk_buff *skb)
248{ 248{
249 if (tcp_hdr(skb)->cwr) 249 if (tcp_hdr(skb)->cwr) {
250 tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR; 250 tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
251
252 /* If the sender is telling us it has entered CWR, then its
253 * cwnd may be very low (even just 1 packet), so we should ACK
254 * immediately.
255 */
256 tcp_enter_quickack_mode((struct sock *)tp, 2);
257 }
251} 258}
252 259
253static void tcp_ecn_withdraw_cwr(struct tcp_sock *tp) 260static void tcp_ecn_withdraw_cwr(struct tcp_sock *tp)
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 97513f35bcc5..88a7579c23bd 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -669,8 +669,10 @@ skip_cow:
669 669
670 sg_init_table(sg, nfrags); 670 sg_init_table(sg, nfrags);
671 ret = skb_to_sgvec(skb, sg, 0, skb->len); 671 ret = skb_to_sgvec(skb, sg, 0, skb->len);
672 if (unlikely(ret < 0)) 672 if (unlikely(ret < 0)) {
673 kfree(tmp);
673 goto out; 674 goto out;
675 }
674 676
675 skb->ip_summed = CHECKSUM_NONE; 677 skb->ip_summed = CHECKSUM_NONE;
676 678
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 00e138a44cbb..1cc9650af9fb 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1133,12 +1133,8 @@ route_lookup:
1133 max_headroom += 8; 1133 max_headroom += 8;
1134 mtu -= 8; 1134 mtu -= 8;
1135 } 1135 }
1136 if (skb->protocol == htons(ETH_P_IPV6)) { 1136 mtu = max(mtu, skb->protocol == htons(ETH_P_IPV6) ?
1137 if (mtu < IPV6_MIN_MTU) 1137 IPV6_MIN_MTU : IPV4_MIN_MTU);
1138 mtu = IPV6_MIN_MTU;
1139 } else if (mtu < 576) {
1140 mtu = 576;
1141 }
1142 1138
1143 skb_dst_update_pmtu(skb, mtu); 1139 skb_dst_update_pmtu(skb, mtu);
1144 if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) { 1140 if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) {
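The ip6_tunnel.c hunk collapses the protocol-dependent if/else MTU floors into a single max() clamp against IPV6_MIN_MTU or IPV4_MIN_MTU, replacing the hard-coded 576 for the non-IPv6 case with the named IPv4 minimum. The equivalent expression in a tiny standalone form; the constant values quoted in the comments follow the usual RFC minimums and are not taken from this diff.

	#include <stdio.h>

	#define IPV6_MIN_MTU	1280
	#define IPV4_MIN_MTU	68	/* RFC 791 minimum; the old code hard-coded 576 */

	#define max(a, b)	((a) > (b) ? (a) : (b))

	static unsigned int clamp_tunnel_mtu(unsigned int mtu, int is_ipv6)
	{
		return max(mtu, is_ipv6 ? (unsigned int)IPV6_MIN_MTU
					: (unsigned int)IPV4_MIN_MTU);
	}

	int main(void)
	{
		printf("%u\n", clamp_tunnel_mtu(1000, 1));	/* raised to 1280 */
		printf("%u\n", clamp_tunnel_mtu(1000, 0));	/* left at 1000 */
		return 0;
	}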
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index b7f28deddaea..c72ae3a4fe09 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -480,10 +480,6 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
480 goto tx_err_dst_release; 480 goto tx_err_dst_release;
481 } 481 }
482 482
483 skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev)));
484 skb_dst_set(skb, dst);
485 skb->dev = skb_dst(skb)->dev;
486
487 mtu = dst_mtu(dst); 483 mtu = dst_mtu(dst);
488 if (!skb->ignore_df && skb->len > mtu) { 484 if (!skb->ignore_df && skb->len > mtu) {
489 skb_dst_update_pmtu(skb, mtu); 485 skb_dst_update_pmtu(skb, mtu);
@@ -498,9 +494,14 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
498 htonl(mtu)); 494 htonl(mtu));
499 } 495 }
500 496
501 return -EMSGSIZE; 497 err = -EMSGSIZE;
498 goto tx_err_dst_release;
502 } 499 }
503 500
501 skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev)));
502 skb_dst_set(skb, dst);
503 skb->dev = skb_dst(skb)->dev;
504
504 err = dst_output(t->net, skb->sk, skb); 505 err = dst_output(t->net, skb->sk, skb);
505 if (net_xmit_eval(err) == 0) { 506 if (net_xmit_eval(err) == 0) {
506 struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats); 507 struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index ec18b3ce8b6d..7208c16302f6 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -978,10 +978,6 @@ static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
978 rt->rt6i_flags &= ~RTF_EXPIRES; 978 rt->rt6i_flags &= ~RTF_EXPIRES;
979 rcu_assign_pointer(rt->from, from); 979 rcu_assign_pointer(rt->from, from);
980 dst_init_metrics(&rt->dst, from->fib6_metrics->metrics, true); 980 dst_init_metrics(&rt->dst, from->fib6_metrics->metrics, true);
981 if (from->fib6_metrics != &dst_default_metrics) {
982 rt->dst._metrics |= DST_METRICS_REFCOUNTED;
983 refcount_inc(&from->fib6_metrics->refcnt);
984 }
985} 981}
986 982
987/* Caller must already hold reference to @ort */ 983/* Caller must already hold reference to @ort */
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index e398797878a9..cf6cca260e7b 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -1201,13 +1201,18 @@ static int pppol2tp_tunnel_ioctl(struct l2tp_tunnel *tunnel,
1201 l2tp_session_get(sock_net(sk), tunnel, 1201 l2tp_session_get(sock_net(sk), tunnel,
1202 stats.session_id); 1202 stats.session_id);
1203 1203
1204 if (session && session->pwtype == L2TP_PWTYPE_PPP) { 1204 if (!session) {
1205 err = pppol2tp_session_ioctl(session, cmd, 1205 err = -EBADR;
1206 arg); 1206 break;
1207 }
1208 if (session->pwtype != L2TP_PWTYPE_PPP) {
1207 l2tp_session_dec_refcount(session); 1209 l2tp_session_dec_refcount(session);
1208 } else {
1209 err = -EBADR; 1210 err = -EBADR;
1211 break;
1210 } 1212 }
1213
1214 err = pppol2tp_session_ioctl(session, cmd, arg);
1215 l2tp_session_dec_refcount(session);
1211 break; 1216 break;
1212 } 1217 }
1213#ifdef CONFIG_XFRM 1218#ifdef CONFIG_XFRM
diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c
index 89041260784c..260b3dc1b4a2 100644
--- a/net/llc/llc_core.c
+++ b/net/llc/llc_core.c
@@ -73,8 +73,8 @@ struct llc_sap *llc_sap_find(unsigned char sap_value)
73 73
74 rcu_read_lock_bh(); 74 rcu_read_lock_bh();
75 sap = __llc_sap_find(sap_value); 75 sap = __llc_sap_find(sap_value);
76 if (sap) 76 if (!sap || !llc_sap_hold_safe(sap))
77 llc_sap_hold(sap); 77 sap = NULL;
78 rcu_read_unlock_bh(); 78 rcu_read_unlock_bh();
79 return sap; 79 return sap;
80} 80}
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 393573a99a5a..56704d95f82d 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -63,6 +63,7 @@
63#include <linux/hash.h> 63#include <linux/hash.h>
64#include <linux/genetlink.h> 64#include <linux/genetlink.h>
65#include <linux/net_namespace.h> 65#include <linux/net_namespace.h>
66#include <linux/nospec.h>
66 67
67#include <net/net_namespace.h> 68#include <net/net_namespace.h>
68#include <net/netns/generic.h> 69#include <net/netns/generic.h>
@@ -679,6 +680,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol,
679 680
680 if (protocol < 0 || protocol >= MAX_LINKS) 681 if (protocol < 0 || protocol >= MAX_LINKS)
681 return -EPROTONOSUPPORT; 682 return -EPROTONOSUPPORT;
683 protocol = array_index_nospec(protocol, MAX_LINKS);
682 684
683 netlink_lock_table(); 685 netlink_lock_table();
684#ifdef CONFIG_MODULES 686#ifdef CONFIG_MODULES
@@ -1009,6 +1011,11 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
1009 return err; 1011 return err;
1010 } 1012 }
1011 1013
1014 if (nlk->ngroups == 0)
1015 groups = 0;
1016 else if (nlk->ngroups < 8*sizeof(groups))
1017 groups &= (1UL << nlk->ngroups) - 1;
1018
1012 bound = nlk->bound; 1019 bound = nlk->bound;
1013 if (bound) { 1020 if (bound) {
1014 /* Ensure nlk->portid is up-to-date. */ 1021 /* Ensure nlk->portid is up-to-date. */
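Note on the af_netlink.c hunks above: the protocol index is clamped with array_index_nospec() after the bounds check so it cannot be used speculatively out of range (Spectre v1), and the requested multicast groups are masked down to the ngroups the protocol actually supports. Below is a small user-space sketch of the group-masking arithmetic only; array_index_nospec() is a kernel helper and is merely described here, not reimplemented.

	#include <stdio.h>

	/* Keep only the group bits a protocol really supports. */
	static unsigned long mask_groups(unsigned long groups, unsigned int ngroups)
	{
		if (ngroups == 0)
			return 0;
		if (ngroups < 8 * sizeof(groups))
			groups &= (1UL << ngroups) - 1;   /* drop bits >= ngroups */
		return groups;
	}

	int main(void)
	{
		/* Ask for groups 0..7 when only 3 exist: bits 3..7 are dropped. */
		printf("%#lx\n", mask_groups(0xffUL, 3));   /* prints 0x7 */
		return 0;
	}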
diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
index b891a91577f8..c038e021a591 100644
--- a/net/openvswitch/meter.c
+++ b/net/openvswitch/meter.c
@@ -211,6 +211,7 @@ static struct dp_meter *dp_meter_create(struct nlattr **a)
211 if (!meter) 211 if (!meter)
212 return ERR_PTR(-ENOMEM); 212 return ERR_PTR(-ENOMEM);
213 213
214 meter->id = nla_get_u32(a[OVS_METER_ATTR_ID]);
214 meter->used = div_u64(ktime_get_ns(), 1000 * 1000); 215 meter->used = div_u64(ktime_get_ns(), 1000 * 1000);
215 meter->kbps = a[OVS_METER_ATTR_KBPS] ? 1 : 0; 216 meter->kbps = a[OVS_METER_ATTR_KBPS] ? 1 : 0;
216 meter->keep_stats = !a[OVS_METER_ATTR_CLEAR]; 217 meter->keep_stats = !a[OVS_METER_ATTR_CLEAR];
@@ -280,6 +281,10 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
280 u32 meter_id; 281 u32 meter_id;
281 bool failed; 282 bool failed;
282 283
284 if (!a[OVS_METER_ATTR_ID]) {
285 return -ENODEV;
286 }
287
283 meter = dp_meter_create(a); 288 meter = dp_meter_create(a);
284 if (IS_ERR_OR_NULL(meter)) 289 if (IS_ERR_OR_NULL(meter))
285 return PTR_ERR(meter); 290 return PTR_ERR(meter);
@@ -298,11 +303,6 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
298 goto exit_unlock; 303 goto exit_unlock;
299 } 304 }
300 305
301 if (!a[OVS_METER_ATTR_ID]) {
302 err = -ENODEV;
303 goto exit_unlock;
304 }
305
306 meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]); 306 meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
307 307
308 /* Cannot fail after this. */ 308 /* Cannot fail after this. */
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 9b27d0cd766d..e6445d8f3f57 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -4226,6 +4226,8 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
4226 } 4226 }
4227 4227
4228 if (req->tp_block_nr) { 4228 if (req->tp_block_nr) {
4229 unsigned int min_frame_size;
4230
4229 /* Sanity tests and some calculations */ 4231 /* Sanity tests and some calculations */
4230 err = -EBUSY; 4232 err = -EBUSY;
4231 if (unlikely(rb->pg_vec)) 4233 if (unlikely(rb->pg_vec))
@@ -4248,12 +4250,12 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
4248 goto out; 4250 goto out;
4249 if (unlikely(!PAGE_ALIGNED(req->tp_block_size))) 4251 if (unlikely(!PAGE_ALIGNED(req->tp_block_size)))
4250 goto out; 4252 goto out;
4253 min_frame_size = po->tp_hdrlen + po->tp_reserve;
4251 if (po->tp_version >= TPACKET_V3 && 4254 if (po->tp_version >= TPACKET_V3 &&
4252 req->tp_block_size <= 4255 req->tp_block_size <
4253 BLK_PLUS_PRIV((u64)req_u->req3.tp_sizeof_priv) + sizeof(struct tpacket3_hdr)) 4256 BLK_PLUS_PRIV((u64)req_u->req3.tp_sizeof_priv) + min_frame_size)
4254 goto out; 4257 goto out;
4255 if (unlikely(req->tp_frame_size < po->tp_hdrlen + 4258 if (unlikely(req->tp_frame_size < min_frame_size))
4256 po->tp_reserve))
4257 goto out; 4259 goto out;
4258 if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1))) 4260 if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
4259 goto out; 4261 goto out;
diff --git a/net/rds/ib_frmr.c b/net/rds/ib_frmr.c
index 48332a6ed738..d152e48ea371 100644
--- a/net/rds/ib_frmr.c
+++ b/net/rds/ib_frmr.c
@@ -344,6 +344,11 @@ struct rds_ib_mr *rds_ib_reg_frmr(struct rds_ib_device *rds_ibdev,
344 struct rds_ib_frmr *frmr; 344 struct rds_ib_frmr *frmr;
345 int ret; 345 int ret;
346 346
347 if (!ic) {
348 /* TODO: Add FRWR support for RDS_GET_MR using proxy qp*/
349 return ERR_PTR(-EOPNOTSUPP);
350 }
351
347 do { 352 do {
348 if (ibmr) 353 if (ibmr)
349 rds_ib_free_frmr(ibmr, true); 354 rds_ib_free_frmr(ibmr, true);
diff --git a/net/rds/ib_mr.h b/net/rds/ib_mr.h
index 0ea4ab017a8c..655f01d427fe 100644
--- a/net/rds/ib_mr.h
+++ b/net/rds/ib_mr.h
@@ -115,7 +115,8 @@ void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev,
115 struct rds_info_rdma_connection *iinfo); 115 struct rds_info_rdma_connection *iinfo);
116void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *); 116void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *);
117void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents, 117void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
118 struct rds_sock *rs, u32 *key_ret); 118 struct rds_sock *rs, u32 *key_ret,
119 struct rds_connection *conn);
119void rds_ib_sync_mr(void *trans_private, int dir); 120void rds_ib_sync_mr(void *trans_private, int dir);
120void rds_ib_free_mr(void *trans_private, int invalidate); 121void rds_ib_free_mr(void *trans_private, int invalidate);
121void rds_ib_flush_mrs(void); 122void rds_ib_flush_mrs(void);
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index e678699268a2..2e49a40a5e11 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -537,11 +537,12 @@ void rds_ib_flush_mrs(void)
537} 537}
538 538
539void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents, 539void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
540 struct rds_sock *rs, u32 *key_ret) 540 struct rds_sock *rs, u32 *key_ret,
541 struct rds_connection *conn)
541{ 542{
542 struct rds_ib_device *rds_ibdev; 543 struct rds_ib_device *rds_ibdev;
543 struct rds_ib_mr *ibmr = NULL; 544 struct rds_ib_mr *ibmr = NULL;
544 struct rds_ib_connection *ic = rs->rs_conn->c_transport_data; 545 struct rds_ib_connection *ic = NULL;
545 int ret; 546 int ret;
546 547
547 rds_ibdev = rds_ib_get_device(rs->rs_bound_addr); 548 rds_ibdev = rds_ib_get_device(rs->rs_bound_addr);
@@ -550,6 +551,9 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
550 goto out; 551 goto out;
551 } 552 }
552 553
554 if (conn)
555 ic = conn->c_transport_data;
556
553 if (!rds_ibdev->mr_8k_pool || !rds_ibdev->mr_1m_pool) { 557 if (!rds_ibdev->mr_8k_pool || !rds_ibdev->mr_1m_pool) {
554 ret = -ENODEV; 558 ret = -ENODEV;
555 goto out; 559 goto out;
@@ -559,17 +563,18 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
559 ibmr = rds_ib_reg_frmr(rds_ibdev, ic, sg, nents, key_ret); 563 ibmr = rds_ib_reg_frmr(rds_ibdev, ic, sg, nents, key_ret);
560 else 564 else
561 ibmr = rds_ib_reg_fmr(rds_ibdev, sg, nents, key_ret); 565 ibmr = rds_ib_reg_fmr(rds_ibdev, sg, nents, key_ret);
562 if (ibmr) 566 if (IS_ERR(ibmr)) {
563 rds_ibdev = NULL; 567 ret = PTR_ERR(ibmr);
564
565 out:
566 if (!ibmr)
567 pr_warn("RDS/IB: rds_ib_get_mr failed (errno=%d)\n", ret); 568 pr_warn("RDS/IB: rds_ib_get_mr failed (errno=%d)\n", ret);
569 } else {
570 return ibmr;
571 }
568 572
573 out:
569 if (rds_ibdev) 574 if (rds_ibdev)
570 rds_ib_dev_put(rds_ibdev); 575 rds_ib_dev_put(rds_ibdev);
571 576
572 return ibmr; 577 return ERR_PTR(ret);
573} 578}
574 579
575void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *pool) 580void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *pool)
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 634cfcb7bba6..80920e47f2c7 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -170,7 +170,8 @@ static int rds_pin_pages(unsigned long user_addr, unsigned int nr_pages,
170} 170}
171 171
172static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args, 172static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
173 u64 *cookie_ret, struct rds_mr **mr_ret) 173 u64 *cookie_ret, struct rds_mr **mr_ret,
174 struct rds_conn_path *cp)
174{ 175{
175 struct rds_mr *mr = NULL, *found; 176 struct rds_mr *mr = NULL, *found;
176 unsigned int nr_pages; 177 unsigned int nr_pages;
@@ -269,7 +270,8 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
269 * Note that dma_map() implies that pending writes are 270 * Note that dma_map() implies that pending writes are
270 * flushed to RAM, so no dma_sync is needed here. */ 271 * flushed to RAM, so no dma_sync is needed here. */
271 trans_private = rs->rs_transport->get_mr(sg, nents, rs, 272 trans_private = rs->rs_transport->get_mr(sg, nents, rs,
272 &mr->r_key); 273 &mr->r_key,
274 cp ? cp->cp_conn : NULL);
273 275
274 if (IS_ERR(trans_private)) { 276 if (IS_ERR(trans_private)) {
275 for (i = 0 ; i < nents; i++) 277 for (i = 0 ; i < nents; i++)
@@ -330,7 +332,7 @@ int rds_get_mr(struct rds_sock *rs, char __user *optval, int optlen)
330 sizeof(struct rds_get_mr_args))) 332 sizeof(struct rds_get_mr_args)))
331 return -EFAULT; 333 return -EFAULT;
332 334
333 return __rds_rdma_map(rs, &args, NULL, NULL); 335 return __rds_rdma_map(rs, &args, NULL, NULL, NULL);
334} 336}
335 337
336int rds_get_mr_for_dest(struct rds_sock *rs, char __user *optval, int optlen) 338int rds_get_mr_for_dest(struct rds_sock *rs, char __user *optval, int optlen)
@@ -354,7 +356,7 @@ int rds_get_mr_for_dest(struct rds_sock *rs, char __user *optval, int optlen)
354 new_args.cookie_addr = args.cookie_addr; 356 new_args.cookie_addr = args.cookie_addr;
355 new_args.flags = args.flags; 357 new_args.flags = args.flags;
356 358
357 return __rds_rdma_map(rs, &new_args, NULL, NULL); 359 return __rds_rdma_map(rs, &new_args, NULL, NULL, NULL);
358} 360}
359 361
360/* 362/*
@@ -782,7 +784,8 @@ int rds_cmsg_rdma_map(struct rds_sock *rs, struct rds_message *rm,
782 rm->m_rdma_cookie != 0) 784 rm->m_rdma_cookie != 0)
783 return -EINVAL; 785 return -EINVAL;
784 786
785 return __rds_rdma_map(rs, CMSG_DATA(cmsg), &rm->m_rdma_cookie, &rm->rdma.op_rdma_mr); 787 return __rds_rdma_map(rs, CMSG_DATA(cmsg), &rm->m_rdma_cookie,
788 &rm->rdma.op_rdma_mr, rm->m_conn_path);
786} 789}
787 790
788/* 791/*
diff --git a/net/rds/rds.h b/net/rds/rds.h
index f2272fb8cd45..60b3b787fbdb 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -464,6 +464,8 @@ struct rds_message {
464 struct scatterlist *op_sg; 464 struct scatterlist *op_sg;
465 } data; 465 } data;
466 }; 466 };
467
468 struct rds_conn_path *m_conn_path;
467}; 469};
468 470
469/* 471/*
@@ -544,7 +546,8 @@ struct rds_transport {
544 unsigned int avail); 546 unsigned int avail);
545 void (*exit)(void); 547 void (*exit)(void);
546 void *(*get_mr)(struct scatterlist *sg, unsigned long nr_sg, 548 void *(*get_mr)(struct scatterlist *sg, unsigned long nr_sg,
547 struct rds_sock *rs, u32 *key_ret); 549 struct rds_sock *rs, u32 *key_ret,
550 struct rds_connection *conn);
548 void (*sync_mr)(void *trans_private, int direction); 551 void (*sync_mr)(void *trans_private, int direction);
549 void (*free_mr)(void *trans_private, int invalidate); 552 void (*free_mr)(void *trans_private, int invalidate);
550 void (*flush_mrs)(void); 553 void (*flush_mrs)(void);
diff --git a/net/rds/send.c b/net/rds/send.c
index 94c7f74909be..59f17a2335f4 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -1169,6 +1169,13 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
1169 rs->rs_conn = conn; 1169 rs->rs_conn = conn;
1170 } 1170 }
1171 1171
1172 if (conn->c_trans->t_mp_capable)
1173 cpath = &conn->c_path[rds_send_mprds_hash(rs, conn)];
1174 else
1175 cpath = &conn->c_path[0];
1176
1177 rm->m_conn_path = cpath;
1178
1172 /* Parse any control messages the user may have included. */ 1179 /* Parse any control messages the user may have included. */
1173 ret = rds_cmsg_send(rs, rm, msg, &allocated_mr); 1180 ret = rds_cmsg_send(rs, rm, msg, &allocated_mr);
1174 if (ret) { 1181 if (ret) {
@@ -1192,11 +1199,6 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
1192 goto out; 1199 goto out;
1193 } 1200 }
1194 1201
1195 if (conn->c_trans->t_mp_capable)
1196 cpath = &conn->c_path[rds_send_mprds_hash(rs, conn)];
1197 else
1198 cpath = &conn->c_path[0];
1199
1200 if (rds_destroy_pending(conn)) { 1202 if (rds_destroy_pending(conn)) {
1201 ret = -EAGAIN; 1203 ret = -EAGAIN;
1202 goto out; 1204 goto out;
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 5fb7d3254d9e..707630ab4713 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -104,9 +104,9 @@ struct rxrpc_net {
104 104
105#define RXRPC_KEEPALIVE_TIME 20 /* NAT keepalive time in seconds */ 105#define RXRPC_KEEPALIVE_TIME 20 /* NAT keepalive time in seconds */
106 u8 peer_keepalive_cursor; 106 u8 peer_keepalive_cursor;
107 ktime_t peer_keepalive_base; 107 time64_t peer_keepalive_base;
108 struct hlist_head peer_keepalive[RXRPC_KEEPALIVE_TIME + 1]; 108 struct list_head peer_keepalive[32];
109 struct hlist_head peer_keepalive_new; 109 struct list_head peer_keepalive_new;
110 struct timer_list peer_keepalive_timer; 110 struct timer_list peer_keepalive_timer;
111 struct work_struct peer_keepalive_work; 111 struct work_struct peer_keepalive_work;
112}; 112};
@@ -295,7 +295,7 @@ struct rxrpc_peer {
295 struct hlist_head error_targets; /* targets for net error distribution */ 295 struct hlist_head error_targets; /* targets for net error distribution */
296 struct work_struct error_distributor; 296 struct work_struct error_distributor;
297 struct rb_root service_conns; /* Service connections */ 297 struct rb_root service_conns; /* Service connections */
298 struct hlist_node keepalive_link; /* Link in net->peer_keepalive[] */ 298 struct list_head keepalive_link; /* Link in net->peer_keepalive[] */
299 time64_t last_tx_at; /* Last time packet sent here */ 299 time64_t last_tx_at; /* Last time packet sent here */
300 seqlock_t service_conn_lock; 300 seqlock_t service_conn_lock;
301 spinlock_t lock; /* access lock */ 301 spinlock_t lock; /* access lock */
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index a9a9be5519b9..9d1e298b784c 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -116,9 +116,9 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx,
116 while (*pp) { 116 while (*pp) {
117 parent = *pp; 117 parent = *pp;
118 xcall = rb_entry(parent, struct rxrpc_call, sock_node); 118 xcall = rb_entry(parent, struct rxrpc_call, sock_node);
119 if (user_call_ID < call->user_call_ID) 119 if (user_call_ID < xcall->user_call_ID)
120 pp = &(*pp)->rb_left; 120 pp = &(*pp)->rb_left;
121 else if (user_call_ID > call->user_call_ID) 121 else if (user_call_ID > xcall->user_call_ID)
122 pp = &(*pp)->rb_right; 122 pp = &(*pp)->rb_right;
123 else 123 else
124 goto id_in_use; 124 goto id_in_use;
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index f6734d8cb01a..9486293fef5c 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -415,7 +415,7 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx,
415bool rxrpc_queue_call(struct rxrpc_call *call) 415bool rxrpc_queue_call(struct rxrpc_call *call)
416{ 416{
417 const void *here = __builtin_return_address(0); 417 const void *here = __builtin_return_address(0);
418 int n = __atomic_add_unless(&call->usage, 1, 0); 418 int n = atomic_fetch_add_unless(&call->usage, 1, 0);
419 if (n == 0) 419 if (n == 0)
420 return false; 420 return false;
421 if (rxrpc_queue_work(&call->processor)) 421 if (rxrpc_queue_work(&call->processor))
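Note on the call_object.c hunk above (and the matching ones in conn_object.c, local_object.c and peer_object.c below): this is the rename of __atomic_add_unless() to atomic_fetch_add_unless(); both return the previous value, so n == 0 still means the object was already dead and no reference was taken. A user-space sketch of the same "take a reference only if the count is not already zero" idiom using C11 atomics; the kernel primitive is a single helper, this is just the equivalent CAS loop.

	#include <stdatomic.h>
	#include <stdio.h>

	/* Increment *v by 1 unless it currently equals 0; return the old value. */
	static int fetch_add_unless_zero(atomic_int *v)
	{
		int old = atomic_load(v);

		while (old != 0 &&
		       !atomic_compare_exchange_weak(v, &old, old + 1))
			;                       /* old is reloaded on failure */
		return old;
	}

	int main(void)
	{
		atomic_int usage = 2, dead = 0;

		printf("%d %d\n", fetch_add_unless_zero(&usage),   /* 2: ref taken */
		       fetch_add_unless_zero(&dead));              /* 0: refused   */
		return 0;
	}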
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index 8229a52c2acd..3fde001fcc39 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -136,7 +136,7 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
136 } 136 }
137 137
138 ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, ioc, len); 138 ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, ioc, len);
139 conn->params.peer->last_tx_at = ktime_get_real(); 139 conn->params.peer->last_tx_at = ktime_get_seconds();
140 if (ret < 0) 140 if (ret < 0)
141 trace_rxrpc_tx_fail(conn->debug_id, serial, ret, 141 trace_rxrpc_tx_fail(conn->debug_id, serial, ret,
142 rxrpc_tx_fail_call_final_resend); 142 rxrpc_tx_fail_call_final_resend);
@@ -245,7 +245,7 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn,
245 return -EAGAIN; 245 return -EAGAIN;
246 } 246 }
247 247
248 conn->params.peer->last_tx_at = ktime_get_real(); 248 conn->params.peer->last_tx_at = ktime_get_seconds();
249 249
250 _leave(" = 0"); 250 _leave(" = 0");
251 return 0; 251 return 0;
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index 4c77a78a252a..77440a356b14 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -266,7 +266,7 @@ void rxrpc_kill_connection(struct rxrpc_connection *conn)
266bool rxrpc_queue_conn(struct rxrpc_connection *conn) 266bool rxrpc_queue_conn(struct rxrpc_connection *conn)
267{ 267{
268 const void *here = __builtin_return_address(0); 268 const void *here = __builtin_return_address(0);
269 int n = __atomic_add_unless(&conn->usage, 1, 0); 269 int n = atomic_fetch_add_unless(&conn->usage, 1, 0);
270 if (n == 0) 270 if (n == 0)
271 return false; 271 return false;
272 if (rxrpc_queue_work(&conn->processor)) 272 if (rxrpc_queue_work(&conn->processor))
@@ -309,7 +309,7 @@ rxrpc_get_connection_maybe(struct rxrpc_connection *conn)
309 const void *here = __builtin_return_address(0); 309 const void *here = __builtin_return_address(0);
310 310
311 if (conn) { 311 if (conn) {
312 int n = __atomic_add_unless(&conn->usage, 1, 0); 312 int n = atomic_fetch_add_unless(&conn->usage, 1, 0);
313 if (n > 0) 313 if (n > 0)
314 trace_rxrpc_conn(conn, rxrpc_conn_got, n + 1, here); 314 trace_rxrpc_conn(conn, rxrpc_conn_got, n + 1, here);
315 else 315 else
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index b493e6b62740..777c3ed4cfc0 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -305,7 +305,7 @@ struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *local)
305 const void *here = __builtin_return_address(0); 305 const void *here = __builtin_return_address(0);
306 306
307 if (local) { 307 if (local) {
308 int n = __atomic_add_unless(&local->usage, 1, 0); 308 int n = atomic_fetch_add_unless(&local->usage, 1, 0);
309 if (n > 0) 309 if (n > 0)
310 trace_rxrpc_local(local, rxrpc_local_got, n + 1, here); 310 trace_rxrpc_local(local, rxrpc_local_got, n + 1, here);
311 else 311 else
diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c
index 5d6a773db973..417d80867c4f 100644
--- a/net/rxrpc/net_ns.c
+++ b/net/rxrpc/net_ns.c
@@ -85,12 +85,12 @@ static __net_init int rxrpc_init_net(struct net *net)
85 hash_init(rxnet->peer_hash); 85 hash_init(rxnet->peer_hash);
86 spin_lock_init(&rxnet->peer_hash_lock); 86 spin_lock_init(&rxnet->peer_hash_lock);
87 for (i = 0; i < ARRAY_SIZE(rxnet->peer_keepalive); i++) 87 for (i = 0; i < ARRAY_SIZE(rxnet->peer_keepalive); i++)
88 INIT_HLIST_HEAD(&rxnet->peer_keepalive[i]); 88 INIT_LIST_HEAD(&rxnet->peer_keepalive[i]);
89 INIT_HLIST_HEAD(&rxnet->peer_keepalive_new); 89 INIT_LIST_HEAD(&rxnet->peer_keepalive_new);
90 timer_setup(&rxnet->peer_keepalive_timer, 90 timer_setup(&rxnet->peer_keepalive_timer,
91 rxrpc_peer_keepalive_timeout, 0); 91 rxrpc_peer_keepalive_timeout, 0);
92 INIT_WORK(&rxnet->peer_keepalive_work, rxrpc_peer_keepalive_worker); 92 INIT_WORK(&rxnet->peer_keepalive_work, rxrpc_peer_keepalive_worker);
93 rxnet->peer_keepalive_base = ktime_add(ktime_get_real(), NSEC_PER_SEC); 93 rxnet->peer_keepalive_base = ktime_get_seconds();
94 94
95 ret = -ENOMEM; 95 ret = -ENOMEM;
96 rxnet->proc_net = proc_net_mkdir(net, "rxrpc", net->proc_net); 96 rxnet->proc_net = proc_net_mkdir(net, "rxrpc", net->proc_net);
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index f03de1c59ba3..4774c8f5634d 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -209,7 +209,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping,
209 now = ktime_get_real(); 209 now = ktime_get_real();
210 if (ping) 210 if (ping)
211 call->ping_time = now; 211 call->ping_time = now;
212 conn->params.peer->last_tx_at = ktime_get_real(); 212 conn->params.peer->last_tx_at = ktime_get_seconds();
213 if (ret < 0) 213 if (ret < 0)
214 trace_rxrpc_tx_fail(call->debug_id, serial, ret, 214 trace_rxrpc_tx_fail(call->debug_id, serial, ret,
215 rxrpc_tx_fail_call_ack); 215 rxrpc_tx_fail_call_ack);
@@ -296,7 +296,7 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call)
296 296
297 ret = kernel_sendmsg(conn->params.local->socket, 297 ret = kernel_sendmsg(conn->params.local->socket,
298 &msg, iov, 1, sizeof(pkt)); 298 &msg, iov, 1, sizeof(pkt));
299 conn->params.peer->last_tx_at = ktime_get_real(); 299 conn->params.peer->last_tx_at = ktime_get_seconds();
300 if (ret < 0) 300 if (ret < 0)
301 trace_rxrpc_tx_fail(call->debug_id, serial, ret, 301 trace_rxrpc_tx_fail(call->debug_id, serial, ret,
302 rxrpc_tx_fail_call_abort); 302 rxrpc_tx_fail_call_abort);
@@ -391,7 +391,7 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
391 * message and update the peer record 391 * message and update the peer record
392 */ 392 */
393 ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len); 393 ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len);
394 conn->params.peer->last_tx_at = ktime_get_real(); 394 conn->params.peer->last_tx_at = ktime_get_seconds();
395 395
396 up_read(&conn->params.local->defrag_sem); 396 up_read(&conn->params.local->defrag_sem);
397 if (ret < 0) 397 if (ret < 0)
@@ -457,7 +457,7 @@ send_fragmentable:
457 if (ret == 0) { 457 if (ret == 0) {
458 ret = kernel_sendmsg(conn->params.local->socket, &msg, 458 ret = kernel_sendmsg(conn->params.local->socket, &msg,
459 iov, 2, len); 459 iov, 2, len);
460 conn->params.peer->last_tx_at = ktime_get_real(); 460 conn->params.peer->last_tx_at = ktime_get_seconds();
461 461
462 opt = IP_PMTUDISC_DO; 462 opt = IP_PMTUDISC_DO;
463 kernel_setsockopt(conn->params.local->socket, SOL_IP, 463 kernel_setsockopt(conn->params.local->socket, SOL_IP,
@@ -475,7 +475,7 @@ send_fragmentable:
475 if (ret == 0) { 475 if (ret == 0) {
476 ret = kernel_sendmsg(conn->params.local->socket, &msg, 476 ret = kernel_sendmsg(conn->params.local->socket, &msg,
477 iov, 2, len); 477 iov, 2, len);
478 conn->params.peer->last_tx_at = ktime_get_real(); 478 conn->params.peer->last_tx_at = ktime_get_seconds();
479 479
480 opt = IPV6_PMTUDISC_DO; 480 opt = IPV6_PMTUDISC_DO;
481 kernel_setsockopt(conn->params.local->socket, 481 kernel_setsockopt(conn->params.local->socket,
@@ -599,6 +599,6 @@ void rxrpc_send_keepalive(struct rxrpc_peer *peer)
599 trace_rxrpc_tx_fail(peer->debug_id, 0, ret, 599 trace_rxrpc_tx_fail(peer->debug_id, 0, ret,
600 rxrpc_tx_fail_version_keepalive); 600 rxrpc_tx_fail_version_keepalive);
601 601
602 peer->last_tx_at = ktime_get_real(); 602 peer->last_tx_at = ktime_get_seconds();
603 _leave(""); 603 _leave("");
604} 604}
diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c
index 0ed8b651cec2..4f9da2f51c69 100644
--- a/net/rxrpc/peer_event.c
+++ b/net/rxrpc/peer_event.c
@@ -350,97 +350,117 @@ void rxrpc_peer_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why,
350} 350}
351 351
352/* 352/*
353 * Perform keep-alive pings with VERSION packets to keep any NAT alive. 353 * Perform keep-alive pings.
354 */ 354 */
355void rxrpc_peer_keepalive_worker(struct work_struct *work) 355static void rxrpc_peer_keepalive_dispatch(struct rxrpc_net *rxnet,
356 struct list_head *collector,
357 time64_t base,
358 u8 cursor)
356{ 359{
357 struct rxrpc_net *rxnet =
358 container_of(work, struct rxrpc_net, peer_keepalive_work);
359 struct rxrpc_peer *peer; 360 struct rxrpc_peer *peer;
360 unsigned long delay; 361 const u8 mask = ARRAY_SIZE(rxnet->peer_keepalive) - 1;
361 ktime_t base, now = ktime_get_real(); 362 time64_t keepalive_at;
362 s64 diff; 363 int slot;
363 u8 cursor, slot;
364 364
365 base = rxnet->peer_keepalive_base; 365 spin_lock_bh(&rxnet->peer_hash_lock);
366 cursor = rxnet->peer_keepalive_cursor;
367 366
368 _enter("%u,%lld", cursor, ktime_sub(now, base)); 367 while (!list_empty(collector)) {
368 peer = list_entry(collector->next,
369 struct rxrpc_peer, keepalive_link);
369 370
370next_bucket: 371 list_del_init(&peer->keepalive_link);
371 diff = ktime_to_ns(ktime_sub(now, base)); 372 if (!rxrpc_get_peer_maybe(peer))
372 if (diff < 0) 373 continue;
373 goto resched;
374 374
375 _debug("at %u", cursor);
376 spin_lock_bh(&rxnet->peer_hash_lock);
377next_peer:
378 if (!rxnet->live) {
379 spin_unlock_bh(&rxnet->peer_hash_lock); 375 spin_unlock_bh(&rxnet->peer_hash_lock);
380 goto out;
381 }
382 376
383 /* Everything in the bucket at the cursor is processed this second; the 377 keepalive_at = peer->last_tx_at + RXRPC_KEEPALIVE_TIME;
384 * bucket at cursor + 1 goes now + 1s and so on... 378 slot = keepalive_at - base;
385 */ 379 _debug("%02x peer %u t=%d {%pISp}",
386 if (hlist_empty(&rxnet->peer_keepalive[cursor])) { 380 cursor, peer->debug_id, slot, &peer->srx.transport);
387 if (hlist_empty(&rxnet->peer_keepalive_new)) { 381
388 spin_unlock_bh(&rxnet->peer_hash_lock); 382 if (keepalive_at <= base ||
389 goto emptied_bucket; 383 keepalive_at > base + RXRPC_KEEPALIVE_TIME) {
384 rxrpc_send_keepalive(peer);
385 slot = RXRPC_KEEPALIVE_TIME;
390 } 386 }
391 387
392 hlist_move_list(&rxnet->peer_keepalive_new, 388 /* A transmission to this peer occurred since last we examined
393 &rxnet->peer_keepalive[cursor]); 389 * it so put it into the appropriate future bucket.
390 */
391 slot += cursor;
392 slot &= mask;
393 spin_lock_bh(&rxnet->peer_hash_lock);
394 list_add_tail(&peer->keepalive_link,
395 &rxnet->peer_keepalive[slot & mask]);
396 rxrpc_put_peer(peer);
394 } 397 }
395 398
396 peer = hlist_entry(rxnet->peer_keepalive[cursor].first,
397 struct rxrpc_peer, keepalive_link);
398 hlist_del_init(&peer->keepalive_link);
399 if (!rxrpc_get_peer_maybe(peer))
400 goto next_peer;
401
402 spin_unlock_bh(&rxnet->peer_hash_lock); 399 spin_unlock_bh(&rxnet->peer_hash_lock);
400}
403 401
404 _debug("peer %u {%pISp}", peer->debug_id, &peer->srx.transport); 402/*
403 * Perform keep-alive pings with VERSION packets to keep any NAT alive.
404 */
405void rxrpc_peer_keepalive_worker(struct work_struct *work)
406{
407 struct rxrpc_net *rxnet =
408 container_of(work, struct rxrpc_net, peer_keepalive_work);
409 const u8 mask = ARRAY_SIZE(rxnet->peer_keepalive) - 1;
410 time64_t base, now, delay;
411 u8 cursor, stop;
412 LIST_HEAD(collector);
405 413
406recalc: 414 now = ktime_get_seconds();
407 diff = ktime_divns(ktime_sub(peer->last_tx_at, base), NSEC_PER_SEC); 415 base = rxnet->peer_keepalive_base;
408 if (diff < -30 || diff > 30) 416 cursor = rxnet->peer_keepalive_cursor;
409 goto send; /* LSW of 64-bit time probably wrapped on 32-bit */ 417 _enter("%lld,%u", base - now, cursor);
410 diff += RXRPC_KEEPALIVE_TIME - 1;
411 if (diff < 0)
412 goto send;
413 418
414 slot = (diff > RXRPC_KEEPALIVE_TIME - 1) ? RXRPC_KEEPALIVE_TIME - 1 : diff; 419 if (!rxnet->live)
415 if (slot == 0) 420 return;
416 goto send;
417 421
418 /* A transmission to this peer occurred since last we examined it so 422 /* Remove to a temporary list all the peers that are currently lodged
419 * put it into the appropriate future bucket. 423 * in expired buckets plus all new peers.
424 *
425 * Everything in the bucket at the cursor is processed this
426 * second; the bucket at cursor + 1 goes at now + 1s and so
427 * on...
420 */ 428 */
421 slot = (slot + cursor) % ARRAY_SIZE(rxnet->peer_keepalive);
422 spin_lock_bh(&rxnet->peer_hash_lock); 429 spin_lock_bh(&rxnet->peer_hash_lock);
423 hlist_add_head(&peer->keepalive_link, &rxnet->peer_keepalive[slot]); 430 list_splice_init(&rxnet->peer_keepalive_new, &collector);
424 rxrpc_put_peer(peer); 431
425 goto next_peer; 432 stop = cursor + ARRAY_SIZE(rxnet->peer_keepalive);
426 433 while (base <= now && (s8)(cursor - stop) < 0) {
427send: 434 list_splice_tail_init(&rxnet->peer_keepalive[cursor & mask],
428 rxrpc_send_keepalive(peer); 435 &collector);
429 now = ktime_get_real(); 436 base++;
430 goto recalc; 437 cursor++;
438 }
431 439
432emptied_bucket: 440 base = now;
433 cursor++; 441 spin_unlock_bh(&rxnet->peer_hash_lock);
434 if (cursor >= ARRAY_SIZE(rxnet->peer_keepalive))
435 cursor = 0;
436 base = ktime_add_ns(base, NSEC_PER_SEC);
437 goto next_bucket;
438 442
439resched:
440 rxnet->peer_keepalive_base = base; 443 rxnet->peer_keepalive_base = base;
441 rxnet->peer_keepalive_cursor = cursor; 444 rxnet->peer_keepalive_cursor = cursor;
442 delay = nsecs_to_jiffies(-diff) + 1; 445 rxrpc_peer_keepalive_dispatch(rxnet, &collector, base, cursor);
443 timer_reduce(&rxnet->peer_keepalive_timer, jiffies + delay); 446 ASSERT(list_empty(&collector));
444out: 447
448 /* Schedule the timer for the next occupied timeslot. */
449 cursor = rxnet->peer_keepalive_cursor;
450 stop = cursor + RXRPC_KEEPALIVE_TIME - 1;
451 for (; (s8)(cursor - stop) < 0; cursor++) {
452 if (!list_empty(&rxnet->peer_keepalive[cursor & mask]))
453 break;
454 base++;
455 }
456
457 now = ktime_get_seconds();
458 delay = base - now;
459 if (delay < 1)
460 delay = 1;
461 delay *= HZ;
462 if (rxnet->live)
463 timer_reduce(&rxnet->peer_keepalive_timer, jiffies + delay);
464
445 _leave(""); 465 _leave("");
446} 466}
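Note on the peer_event.c rewrite above: the ktime-based scan is replaced by a small timer wheel of 32 per-second buckets with a cursor that advances one bucket per elapsed second; after each pass a peer is re-filed into bucket (cursor + seconds_until_due) & mask, and a peer that is already due (or outside the wheel's window) gets a VERSION ping and is parked a full RXRPC_KEEPALIVE_TIME ahead. A compact user-space sketch of the bucket arithmetic only; list handling, locking and the rxrpc specifics are omitted and the function name is made up.

	#include <stdio.h>

	#define NBUCKETS  32                      /* power of two */
	#define KEEPALIVE 20                      /* seconds between pings */

	static int bucket_for(unsigned int cursor, long long now, long long last_tx)
	{
		long long keepalive_at = last_tx + KEEPALIVE;
		long long slot = keepalive_at - now;    /* seconds until it's due */

		if (slot <= 0 || slot > KEEPALIVE) {
			/* a real implementation would ping the peer here */
			slot = KEEPALIVE;               /* re-file a full period out */
		}
		return (int)((cursor + slot) & (NBUCKETS - 1));
	}

	int main(void)
	{
		/* Peer last transmitted 5s ago: due again in 15s,
		 * so it lands 15 buckets ahead of the cursor. */
		printf("%d\n", bucket_for(3, 100, 95));     /* prints 18 */
		return 0;
	}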
diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
index 1b7e8107b3ae..1dc7648e3eff 100644
--- a/net/rxrpc/peer_object.c
+++ b/net/rxrpc/peer_object.c
@@ -322,7 +322,7 @@ struct rxrpc_peer *rxrpc_lookup_incoming_peer(struct rxrpc_local *local,
322 if (!peer) { 322 if (!peer) {
323 peer = prealloc; 323 peer = prealloc;
324 hash_add_rcu(rxnet->peer_hash, &peer->hash_link, hash_key); 324 hash_add_rcu(rxnet->peer_hash, &peer->hash_link, hash_key);
325 hlist_add_head(&peer->keepalive_link, &rxnet->peer_keepalive_new); 325 list_add_tail(&peer->keepalive_link, &rxnet->peer_keepalive_new);
326 } 326 }
327 327
328 spin_unlock(&rxnet->peer_hash_lock); 328 spin_unlock(&rxnet->peer_hash_lock);
@@ -367,8 +367,8 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local,
367 if (!peer) { 367 if (!peer) {
368 hash_add_rcu(rxnet->peer_hash, 368 hash_add_rcu(rxnet->peer_hash,
369 &candidate->hash_link, hash_key); 369 &candidate->hash_link, hash_key);
370 hlist_add_head(&candidate->keepalive_link, 370 list_add_tail(&candidate->keepalive_link,
371 &rxnet->peer_keepalive_new); 371 &rxnet->peer_keepalive_new);
372 } 372 }
373 373
374 spin_unlock_bh(&rxnet->peer_hash_lock); 374 spin_unlock_bh(&rxnet->peer_hash_lock);
@@ -406,7 +406,7 @@ struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *peer)
406 const void *here = __builtin_return_address(0); 406 const void *here = __builtin_return_address(0);
407 407
408 if (peer) { 408 if (peer) {
409 int n = __atomic_add_unless(&peer->usage, 1, 0); 409 int n = atomic_fetch_add_unless(&peer->usage, 1, 0);
410 if (n > 0) 410 if (n > 0)
411 trace_rxrpc_peer(peer, rxrpc_peer_got, n + 1, here); 411 trace_rxrpc_peer(peer, rxrpc_peer_got, n + 1, here);
412 else 412 else
@@ -441,7 +441,7 @@ static void __rxrpc_put_peer(struct rxrpc_peer *peer)
441 441
442 spin_lock_bh(&rxnet->peer_hash_lock); 442 spin_lock_bh(&rxnet->peer_hash_lock);
443 hash_del_rcu(&peer->hash_link); 443 hash_del_rcu(&peer->hash_link);
444 hlist_del_init(&peer->keepalive_link); 444 list_del_init(&peer->keepalive_link);
445 spin_unlock_bh(&rxnet->peer_hash_lock); 445 spin_unlock_bh(&rxnet->peer_hash_lock);
446 446
447 kfree_rcu(peer, rcu); 447 kfree_rcu(peer, rcu);
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index 278ac0807a60..47cb019c521a 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -669,7 +669,7 @@ static int rxkad_issue_challenge(struct rxrpc_connection *conn)
669 return -EAGAIN; 669 return -EAGAIN;
670 } 670 }
671 671
672 conn->params.peer->last_tx_at = ktime_get_real(); 672 conn->params.peer->last_tx_at = ktime_get_seconds();
673 _leave(" = 0"); 673 _leave(" = 0");
674 return 0; 674 return 0;
675} 675}
@@ -725,7 +725,7 @@ static int rxkad_send_response(struct rxrpc_connection *conn,
725 return -EAGAIN; 725 return -EAGAIN;
726 } 726 }
727 727
728 conn->params.peer->last_tx_at = ktime_get_real(); 728 conn->params.peer->last_tx_at = ktime_get_seconds();
729 _leave(" = 0"); 729 _leave(" = 0");
730 return 0; 730 return 0;
731} 731}
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 05e4ffe5aabd..e7de5f282722 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -1122,6 +1122,8 @@ static void smc_tcp_listen_work(struct work_struct *work)
1122 sock_hold(lsk); /* sock_put in smc_listen_work */ 1122 sock_hold(lsk); /* sock_put in smc_listen_work */
1123 INIT_WORK(&new_smc->smc_listen_work, smc_listen_work); 1123 INIT_WORK(&new_smc->smc_listen_work, smc_listen_work);
1124 smc_copy_sock_settings_to_smc(new_smc); 1124 smc_copy_sock_settings_to_smc(new_smc);
1125 new_smc->sk.sk_sndbuf = lsmc->sk.sk_sndbuf;
1126 new_smc->sk.sk_rcvbuf = lsmc->sk.sk_rcvbuf;
1125 sock_hold(&new_smc->sk); /* sock_put in passive closing */ 1127 sock_hold(&new_smc->sk); /* sock_put in passive closing */
1126 if (!schedule_work(&new_smc->smc_listen_work)) 1128 if (!schedule_work(&new_smc->smc_listen_work))
1127 sock_put(&new_smc->sk); 1129 sock_put(&new_smc->sk);
@@ -1397,8 +1399,7 @@ static int smc_shutdown(struct socket *sock, int how)
1397 lock_sock(sk); 1399 lock_sock(sk);
1398 1400
1399 rc = -ENOTCONN; 1401 rc = -ENOTCONN;
1400 if ((sk->sk_state != SMC_LISTEN) && 1402 if ((sk->sk_state != SMC_ACTIVE) &&
1401 (sk->sk_state != SMC_ACTIVE) &&
1402 (sk->sk_state != SMC_PEERCLOSEWAIT1) && 1403 (sk->sk_state != SMC_PEERCLOSEWAIT1) &&
1403 (sk->sk_state != SMC_PEERCLOSEWAIT2) && 1404 (sk->sk_state != SMC_PEERCLOSEWAIT2) &&
1404 (sk->sk_state != SMC_APPCLOSEWAIT1) && 1405 (sk->sk_state != SMC_APPCLOSEWAIT1) &&
@@ -1521,12 +1522,16 @@ static int smc_ioctl(struct socket *sock, unsigned int cmd,
1521 1522
1522 smc = smc_sk(sock->sk); 1523 smc = smc_sk(sock->sk);
1523 conn = &smc->conn; 1524 conn = &smc->conn;
1525 lock_sock(&smc->sk);
1524 if (smc->use_fallback) { 1526 if (smc->use_fallback) {
1525 if (!smc->clcsock) 1527 if (!smc->clcsock) {
1528 release_sock(&smc->sk);
1526 return -EBADF; 1529 return -EBADF;
1527 return smc->clcsock->ops->ioctl(smc->clcsock, cmd, arg); 1530 }
1531 answ = smc->clcsock->ops->ioctl(smc->clcsock, cmd, arg);
1532 release_sock(&smc->sk);
1533 return answ;
1528 } 1534 }
1529 lock_sock(&smc->sk);
1530 switch (cmd) { 1535 switch (cmd) {
1531 case SIOCINQ: /* same as FIONREAD */ 1536 case SIOCINQ: /* same as FIONREAD */
1532 if (smc->sk.sk_state == SMC_LISTEN) { 1537 if (smc->sk.sk_state == SMC_LISTEN) {
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index a7e8d63fc8ae..9bde1e4ca288 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -233,7 +233,8 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc,
233 /* force immediate tx of current consumer cursor, but 233 /* force immediate tx of current consumer cursor, but
234 * under send_lock to guarantee arrival in seqno-order 234 * under send_lock to guarantee arrival in seqno-order
235 */ 235 */
236 smc_tx_sndbuf_nonempty(conn); 236 if (smc->sk.sk_state != SMC_INIT)
237 smc_tx_sndbuf_nonempty(conn);
237 } 238 }
238 } 239 }
239 240
diff --git a/net/socket.c b/net/socket.c
index 85633622c94d..8c24d5dc4bc8 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -89,6 +89,7 @@
89#include <linux/magic.h> 89#include <linux/magic.h>
90#include <linux/slab.h> 90#include <linux/slab.h>
91#include <linux/xattr.h> 91#include <linux/xattr.h>
92#include <linux/nospec.h>
92 93
93#include <linux/uaccess.h> 94#include <linux/uaccess.h>
94#include <asm/unistd.h> 95#include <asm/unistd.h>
@@ -2522,6 +2523,7 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
2522 2523
2523 if (call < 1 || call > SYS_SENDMMSG) 2524 if (call < 1 || call > SYS_SENDMMSG)
2524 return -EINVAL; 2525 return -EINVAL;
2526 call = array_index_nospec(call, SYS_SENDMMSG + 1);
2525 2527
2526 len = nargs[call]; 2528 len = nargs[call];
2527 if (len > sizeof(a)) 2529 if (len > sizeof(a))
@@ -2688,7 +2690,8 @@ EXPORT_SYMBOL(sock_unregister);
2688 2690
2689bool sock_is_registered(int family) 2691bool sock_is_registered(int family)
2690{ 2692{
2691 return family < NPROTO && rcu_access_pointer(net_families[family]); 2693 return family < NPROTO &&
2694 rcu_access_pointer(net_families[array_index_nospec(family, NPROTO)]);
2692} 2695}
2693 2696
2694static int __init sock_init(void) 2697static int __init sock_init(void)
diff --git a/net/tipc/net.c b/net/tipc/net.c
index a7f6964c3a4b..62199cf5a56c 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -123,15 +123,13 @@ void tipc_net_finalize(struct net *net, u32 addr)
123{ 123{
124 struct tipc_net *tn = tipc_net(net); 124 struct tipc_net *tn = tipc_net(net);
125 125
126 spin_lock_bh(&tn->node_list_lock); 126 if (!cmpxchg(&tn->node_addr, 0, addr)) {
127 if (!tipc_own_addr(net)) {
128 tipc_set_node_addr(net, addr); 127 tipc_set_node_addr(net, addr);
129 tipc_named_reinit(net); 128 tipc_named_reinit(net);
130 tipc_sk_reinit(net); 129 tipc_sk_reinit(net);
131 tipc_nametbl_publish(net, TIPC_CFG_SRV, addr, addr, 130 tipc_nametbl_publish(net, TIPC_CFG_SRV, addr, addr,
132 TIPC_CLUSTER_SCOPE, 0, addr); 131 TIPC_CLUSTER_SCOPE, 0, addr);
133 } 132 }
134 spin_unlock_bh(&tn->node_list_lock);
135} 133}
136 134
137void tipc_net_stop(struct net *net) 135void tipc_net_stop(struct net *net)
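Note on the tipc net.c hunk above: tipc_net_finalize() now uses cmpxchg() on the node address instead of taking node_list_lock; whichever caller first swings the address from 0 to addr performs the one-time initialisation, and later callers see a non-zero old value and do nothing. A minimal user-space sketch of the same "initialise exactly once" idiom with C11 atomics (names are illustrative):

	#include <stdatomic.h>
	#include <stdio.h>

	static atomic_uint node_addr;

	/* Returns 1 for the caller that actually performed the initialisation. */
	static int net_finalize(unsigned int addr)
	{
		unsigned int expected = 0;

		if (atomic_compare_exchange_strong(&node_addr, &expected, addr)) {
			/* one-time setup would go here */
			return 1;
		}
		return 0;	/* somebody else already set the address */
	}

	int main(void)
	{
		printf("%d %d %#x\n", net_finalize(0x1001), net_finalize(0x2002),
		       atomic_load(&node_addr));	/* 1 0 0x1001 */
		return 0;
	}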
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index c1076c19b858..ab27a2872935 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -451,14 +451,14 @@ static int vsock_send_shutdown(struct sock *sk, int mode)
451 return transport->shutdown(vsock_sk(sk), mode); 451 return transport->shutdown(vsock_sk(sk), mode);
452} 452}
453 453
454void vsock_pending_work(struct work_struct *work) 454static void vsock_pending_work(struct work_struct *work)
455{ 455{
456 struct sock *sk; 456 struct sock *sk;
457 struct sock *listener; 457 struct sock *listener;
458 struct vsock_sock *vsk; 458 struct vsock_sock *vsk;
459 bool cleanup; 459 bool cleanup;
460 460
461 vsk = container_of(work, struct vsock_sock, dwork.work); 461 vsk = container_of(work, struct vsock_sock, pending_work.work);
462 sk = sk_vsock(vsk); 462 sk = sk_vsock(vsk);
463 listener = vsk->listener; 463 listener = vsk->listener;
464 cleanup = true; 464 cleanup = true;
@@ -498,7 +498,6 @@ out:
498 sock_put(sk); 498 sock_put(sk);
499 sock_put(listener); 499 sock_put(listener);
500} 500}
501EXPORT_SYMBOL_GPL(vsock_pending_work);
502 501
503/**** SOCKET OPERATIONS ****/ 502/**** SOCKET OPERATIONS ****/
504 503
@@ -597,6 +596,8 @@ static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr)
597 return retval; 596 return retval;
598} 597}
599 598
599static void vsock_connect_timeout(struct work_struct *work);
600
600struct sock *__vsock_create(struct net *net, 601struct sock *__vsock_create(struct net *net,
601 struct socket *sock, 602 struct socket *sock,
602 struct sock *parent, 603 struct sock *parent,
@@ -638,6 +639,8 @@ struct sock *__vsock_create(struct net *net,
638 vsk->sent_request = false; 639 vsk->sent_request = false;
639 vsk->ignore_connecting_rst = false; 640 vsk->ignore_connecting_rst = false;
640 vsk->peer_shutdown = 0; 641 vsk->peer_shutdown = 0;
642 INIT_DELAYED_WORK(&vsk->connect_work, vsock_connect_timeout);
643 INIT_DELAYED_WORK(&vsk->pending_work, vsock_pending_work);
641 644
642 psk = parent ? vsock_sk(parent) : NULL; 645 psk = parent ? vsock_sk(parent) : NULL;
643 if (parent) { 646 if (parent) {
@@ -1117,7 +1120,7 @@ static void vsock_connect_timeout(struct work_struct *work)
1117 struct vsock_sock *vsk; 1120 struct vsock_sock *vsk;
1118 int cancel = 0; 1121 int cancel = 0;
1119 1122
1120 vsk = container_of(work, struct vsock_sock, dwork.work); 1123 vsk = container_of(work, struct vsock_sock, connect_work.work);
1121 sk = sk_vsock(vsk); 1124 sk = sk_vsock(vsk);
1122 1125
1123 lock_sock(sk); 1126 lock_sock(sk);
@@ -1221,9 +1224,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
1221 * timeout fires. 1224 * timeout fires.
1222 */ 1225 */
1223 sock_hold(sk); 1226 sock_hold(sk);
1224 INIT_DELAYED_WORK(&vsk->dwork, 1227 schedule_delayed_work(&vsk->connect_work, timeout);
1225 vsock_connect_timeout);
1226 schedule_delayed_work(&vsk->dwork, timeout);
1227 1228
1228 /* Skip ahead to preserve error code set above. */ 1229 /* Skip ahead to preserve error code set above. */
1229 goto out_wait; 1230 goto out_wait;
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index a7a73ffe675b..cb332adb84cd 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -1094,8 +1094,7 @@ static int vmci_transport_recv_listen(struct sock *sk,
1094 vpending->listener = sk; 1094 vpending->listener = sk;
1095 sock_hold(sk); 1095 sock_hold(sk);
1096 sock_hold(pending); 1096 sock_hold(pending);
1097 INIT_DELAYED_WORK(&vpending->dwork, vsock_pending_work); 1097 schedule_delayed_work(&vpending->pending_work, HZ);
1098 schedule_delayed_work(&vpending->dwork, HZ);
1099 1098
1100out: 1099out:
1101 return err; 1100 return err;
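Note on the two vsock hunks above: connect timeouts and pending-socket handling each get their own delayed_work member, initialised once at socket creation, instead of sharing vsk->dwork, so scheduling one can no longer re-initialise or clobber the other; the handlers then recover the socket with container_of() on the member that actually fired. A small user-space container_of sketch; the struct and field names here are made up for illustration.

	#include <stddef.h>
	#include <stdio.h>

	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	struct work { int pending; };

	struct vsock_like {
		int id;
		struct work connect_work;   /* one work item per purpose ... */
		struct work pending_work;   /* ... instead of a single shared one */
	};

	static void connect_timeout(struct work *w)
	{
		struct vsock_like *vsk =
			container_of(w, struct vsock_like, connect_work);

		printf("timeout on socket %d\n", vsk->id);
	}

	int main(void)
	{
		struct vsock_like s = { .id = 7 };

		connect_timeout(&s.connect_work);
		return 0;
	}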
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 72335c2e8108..4e937cd7c17d 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -84,10 +84,8 @@ static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
84{ 84{
85 int err = xskq_produce_batch_desc(xs->rx, (u64)xdp->handle, len); 85 int err = xskq_produce_batch_desc(xs->rx, (u64)xdp->handle, len);
86 86
87 if (err) { 87 if (err)
88 xdp_return_buff(xdp);
89 xs->rx_dropped++; 88 xs->rx_dropped++;
90 }
91 89
92 return err; 90 return err;
93} 91}
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 52ecaf770642..8a64b150be54 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -250,7 +250,7 @@ static inline bool xskq_full_desc(struct xsk_queue *q)
250 250
251static inline bool xskq_empty_desc(struct xsk_queue *q) 251static inline bool xskq_empty_desc(struct xsk_queue *q)
252{ 252{
253 return xskq_nb_free(q, q->prod_tail, 1) == q->nentries; 253 return xskq_nb_free(q, q->prod_tail, q->nentries) == q->nentries;
254} 254}
255 255
256void xskq_set_umem(struct xsk_queue *q, struct xdp_umem_props *umem_props); 256void xskq_set_umem(struct xsk_queue *q, struct xdp_umem_props *umem_props);
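Note on the xsk_queue.h hunk above: the emptiness test now asks xskq_nb_free() for nentries free slots instead of one. Asking for a single slot can be satisfied from the cached consumer pointer, so the cached free count rarely equals nentries even after the ring drains; asking for the full capacity forces the consumer pointer to be re-read, and the ring is empty exactly when the free count equals its capacity. A tiny sketch of the underlying arithmetic for a single-producer/single-consumer ring (the caching behaviour itself is not modelled):

	#include <stdio.h>

	struct ring {
		unsigned int nentries;      /* capacity, power of two */
		unsigned int prod;          /* entries ever produced  */
		unsigned int cons;          /* entries ever consumed  */
	};

	static unsigned int nb_free(const struct ring *r)
	{
		return r->nentries - (r->prod - r->cons);   /* wraps correctly */
	}

	static int is_empty(const struct ring *r)
	{
		return nb_free(r) == r->nentries;           /* nothing outstanding */
	}

	int main(void)
	{
		struct ring r = { .nentries = 8, .prod = 5, .cons = 5 };

		printf("%d\n", is_empty(&r));               /* 1: drained */
		r.prod++;
		printf("%d\n", is_empty(&r));               /* 0: one entry queued */
		return 0;
	}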
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 5f48251c1319..7c5e8978aeaa 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2286,6 +2286,9 @@ struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig,
2286 if (IS_ERR(dst) && PTR_ERR(dst) == -EREMOTE) 2286 if (IS_ERR(dst) && PTR_ERR(dst) == -EREMOTE)
2287 return make_blackhole(net, dst_orig->ops->family, dst_orig); 2287 return make_blackhole(net, dst_orig->ops->family, dst_orig);
2288 2288
2289 if (IS_ERR(dst))
2290 dst_release(dst_orig);
2291
2289 return dst; 2292 return dst;
2290} 2293}
2291EXPORT_SYMBOL(xfrm_lookup_route); 2294EXPORT_SYMBOL(xfrm_lookup_route);
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 080035f056d9..33878e6e0d0a 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1025,10 +1025,12 @@ static inline int xfrm_nlmsg_multicast(struct net *net, struct sk_buff *skb,
1025{ 1025{
1026 struct sock *nlsk = rcu_dereference(net->xfrm.nlsk); 1026 struct sock *nlsk = rcu_dereference(net->xfrm.nlsk);
1027 1027
1028 if (nlsk) 1028 if (!nlsk) {
1029 return nlmsg_multicast(nlsk, skb, pid, group, GFP_ATOMIC); 1029 kfree_skb(skb);
1030 else 1030 return -EPIPE;
1031 return -1; 1031 }
1032
1033 return nlmsg_multicast(nlsk, skb, pid, group, GFP_ATOMIC);
1032} 1034}
1033 1035
1034static inline unsigned int xfrm_spdinfo_msgsize(void) 1036static inline unsigned int xfrm_spdinfo_msgsize(void)
@@ -1671,9 +1673,11 @@ static inline unsigned int userpolicy_type_attrsize(void)
1671#ifdef CONFIG_XFRM_SUB_POLICY 1673#ifdef CONFIG_XFRM_SUB_POLICY
1672static int copy_to_user_policy_type(u8 type, struct sk_buff *skb) 1674static int copy_to_user_policy_type(u8 type, struct sk_buff *skb)
1673{ 1675{
1674 struct xfrm_userpolicy_type upt = { 1676 struct xfrm_userpolicy_type upt;
1675 .type = type, 1677
1676 }; 1678 /* Sadly there are two holes in struct xfrm_userpolicy_type */
1679 memset(&upt, 0, sizeof(upt));
1680 upt.type = type;
1677 1681
1678 return nla_put(skb, XFRMA_POLICY_TYPE, sizeof(upt), &upt); 1682 return nla_put(skb, XFRMA_POLICY_TYPE, sizeof(upt), &upt);
1679} 1683}
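Note on the second xfrm_user.c hunk above: the designated initializer is replaced with memset() plus an assignment because struct xfrm_userpolicy_type has padding holes, and an initializer list is not reliably guaranteed to zero the padding bytes, so copying the struct to user space could leak stack data. A quick illustration with a struct that has the same kind of holes (the struct and field names are made up):

	#include <stdio.h>
	#include <string.h>

	struct with_holes {
		unsigned char  type;    /* 3 padding bytes follow on most ABIs */
		unsigned int   reserved;
		unsigned short flags;   /* 2 more padding bytes at the tail */
	};

	int main(void)
	{
		struct with_holes a = { .type = 1 };    /* padding bytes unspecified */
		struct with_holes b;

		memset(&b, 0, sizeof(b));               /* every byte, holes included */
		b.type = 1;

		/* Typically prints sizeof=12 fields=7: 5 bytes of padding exist. */
		printf("sizeof=%zu fields=%zu\n", sizeof(a),
		       sizeof(a.type) + sizeof(a.reserved) + sizeof(a.flags));
		return 0;
	}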
diff --git a/samples/bpf/xdp_redirect_cpu_kern.c b/samples/bpf/xdp_redirect_cpu_kern.c
index 303e9e7161f3..4938dcbaecbf 100644
--- a/samples/bpf/xdp_redirect_cpu_kern.c
+++ b/samples/bpf/xdp_redirect_cpu_kern.c
@@ -14,7 +14,7 @@
14#include <uapi/linux/bpf.h> 14#include <uapi/linux/bpf.h>
15#include "bpf_helpers.h" 15#include "bpf_helpers.h"
16 16
17#define MAX_CPUS 12 /* WARNING - sync with _user.c */ 17#define MAX_CPUS 64 /* WARNING - sync with _user.c */
18 18
19/* Special map type that can XDP_REDIRECT frames to another CPU */ 19/* Special map type that can XDP_REDIRECT frames to another CPU */
20struct bpf_map_def SEC("maps") cpu_map = { 20struct bpf_map_def SEC("maps") cpu_map = {
diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c
index f6efaefd485b..4b4d78fffe30 100644
--- a/samples/bpf/xdp_redirect_cpu_user.c
+++ b/samples/bpf/xdp_redirect_cpu_user.c
@@ -19,7 +19,7 @@ static const char *__doc__ =
19#include <arpa/inet.h> 19#include <arpa/inet.h>
20#include <linux/if_link.h> 20#include <linux/if_link.h>
21 21
22#define MAX_CPUS 12 /* WARNING - sync with _kern.c */ 22#define MAX_CPUS 64 /* WARNING - sync with _kern.c */
23 23
24/* How many xdp_progs are defined in _kern.c */ 24/* How many xdp_progs are defined in _kern.c */
25#define MAX_PROG 5 25#define MAX_PROG 5
@@ -527,7 +527,7 @@ static void stress_cpumap(void)
527 * procedure. 527 * procedure.
528 */ 528 */
529 create_cpu_entry(1, 1024, 0, false); 529 create_cpu_entry(1, 1024, 0, false);
530 create_cpu_entry(1, 128, 0, false); 530 create_cpu_entry(1, 8, 0, false);
531 create_cpu_entry(1, 16000, 0, false); 531 create_cpu_entry(1, 16000, 0, false);
532} 532}
533 533
diff --git a/scripts/Makefile.ubsan b/scripts/Makefile.ubsan
index b593b36ccff8..38b2b4818e8e 100644
--- a/scripts/Makefile.ubsan
+++ b/scripts/Makefile.ubsan
@@ -14,10 +14,6 @@ ifdef CONFIG_UBSAN_ALIGNMENT
14 CFLAGS_UBSAN += $(call cc-option, -fsanitize=alignment) 14 CFLAGS_UBSAN += $(call cc-option, -fsanitize=alignment)
15endif 15endif
16 16
17ifdef CONFIG_UBSAN_NULL
18 CFLAGS_UBSAN += $(call cc-option, -fsanitize=null)
19endif
20
21 # -fsanitize=* options makes GCC less smart than usual and 17 # -fsanitize=* options makes GCC less smart than usual and
22 # increase number of 'maybe-uninitialized false-positives 18 # increase number of 'maybe-uninitialized false-positives
23 CFLAGS_UBSAN += $(call cc-option, -Wno-maybe-uninitialized) 19 CFLAGS_UBSAN += $(call cc-option, -Wno-maybe-uninitialized)
diff --git a/security/Kconfig b/security/Kconfig
index c4302067a3ad..afa91c6f06bb 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -57,7 +57,7 @@ config SECURITY_NETWORK
57config PAGE_TABLE_ISOLATION 57config PAGE_TABLE_ISOLATION
58 bool "Remove the kernel mapping in user mode" 58 bool "Remove the kernel mapping in user mode"
59 default y 59 default y
60 depends on X86_64 && !UML 60 depends on X86 && !UML
61 help 61 help
62 This feature reduces the number of hardware side channels by 62 This feature reduces the number of hardware side channels by
63 ensuring that the majority of kernel addresses are not mapped 63 ensuring that the majority of kernel addresses are not mapped
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index 097b1a5e046b..f74a8bcbda87 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -36,6 +36,7 @@
36#include <assert.h> 36#include <assert.h>
37#include <errno.h> 37#include <errno.h>
38#include <fcntl.h> 38#include <fcntl.h>
39#include <linux/kernel.h>
39#include <stdbool.h> 40#include <stdbool.h>
40#include <stdio.h> 41#include <stdio.h>
41#include <stdlib.h> 42#include <stdlib.h>
@@ -90,7 +91,8 @@ static bool map_is_map_of_progs(__u32 type)
90static void *alloc_value(struct bpf_map_info *info) 91static void *alloc_value(struct bpf_map_info *info)
91{ 92{
92 if (map_is_per_cpu(info->type)) 93 if (map_is_per_cpu(info->type))
93 return malloc(info->value_size * get_possible_cpus()); 94 return malloc(round_up(info->value_size, 8) *
95 get_possible_cpus());
94 else 96 else
95 return malloc(info->value_size); 97 return malloc(info->value_size);
96} 98}
@@ -161,9 +163,10 @@ static void print_entry_json(struct bpf_map_info *info, unsigned char *key,
161 jsonw_name(json_wtr, "value"); 163 jsonw_name(json_wtr, "value");
162 print_hex_data_json(value, info->value_size); 164 print_hex_data_json(value, info->value_size);
163 } else { 165 } else {
164 unsigned int i, n; 166 unsigned int i, n, step;
165 167
166 n = get_possible_cpus(); 168 n = get_possible_cpus();
169 step = round_up(info->value_size, 8);
167 170
168 jsonw_name(json_wtr, "key"); 171 jsonw_name(json_wtr, "key");
169 print_hex_data_json(key, info->key_size); 172 print_hex_data_json(key, info->key_size);
@@ -176,7 +179,7 @@ static void print_entry_json(struct bpf_map_info *info, unsigned char *key,
176 jsonw_int_field(json_wtr, "cpu", i); 179 jsonw_int_field(json_wtr, "cpu", i);
177 180
178 jsonw_name(json_wtr, "value"); 181 jsonw_name(json_wtr, "value");
179 print_hex_data_json(value + i * info->value_size, 182 print_hex_data_json(value + i * step,
180 info->value_size); 183 info->value_size);
181 184
182 jsonw_end_object(json_wtr); 185 jsonw_end_object(json_wtr);
@@ -207,9 +210,10 @@ static void print_entry_plain(struct bpf_map_info *info, unsigned char *key,
207 210
208 printf("\n"); 211 printf("\n");
209 } else { 212 } else {
210 unsigned int i, n; 213 unsigned int i, n, step;
211 214
212 n = get_possible_cpus(); 215 n = get_possible_cpus();
216 step = round_up(info->value_size, 8);
213 217
214 printf("key:\n"); 218 printf("key:\n");
215 fprint_hex(stdout, key, info->key_size, " "); 219 fprint_hex(stdout, key, info->key_size, " ");
@@ -217,7 +221,7 @@ static void print_entry_plain(struct bpf_map_info *info, unsigned char *key,
217 for (i = 0; i < n; i++) { 221 for (i = 0; i < n; i++) {
218 printf("value (CPU %02d):%c", 222 printf("value (CPU %02d):%c",
219 i, info->value_size > 16 ? '\n' : ' '); 223 i, info->value_size > 16 ? '\n' : ' ');
220 fprint_hex(stdout, value + i * info->value_size, 224 fprint_hex(stdout, value + i * step,
221 info->value_size, " "); 225 info->value_size, " ");
222 printf("\n"); 226 printf("\n");
223 } 227 }
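Note on the bpftool map.c hunks above: the kernel rounds each per-CPU value up to 8 bytes, so the buffer passed to a per-CPU map lookup must be round_up(value_size, 8) * nr_cpus bytes, and each CPU's slice starts at a multiple of the rounded size rather than of value_size. A small sketch of the stride arithmetic; the sizes here are made up for illustration.

	#include <stdio.h>
	#include <stdlib.h>

	#define ROUND_UP(x, a)	(((x) + (a) - 1) / (a) * (a))

	int main(void)
	{
		unsigned int value_size = 12;                 /* e.g. a 12-byte struct */
		unsigned int ncpus = 4;
		unsigned int step = ROUND_UP(value_size, 8);  /* 16-byte stride */
		unsigned char *buf = calloc(ncpus, step);     /* lookup buffer size  */
		unsigned int cpu;

		for (cpu = 0; cpu < ncpus; cpu++)
			printf("cpu %u value at offset %u\n", cpu, cpu * step);

		free(buf);
		return 0;
	}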
diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h
index 0b5ddbe135a4..972265f32871 100644
--- a/tools/include/uapi/linux/btf.h
+++ b/tools/include/uapi/linux/btf.h
@@ -76,7 +76,7 @@ struct btf_type {
76 */ 76 */
77#define BTF_INT_ENCODING(VAL) (((VAL) & 0x0f000000) >> 24) 77#define BTF_INT_ENCODING(VAL) (((VAL) & 0x0f000000) >> 24)
78#define BTF_INT_OFFSET(VAL) (((VAL & 0x00ff0000)) >> 16) 78#define BTF_INT_OFFSET(VAL) (((VAL & 0x00ff0000)) >> 16)
79#define BTF_INT_BITS(VAL) ((VAL) & 0x0000ffff) 79#define BTF_INT_BITS(VAL) ((VAL) & 0x000000ff)
80 80
81/* Attributes stored in the BTF_INT_ENCODING */ 81/* Attributes stored in the BTF_INT_ENCODING */
82#define BTF_INT_SIGNED (1 << 0) 82#define BTF_INT_SIGNED (1 << 0)
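Note on the uapi btf.h hunk above: BTF_INT_BITS() is narrowed to the low 8 bits of the int-type info word, matching the kernel-side definition: the encoding lives in bits 24-27, the bit offset in bits 16-23, and the size in bits fits comfortably in the low byte. A tiny decoder sketch using the same masks:

	#include <stdio.h>

	#define BTF_INT_ENCODING(VAL)	(((VAL) & 0x0f000000) >> 24)
	#define BTF_INT_OFFSET(VAL)	(((VAL) & 0x00ff0000) >> 16)
	#define BTF_INT_BITS(VAL)	((VAL) & 0x000000ff)

	int main(void)
	{
		/* encoding 1 (signed), bit offset 0, 32 bits wide */
		unsigned int info = (1u << 24) | (0u << 16) | 32u;

		printf("enc=%u off=%u bits=%u\n", BTF_INT_ENCODING(info),
		       BTF_INT_OFFSET(info), BTF_INT_BITS(info));
		return 0;
	}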
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 8c54a4b6f187..c36a3a76986a 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -1,8 +1,7 @@
1/* SPDX-License-Identifier: GPL-2.0 */ 1// SPDX-License-Identifier: LGPL-2.1
2/* Copyright (c) 2018 Facebook */ 2/* Copyright (c) 2018 Facebook */
3 3
4#include <stdlib.h> 4#include <stdlib.h>
5#include <stdint.h>
6#include <string.h> 5#include <string.h>
7#include <unistd.h> 6#include <unistd.h>
8#include <errno.h> 7#include <errno.h>
@@ -27,13 +26,13 @@ struct btf {
27 struct btf_type **types; 26 struct btf_type **types;
28 const char *strings; 27 const char *strings;
29 void *nohdr_data; 28 void *nohdr_data;
30 uint32_t nr_types; 29 __u32 nr_types;
31 uint32_t types_size; 30 __u32 types_size;
32 uint32_t data_size; 31 __u32 data_size;
33 int fd; 32 int fd;
34}; 33};
35 34
36static const char *btf_name_by_offset(const struct btf *btf, uint32_t offset) 35static const char *btf_name_by_offset(const struct btf *btf, __u32 offset)
37{ 36{
38 if (offset < btf->hdr->str_len) 37 if (offset < btf->hdr->str_len)
39 return &btf->strings[offset]; 38 return &btf->strings[offset];
@@ -45,7 +44,7 @@ static int btf_add_type(struct btf *btf, struct btf_type *t)
45{ 44{
46 if (btf->types_size - btf->nr_types < 2) { 45 if (btf->types_size - btf->nr_types < 2) {
47 struct btf_type **new_types; 46 struct btf_type **new_types;
48 u32 expand_by, new_size; 47 __u32 expand_by, new_size;
49 48
50 if (btf->types_size == BTF_MAX_NR_TYPES) 49 if (btf->types_size == BTF_MAX_NR_TYPES)
51 return -E2BIG; 50 return -E2BIG;
@@ -72,7 +71,7 @@ static int btf_add_type(struct btf *btf, struct btf_type *t)
72static int btf_parse_hdr(struct btf *btf, btf_print_fn_t err_log) 71static int btf_parse_hdr(struct btf *btf, btf_print_fn_t err_log)
73{ 72{
74 const struct btf_header *hdr = btf->hdr; 73 const struct btf_header *hdr = btf->hdr;
75 u32 meta_left; 74 __u32 meta_left;
76 75
77 if (btf->data_size < sizeof(struct btf_header)) { 76 if (btf->data_size < sizeof(struct btf_header)) {
78 elog("BTF header not found\n"); 77 elog("BTF header not found\n");
@@ -151,7 +150,7 @@ static int btf_parse_type_sec(struct btf *btf, btf_print_fn_t err_log)
151 150
152 while (next_type < end_type) { 151 while (next_type < end_type) {
153 struct btf_type *t = next_type; 152 struct btf_type *t = next_type;
154 uint16_t vlen = BTF_INFO_VLEN(t->info); 153 __u16 vlen = BTF_INFO_VLEN(t->info);
155 int err; 154 int err;
156 155
157 next_type += sizeof(*t); 156 next_type += sizeof(*t);
@@ -190,8 +189,7 @@ static int btf_parse_type_sec(struct btf *btf, btf_print_fn_t err_log)
190 return 0; 189 return 0;
191} 190}
192 191
193static const struct btf_type *btf_type_by_id(const struct btf *btf, 192const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 type_id)
194 uint32_t type_id)
195{ 193{
196 if (type_id > btf->nr_types) 194 if (type_id > btf->nr_types)
197 return NULL; 195 return NULL;
@@ -209,7 +207,7 @@ static bool btf_type_is_void_or_null(const struct btf_type *t)
209 return !t || btf_type_is_void(t); 207 return !t || btf_type_is_void(t);
210} 208}
211 209
212static int64_t btf_type_size(const struct btf_type *t) 210static __s64 btf_type_size(const struct btf_type *t)
213{ 211{
214 switch (BTF_INFO_KIND(t->info)) { 212 switch (BTF_INFO_KIND(t->info)) {
215 case BTF_KIND_INT: 213 case BTF_KIND_INT:
@@ -226,15 +224,15 @@ static int64_t btf_type_size(const struct btf_type *t)
226 224
227#define MAX_RESOLVE_DEPTH 32 225#define MAX_RESOLVE_DEPTH 32
228 226
229int64_t btf__resolve_size(const struct btf *btf, uint32_t type_id) 227__s64 btf__resolve_size(const struct btf *btf, __u32 type_id)
230{ 228{
231 const struct btf_array *array; 229 const struct btf_array *array;
232 const struct btf_type *t; 230 const struct btf_type *t;
233 uint32_t nelems = 1; 231 __u32 nelems = 1;
234 int64_t size = -1; 232 __s64 size = -1;
235 int i; 233 int i;
236 234
237 t = btf_type_by_id(btf, type_id); 235 t = btf__type_by_id(btf, type_id);
238 for (i = 0; i < MAX_RESOLVE_DEPTH && !btf_type_is_void_or_null(t); 236 for (i = 0; i < MAX_RESOLVE_DEPTH && !btf_type_is_void_or_null(t);
239 i++) { 237 i++) {
240 size = btf_type_size(t); 238 size = btf_type_size(t);
@@ -259,7 +257,7 @@ int64_t btf__resolve_size(const struct btf *btf, uint32_t type_id)
259 return -EINVAL; 257 return -EINVAL;
260 } 258 }
261 259
262 t = btf_type_by_id(btf, type_id); 260 t = btf__type_by_id(btf, type_id);
263 } 261 }
264 262
265 if (size < 0) 263 if (size < 0)
@@ -271,9 +269,9 @@ int64_t btf__resolve_size(const struct btf *btf, uint32_t type_id)
271 return nelems * size; 269 return nelems * size;
272} 270}
273 271
274int32_t btf__find_by_name(const struct btf *btf, const char *type_name) 272__s32 btf__find_by_name(const struct btf *btf, const char *type_name)
275{ 273{
276 uint32_t i; 274 __u32 i;
277 275
278 if (!strcmp(type_name, "void")) 276 if (!strcmp(type_name, "void"))
279 return 0; 277 return 0;
@@ -302,10 +300,9 @@ void btf__free(struct btf *btf)
302 free(btf); 300 free(btf);
303} 301}
304 302
305struct btf *btf__new(uint8_t *data, uint32_t size, 303struct btf *btf__new(__u8 *data, __u32 size, btf_print_fn_t err_log)
306 btf_print_fn_t err_log)
307{ 304{
308 uint32_t log_buf_size = 0; 305 __u32 log_buf_size = 0;
309 char *log_buf = NULL; 306 char *log_buf = NULL;
310 struct btf *btf; 307 struct btf *btf;
311 int err; 308 int err;
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index 74bb344035bb..caac3a404dc5 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -1,22 +1,24 @@
1/* SPDX-License-Identifier: GPL-2.0 */ 1/* SPDX-License-Identifier: LGPL-2.1 */
2/* Copyright (c) 2018 Facebook */ 2/* Copyright (c) 2018 Facebook */
3 3
4#ifndef __BPF_BTF_H 4#ifndef __BPF_BTF_H
5#define __BPF_BTF_H 5#define __BPF_BTF_H
6 6
7#include <stdint.h> 7#include <linux/types.h>
8 8
9#define BTF_ELF_SEC ".BTF" 9#define BTF_ELF_SEC ".BTF"
10 10
11struct btf; 11struct btf;
12struct btf_type;
12 13
13typedef int (*btf_print_fn_t)(const char *, ...) 14typedef int (*btf_print_fn_t)(const char *, ...)
14 __attribute__((format(printf, 1, 2))); 15 __attribute__((format(printf, 1, 2)));
15 16
16void btf__free(struct btf *btf); 17void btf__free(struct btf *btf);
17struct btf *btf__new(uint8_t *data, uint32_t size, btf_print_fn_t err_log); 18struct btf *btf__new(__u8 *data, __u32 size, btf_print_fn_t err_log);
18int32_t btf__find_by_name(const struct btf *btf, const char *type_name); 19__s32 btf__find_by_name(const struct btf *btf, const char *type_name);
19int64_t btf__resolve_size(const struct btf *btf, uint32_t type_id); 20const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 id);
21__s64 btf__resolve_size(const struct btf *btf, __u32 type_id);
20int btf__fd(const struct btf *btf); 22int btf__fd(const struct btf *btf);
21 23
22#endif 24#endif
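
A hedged usage sketch of the reworked header: the caller is assumed to have loaded a struct btf elsewhere (for example with btf__new()), and the function name and output format below are illustrative only.

	#include <stdio.h>
	#include "btf.h"

	static void show_type_size(const struct btf *btf, const char *name)
	{
		__s32 id = btf__find_by_name(btf, name);

		if (id >= 0) {
			const struct btf_type *t = btf__type_by_id(btf, id);
			__s64 size = btf__resolve_size(btf, id);

			if (t && size >= 0)
				printf("%s: type id %d, %lld bytes\n",
				       name, id, (long long)size);
		}
	}
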
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index a1e96b5de5ff..1aafdbe827fe 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -36,6 +36,7 @@
36#include <linux/err.h> 36#include <linux/err.h>
37#include <linux/kernel.h> 37#include <linux/kernel.h>
38#include <linux/bpf.h> 38#include <linux/bpf.h>
39#include <linux/btf.h>
39#include <linux/list.h> 40#include <linux/list.h>
40#include <linux/limits.h> 41#include <linux/limits.h>
41#include <sys/stat.h> 42#include <sys/stat.h>
@@ -216,8 +217,8 @@ struct bpf_map {
216 size_t offset; 217 size_t offset;
217 int map_ifindex; 218 int map_ifindex;
218 struct bpf_map_def def; 219 struct bpf_map_def def;
219 uint32_t btf_key_type_id; 220 __u32 btf_key_type_id;
220 uint32_t btf_value_type_id; 221 __u32 btf_value_type_id;
221 void *priv; 222 void *priv;
222 bpf_map_clear_priv_t clear_priv; 223 bpf_map_clear_priv_t clear_priv;
223}; 224};
@@ -1014,68 +1015,72 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
1014 1015
1015static int bpf_map_find_btf_info(struct bpf_map *map, const struct btf *btf) 1016static int bpf_map_find_btf_info(struct bpf_map *map, const struct btf *btf)
1016{ 1017{
1018 const struct btf_type *container_type;
1019 const struct btf_member *key, *value;
1017 struct bpf_map_def *def = &map->def; 1020 struct bpf_map_def *def = &map->def;
1018 const size_t max_name = 256; 1021 const size_t max_name = 256;
1019 int64_t key_size, value_size; 1022 char container_name[max_name];
1020 int32_t key_id, value_id; 1023 __s64 key_size, value_size;
1021 char name[max_name]; 1024 __s32 container_id;
1022 1025
1023 /* Find key type by name from BTF */ 1026 if (snprintf(container_name, max_name, "____btf_map_%s", map->name) ==
1024 if (snprintf(name, max_name, "%s_key", map->name) == max_name) { 1027 max_name) {
1025 pr_warning("map:%s length of BTF key_type:%s_key is too long\n", 1028 pr_warning("map:%s length of '____btf_map_%s' is too long\n",
1026 map->name, map->name); 1029 map->name, map->name);
1027 return -EINVAL; 1030 return -EINVAL;
1028 } 1031 }
1029 1032
1030 key_id = btf__find_by_name(btf, name); 1033 container_id = btf__find_by_name(btf, container_name);
1031 if (key_id < 0) { 1034 if (container_id < 0) {
1032 pr_debug("map:%s key_type:%s cannot be found in BTF\n", 1035 pr_debug("map:%s container_name:%s cannot be found in BTF. Missing BPF_ANNOTATE_KV_PAIR?\n",
1033 map->name, name); 1036 map->name, container_name);
1034 return key_id; 1037 return container_id;
1035 } 1038 }
1036 1039
1037 key_size = btf__resolve_size(btf, key_id); 1040 container_type = btf__type_by_id(btf, container_id);
1038 if (key_size < 0) { 1041 if (!container_type) {
1039 pr_warning("map:%s key_type:%s cannot get the BTF type_size\n", 1042 pr_warning("map:%s cannot find BTF type for container_id:%u\n",
1040 map->name, name); 1043 map->name, container_id);
1041 return key_size; 1044 return -EINVAL;
1042 } 1045 }
1043 1046
1044 if (def->key_size != key_size) { 1047 if (BTF_INFO_KIND(container_type->info) != BTF_KIND_STRUCT ||
1045 pr_warning("map:%s key_type:%s has BTF type_size:%u != key_size:%u\n", 1048 BTF_INFO_VLEN(container_type->info) < 2) {
1046 map->name, name, (unsigned int)key_size, def->key_size); 1049 pr_warning("map:%s container_name:%s is an invalid container struct\n",
1050 map->name, container_name);
1047 return -EINVAL; 1051 return -EINVAL;
1048 } 1052 }
1049 1053
1050 /* Find value type from BTF */ 1054 key = (struct btf_member *)(container_type + 1);
1051 if (snprintf(name, max_name, "%s_value", map->name) == max_name) { 1055 value = key + 1;
1052 pr_warning("map:%s length of BTF value_type:%s_value is too long\n", 1056
1053 map->name, map->name); 1057 key_size = btf__resolve_size(btf, key->type);
1054 return -EINVAL; 1058 if (key_size < 0) {
1059 pr_warning("map:%s invalid BTF key_type_size\n",
1060 map->name);
1061 return key_size;
1055 } 1062 }
1056 1063
1057 value_id = btf__find_by_name(btf, name); 1064 if (def->key_size != key_size) {
1058 if (value_id < 0) { 1065 pr_warning("map:%s btf_key_type_size:%u != map_def_key_size:%u\n",
1059 pr_debug("map:%s value_type:%s cannot be found in BTF\n", 1066 map->name, (__u32)key_size, def->key_size);
1060 map->name, name); 1067 return -EINVAL;
1061 return value_id;
1062 } 1068 }
1063 1069
1064 value_size = btf__resolve_size(btf, value_id); 1070 value_size = btf__resolve_size(btf, value->type);
1065 if (value_size < 0) { 1071 if (value_size < 0) {
1066 pr_warning("map:%s value_type:%s cannot get the BTF type_size\n", 1072 pr_warning("map:%s invalid BTF value_type_size\n", map->name);
1067 map->name, name);
1068 return value_size; 1073 return value_size;
1069 } 1074 }
1070 1075
1071 if (def->value_size != value_size) { 1076 if (def->value_size != value_size) {
1072 pr_warning("map:%s value_type:%s has BTF type_size:%u != value_size:%u\n", 1077 pr_warning("map:%s btf_value_type_size:%u != map_def_value_size:%u\n",
1073 map->name, name, (unsigned int)value_size, def->value_size); 1078 map->name, (__u32)value_size, def->value_size);
1074 return -EINVAL; 1079 return -EINVAL;
1075 } 1080 }
1076 1081
1077 map->btf_key_type_id = key_id; 1082 map->btf_key_type_id = key->type;
1078 map->btf_value_type_id = value_id; 1083 map->btf_value_type_id = value->type;
1079 1084
1080 return 0; 1085 return 0;
1081} 1086}
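
In short, bpf_map_find_btf_info() now resolves the key and value types from a single container struct named "____btf_map_<map name>" rather than from separate "<map name>_key" and "<map name>_value" lookups. A hedged illustration of the container it searches for, using a hypothetical map called "my_map" (struct my_value is a placeholder); BPF_ANNOTATE_KV_PAIR further below generates a struct of exactly this shape:

	/* Hypothetical example, not part of the patch. */
	struct my_value {
		long packets;
		long bytes;
	};

	/* libbpf takes the key/value BTF ids from the first two members. */
	struct ____btf_map_my_map {
		int key;		/* size must match bpf_map_def.key_size */
		struct my_value value;	/* size must match bpf_map_def.value_size */
	};
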
@@ -2089,12 +2094,12 @@ const char *bpf_map__name(struct bpf_map *map)
2089 return map ? map->name : NULL; 2094 return map ? map->name : NULL;
2090} 2095}
2091 2096
2092uint32_t bpf_map__btf_key_type_id(const struct bpf_map *map) 2097__u32 bpf_map__btf_key_type_id(const struct bpf_map *map)
2093{ 2098{
2094 return map ? map->btf_key_type_id : 0; 2099 return map ? map->btf_key_type_id : 0;
2095} 2100}
2096 2101
2097uint32_t bpf_map__btf_value_type_id(const struct bpf_map *map) 2102__u32 bpf_map__btf_value_type_id(const struct bpf_map *map)
2098{ 2103{
2099 return map ? map->btf_value_type_id : 0; 2104 return map ? map->btf_value_type_id : 0;
2100} 2105}
@@ -2268,8 +2273,8 @@ bpf_perf_event_read_simple(void *mem, unsigned long size,
2268 volatile struct perf_event_mmap_page *header = mem; 2273 volatile struct perf_event_mmap_page *header = mem;
2269 __u64 data_tail = header->data_tail; 2274 __u64 data_tail = header->data_tail;
2270 __u64 data_head = header->data_head; 2275 __u64 data_head = header->data_head;
2276 int ret = LIBBPF_PERF_EVENT_ERROR;
2271 void *base, *begin, *end; 2277 void *base, *begin, *end;
2272 int ret;
2273 2278
2274 asm volatile("" ::: "memory"); /* in real code it should be smp_rmb() */ 2279 asm volatile("" ::: "memory"); /* in real code it should be smp_rmb() */
2275 if (data_head == data_tail) 2280 if (data_head == data_tail)
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 09976531aa74..b33ae02f7d0e 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -244,8 +244,8 @@ bpf_map__next(struct bpf_map *map, struct bpf_object *obj);
244int bpf_map__fd(struct bpf_map *map); 244int bpf_map__fd(struct bpf_map *map);
245const struct bpf_map_def *bpf_map__def(struct bpf_map *map); 245const struct bpf_map_def *bpf_map__def(struct bpf_map *map);
246const char *bpf_map__name(struct bpf_map *map); 246const char *bpf_map__name(struct bpf_map *map);
247uint32_t bpf_map__btf_key_type_id(const struct bpf_map *map); 247__u32 bpf_map__btf_key_type_id(const struct bpf_map *map);
248uint32_t bpf_map__btf_value_type_id(const struct bpf_map *map); 248__u32 bpf_map__btf_value_type_id(const struct bpf_map *map);
249 249
250typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *); 250typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *);
251int bpf_map__set_priv(struct bpf_map *map, void *priv, 251int bpf_map__set_priv(struct bpf_map *map, void *priv,
diff --git a/tools/memory-model/Documentation/explanation.txt b/tools/memory-model/Documentation/explanation.txt
index 1b09f3175a1f..0cbd1ef8f86d 100644
--- a/tools/memory-model/Documentation/explanation.txt
+++ b/tools/memory-model/Documentation/explanation.txt
@@ -804,7 +804,7 @@ type of fence:
804Second, some types of fence affect the way the memory subsystem 804Second, some types of fence affect the way the memory subsystem
805propagates stores. When a fence instruction is executed on CPU C: 805propagates stores. When a fence instruction is executed on CPU C:
806 806
807 For each other CPU C', smb_wmb() forces all po-earlier stores 807 For each other CPU C', smp_wmb() forces all po-earlier stores
808 on C to propagate to C' before any po-later stores do. 808 on C to propagate to C' before any po-later stores do.
809 809
810 For each other CPU C', any store which propagates to C before 810 For each other CPU C', any store which propagates to C before
diff --git a/tools/memory-model/Documentation/recipes.txt b/tools/memory-model/Documentation/recipes.txt
index ee4309a87fc4..af72700cc20a 100644
--- a/tools/memory-model/Documentation/recipes.txt
+++ b/tools/memory-model/Documentation/recipes.txt
@@ -126,7 +126,7 @@ However, it is not necessarily the case that accesses ordered by
126locking will be seen as ordered by CPUs not holding that lock. 126locking will be seen as ordered by CPUs not holding that lock.
127Consider this example: 127Consider this example:
128 128
129 /* See Z6.0+pooncelock+pooncelock+pombonce.litmus. */ 129 /* See Z6.0+pooncerelease+poacquirerelease+fencembonceonce.litmus. */
130 void CPU0(void) 130 void CPU0(void)
131 { 131 {
132 spin_lock(&mylock); 132 spin_lock(&mylock);
@@ -292,7 +292,7 @@ and to use smp_load_acquire() instead of smp_rmb(). However, the older
292smp_wmb() and smp_rmb() APIs are still heavily used, so it is important 292smp_wmb() and smp_rmb() APIs are still heavily used, so it is important
293to understand their use cases. The general approach is shown below: 293to understand their use cases. The general approach is shown below:
294 294
295 /* See MP+wmbonceonce+rmbonceonce.litmus. */ 295 /* See MP+fencewmbonceonce+fencermbonceonce.litmus. */
296 void CPU0(void) 296 void CPU0(void)
297 { 297 {
298 WRITE_ONCE(x, 1); 298 WRITE_ONCE(x, 1);
@@ -322,9 +322,9 @@ the following write-side code fragment:
322And the xlog_valid_lsn() function in fs/xfs/xfs_log_priv.h contains 322And the xlog_valid_lsn() function in fs/xfs/xfs_log_priv.h contains
323the corresponding read-side code fragment: 323the corresponding read-side code fragment:
324 324
325 cur_cycle = ACCESS_ONCE(log->l_curr_cycle); 325 cur_cycle = READ_ONCE(log->l_curr_cycle);
326 smp_rmb(); 326 smp_rmb();
327 cur_block = ACCESS_ONCE(log->l_curr_block); 327 cur_block = READ_ONCE(log->l_curr_block);
328 328
329Alternatively, consider the following comment in function 329Alternatively, consider the following comment in function
330perf_output_put_handle() in kernel/events/ring_buffer.c: 330perf_output_put_handle() in kernel/events/ring_buffer.c:
@@ -360,7 +360,7 @@ can be seen in the LB+poonceonces.litmus litmus test.
360One way of avoiding the counter-intuitive outcome is through the use of a 360One way of avoiding the counter-intuitive outcome is through the use of a
361control dependency paired with a full memory barrier: 361control dependency paired with a full memory barrier:
362 362
363 /* See LB+ctrlonceonce+mbonceonce.litmus. */ 363 /* See LB+fencembonceonce+ctrlonceonce.litmus. */
364 void CPU0(void) 364 void CPU0(void)
365 { 365 {
366 r0 = READ_ONCE(x); 366 r0 = READ_ONCE(x);
@@ -476,7 +476,7 @@ that one CPU first stores to one variable and then loads from a second,
476while another CPU stores to the second variable and then loads from the 476while another CPU stores to the second variable and then loads from the
477first. Preserving order requires nothing less than full barriers: 477first. Preserving order requires nothing less than full barriers:
478 478
479 /* See SB+mbonceonces.litmus. */ 479 /* See SB+fencembonceonces.litmus. */
480 void CPU0(void) 480 void CPU0(void)
481 { 481 {
482 WRITE_ONCE(x, 1); 482 WRITE_ONCE(x, 1);
diff --git a/tools/memory-model/README b/tools/memory-model/README
index 734f7feaa5dc..ee987ce20aae 100644
--- a/tools/memory-model/README
+++ b/tools/memory-model/README
@@ -35,13 +35,13 @@ BASIC USAGE: HERD7
35The memory model is used, in conjunction with "herd7", to exhaustively 35The memory model is used, in conjunction with "herd7", to exhaustively
36explore the state space of small litmus tests. 36explore the state space of small litmus tests.
37 37
38For example, to run SB+mbonceonces.litmus against the memory model: 38For example, to run SB+fencembonceonces.litmus against the memory model:
39 39
40 $ herd7 -conf linux-kernel.cfg litmus-tests/SB+mbonceonces.litmus 40 $ herd7 -conf linux-kernel.cfg litmus-tests/SB+fencembonceonces.litmus
41 41
42Here is the corresponding output: 42Here is the corresponding output:
43 43
44 Test SB+mbonceonces Allowed 44 Test SB+fencembonceonces Allowed
45 States 3 45 States 3
46 0:r0=0; 1:r0=1; 46 0:r0=0; 1:r0=1;
47 0:r0=1; 1:r0=0; 47 0:r0=1; 1:r0=0;
@@ -50,8 +50,8 @@ Here is the corresponding output:
50 Witnesses 50 Witnesses
51 Positive: 0 Negative: 3 51 Positive: 0 Negative: 3
52 Condition exists (0:r0=0 /\ 1:r0=0) 52 Condition exists (0:r0=0 /\ 1:r0=0)
53 Observation SB+mbonceonces Never 0 3 53 Observation SB+fencembonceonces Never 0 3
54 Time SB+mbonceonces 0.01 54 Time SB+fencembonceonces 0.01
55 Hash=d66d99523e2cac6b06e66f4c995ebb48 55 Hash=d66d99523e2cac6b06e66f4c995ebb48
56 56
57The "Positive: 0 Negative: 3" and the "Never 0 3" each indicate that 57The "Positive: 0 Negative: 3" and the "Never 0 3" each indicate that
@@ -67,16 +67,16 @@ BASIC USAGE: KLITMUS7
67The "klitmus7" tool converts a litmus test into a Linux kernel module, 67The "klitmus7" tool converts a litmus test into a Linux kernel module,
68which may then be loaded and run. 68which may then be loaded and run.
69 69
70For example, to run SB+mbonceonces.litmus against hardware: 70For example, to run SB+fencembonceonces.litmus against hardware:
71 71
72 $ mkdir mymodules 72 $ mkdir mymodules
73 $ klitmus7 -o mymodules litmus-tests/SB+mbonceonces.litmus 73 $ klitmus7 -o mymodules litmus-tests/SB+fencembonceonces.litmus
74 $ cd mymodules ; make 74 $ cd mymodules ; make
75 $ sudo sh run.sh 75 $ sudo sh run.sh
76 76
77The corresponding output includes: 77The corresponding output includes:
78 78
79 Test SB+mbonceonces Allowed 79 Test SB+fencembonceonces Allowed
80 Histogram (3 states) 80 Histogram (3 states)
81 644580 :>0:r0=1; 1:r0=0; 81 644580 :>0:r0=1; 1:r0=0;
82 644328 :>0:r0=0; 1:r0=1; 82 644328 :>0:r0=0; 1:r0=1;
@@ -86,8 +86,8 @@ The corresponding output includes:
86 Positive: 0, Negative: 2000000 86 Positive: 0, Negative: 2000000
87 Condition exists (0:r0=0 /\ 1:r0=0) is NOT validated 87 Condition exists (0:r0=0 /\ 1:r0=0) is NOT validated
88 Hash=d66d99523e2cac6b06e66f4c995ebb48 88 Hash=d66d99523e2cac6b06e66f4c995ebb48
89 Observation SB+mbonceonces Never 0 2000000 89 Observation SB+fencembonceonces Never 0 2000000
90 Time SB+mbonceonces 0.16 90 Time SB+fencembonceonces 0.16
91 91
92The "Positive: 0 Negative: 2000000" and the "Never 0 2000000" indicate 92The "Positive: 0 Negative: 2000000" and the "Never 0 2000000" indicate
93that during two million trials, the state specified in this litmus 93that during two million trials, the state specified in this litmus
diff --git a/tools/memory-model/linux-kernel.bell b/tools/memory-model/linux-kernel.bell
index 64f5740e0e75..b84fb2f67109 100644
--- a/tools/memory-model/linux-kernel.bell
+++ b/tools/memory-model/linux-kernel.bell
@@ -13,7 +13,7 @@
13 13
14"Linux-kernel memory consistency model" 14"Linux-kernel memory consistency model"
15 15
16enum Accesses = 'once (*READ_ONCE,WRITE_ONCE,ACCESS_ONCE*) || 16enum Accesses = 'once (*READ_ONCE,WRITE_ONCE*) ||
17 'release (*smp_store_release*) || 17 'release (*smp_store_release*) ||
18 'acquire (*smp_load_acquire*) || 18 'acquire (*smp_load_acquire*) ||
19 'noreturn (* R of non-return RMW *) 19 'noreturn (* R of non-return RMW *)
diff --git a/tools/memory-model/litmus-tests/IRIW+mbonceonces+OnceOnce.litmus b/tools/memory-model/litmus-tests/IRIW+fencembonceonces+OnceOnce.litmus
index 98a3716efa37..e729d2776e89 100644
--- a/tools/memory-model/litmus-tests/IRIW+mbonceonces+OnceOnce.litmus
+++ b/tools/memory-model/litmus-tests/IRIW+fencembonceonces+OnceOnce.litmus
@@ -1,4 +1,4 @@
1C IRIW+mbonceonces+OnceOnce 1C IRIW+fencembonceonces+OnceOnce
2 2
3(* 3(*
4 * Result: Never 4 * Result: Never
diff --git a/tools/memory-model/litmus-tests/ISA2+pooncelock+pooncelock+pombonce.litmus b/tools/memory-model/litmus-tests/ISA2+pooncelock+pooncelock+pombonce.litmus
index 7a39a0aaa976..0f749e419b34 100644
--- a/tools/memory-model/litmus-tests/ISA2+pooncelock+pooncelock+pombonce.litmus
+++ b/tools/memory-model/litmus-tests/ISA2+pooncelock+pooncelock+pombonce.litmus
@@ -1,4 +1,4 @@
1C ISA2+pooncelock+pooncelock+pombonce.litmus 1C ISA2+pooncelock+pooncelock+pombonce
2 2
3(* 3(*
4 * Result: Sometimes 4 * Result: Sometimes
diff --git a/tools/memory-model/litmus-tests/LB+ctrlonceonce+mbonceonce.litmus b/tools/memory-model/litmus-tests/LB+fencembonceonce+ctrlonceonce.litmus
index de6708229dd1..4727f5aaf03b 100644
--- a/tools/memory-model/litmus-tests/LB+ctrlonceonce+mbonceonce.litmus
+++ b/tools/memory-model/litmus-tests/LB+fencembonceonce+ctrlonceonce.litmus
@@ -1,4 +1,4 @@
1C LB+ctrlonceonce+mbonceonce 1C LB+fencembonceonce+ctrlonceonce
2 2
3(* 3(*
4 * Result: Never 4 * Result: Never
diff --git a/tools/memory-model/litmus-tests/MP+wmbonceonce+rmbonceonce.litmus b/tools/memory-model/litmus-tests/MP+fencewmbonceonce+fencermbonceonce.litmus
index c078f38ff27a..a273da9faa6d 100644
--- a/tools/memory-model/litmus-tests/MP+wmbonceonce+rmbonceonce.litmus
+++ b/tools/memory-model/litmus-tests/MP+fencewmbonceonce+fencermbonceonce.litmus
@@ -1,4 +1,4 @@
1C MP+wmbonceonce+rmbonceonce 1C MP+fencewmbonceonce+fencermbonceonce
2 2
3(* 3(*
4 * Result: Never 4 * Result: Never
diff --git a/tools/memory-model/litmus-tests/R+mbonceonces.litmus b/tools/memory-model/litmus-tests/R+fencembonceonces.litmus
index a0e884ad2132..222a0b850b4a 100644
--- a/tools/memory-model/litmus-tests/R+mbonceonces.litmus
+++ b/tools/memory-model/litmus-tests/R+fencembonceonces.litmus
@@ -1,4 +1,4 @@
1C R+mbonceonces 1C R+fencembonceonces
2 2
3(* 3(*
4 * Result: Never 4 * Result: Never
diff --git a/tools/memory-model/litmus-tests/README b/tools/memory-model/litmus-tests/README
index 17eb9a8c222d..4581ec2d3c57 100644
--- a/tools/memory-model/litmus-tests/README
+++ b/tools/memory-model/litmus-tests/README
@@ -18,7 +18,7 @@ CoWW+poonceonce.litmus
18 Test of write-write coherence, that is, whether or not two 18 Test of write-write coherence, that is, whether or not two
19 successive writes to the same variable are ordered. 19 successive writes to the same variable are ordered.
20 20
21IRIW+mbonceonces+OnceOnce.litmus 21IRIW+fencembonceonces+OnceOnce.litmus
22 Test of independent reads from independent writes with smp_mb() 22 Test of independent reads from independent writes with smp_mb()
23 between each pairs of reads. In other words, is smp_mb() 23 between each pairs of reads. In other words, is smp_mb()
24 sufficient to cause two different reading processes to agree on 24 sufficient to cause two different reading processes to agree on
@@ -47,7 +47,7 @@ ISA2+pooncerelease+poacquirerelease+poacquireonce.litmus
47 Can a release-acquire chain order a prior store against 47 Can a release-acquire chain order a prior store against
48 a later load? 48 a later load?
49 49
50LB+ctrlonceonce+mbonceonce.litmus 50LB+fencembonceonce+ctrlonceonce.litmus
51 Does a control dependency and an smp_mb() suffice for the 51 Does a control dependency and an smp_mb() suffice for the
52 load-buffering litmus test, where each process reads from one 52 load-buffering litmus test, where each process reads from one
53 of two variables then writes to the other? 53 of two variables then writes to the other?
@@ -88,14 +88,14 @@ MP+porevlocks.litmus
88 As below, but with the first access of the writer process 88 As below, but with the first access of the writer process
89 and the second access of reader process protected by a lock. 89 and the second access of reader process protected by a lock.
90 90
91MP+wmbonceonce+rmbonceonce.litmus 91MP+fencewmbonceonce+fencermbonceonce.litmus
92 Does a smp_wmb() (between the stores) and an smp_rmb() (between 92 Does a smp_wmb() (between the stores) and an smp_rmb() (between
93 the loads) suffice for the message-passing litmus test, where one 93 the loads) suffice for the message-passing litmus test, where one
94 process writes data and then a flag, and the other process reads 94 process writes data and then a flag, and the other process reads
95 the flag and then the data. (This is similar to the ISA2 tests, 95 the flag and then the data. (This is similar to the ISA2 tests,
96 but with two processes instead of three.) 96 but with two processes instead of three.)
97 97
98R+mbonceonces.litmus 98R+fencembonceonces.litmus
99 This is the fully ordered (via smp_mb()) version of one of 99 This is the fully ordered (via smp_mb()) version of one of
100 the classic counterintuitive litmus tests that illustrates the 100 the classic counterintuitive litmus tests that illustrates the
101 effects of store propagation delays. 101 effects of store propagation delays.
@@ -103,7 +103,7 @@ R+mbonceonces.litmus
103R+poonceonces.litmus 103R+poonceonces.litmus
104 As above, but without the smp_mb() invocations. 104 As above, but without the smp_mb() invocations.
105 105
106SB+mbonceonces.litmus 106SB+fencembonceonces.litmus
107 This is the fully ordered (again, via smp_mb() version of store 107 This is the fully ordered (again, via smp_mb() version of store
108 buffering, which forms the core of Dekker's mutual-exclusion 108 buffering, which forms the core of Dekker's mutual-exclusion
109 algorithm. 109 algorithm.
@@ -111,15 +111,24 @@ SB+mbonceonces.litmus
111SB+poonceonces.litmus 111SB+poonceonces.litmus
112 As above, but without the smp_mb() invocations. 112 As above, but without the smp_mb() invocations.
113 113
114SB+rfionceonce-poonceonces.litmus
115 This litmus test demonstrates that LKMM is not fully multicopy
116 atomic. (Neither is it other multicopy atomic.) This litmus test
117 also demonstrates the "locations" debugging aid, which designates
118 additional registers and locations to be printed out in the dump
119 of final states in the herd7 output. Without the "locations"
120 statement, only those registers and locations mentioned in the
121 "exists" clause will be printed.
122
114S+poonceonces.litmus 123S+poonceonces.litmus
115 As below, but without the smp_wmb() and acquire load. 124 As below, but without the smp_wmb() and acquire load.
116 125
117S+wmbonceonce+poacquireonce.litmus 126S+fencewmbonceonce+poacquireonce.litmus
118 Can a smp_wmb(), instead of a release, and an acquire order 127 Can a smp_wmb(), instead of a release, and an acquire order
119 a prior store against a subsequent store? 128 a prior store against a subsequent store?
120 129
121WRC+poonceonces+Once.litmus 130WRC+poonceonces+Once.litmus
122WRC+pooncerelease+rmbonceonce+Once.litmus 131WRC+pooncerelease+fencermbonceonce+Once.litmus
123 These two are members of an extension of the MP litmus-test 132 These two are members of an extension of the MP litmus-test
124 class in which the first write is moved to a separate process. 133 class in which the first write is moved to a separate process.
125 The second is forbidden because smp_store_release() is 134 The second is forbidden because smp_store_release() is
@@ -134,7 +143,7 @@ Z6.0+pooncelock+poonceLock+pombonce.litmus
134 As above, but with smp_mb__after_spinlock() immediately 143 As above, but with smp_mb__after_spinlock() immediately
135 following the spin_lock(). 144 following the spin_lock().
136 145
137Z6.0+pooncerelease+poacquirerelease+mbonceonce.litmus 146Z6.0+pooncerelease+poacquirerelease+fencembonceonce.litmus
138 Is the ordering provided by a release-acquire chain sufficient 147 Is the ordering provided by a release-acquire chain sufficient
139 to make ordering apparent to accesses by a process that does 148 to make ordering apparent to accesses by a process that does
140 not participate in that release-acquire chain? 149 not participate in that release-acquire chain?
diff --git a/tools/memory-model/litmus-tests/S+wmbonceonce+poacquireonce.litmus b/tools/memory-model/litmus-tests/S+fencewmbonceonce+poacquireonce.litmus
index c53350205d28..18479823cd6c 100644
--- a/tools/memory-model/litmus-tests/S+wmbonceonce+poacquireonce.litmus
+++ b/tools/memory-model/litmus-tests/S+fencewmbonceonce+poacquireonce.litmus
@@ -1,4 +1,4 @@
1C S+wmbonceonce+poacquireonce 1C S+fencewmbonceonce+poacquireonce
2 2
3(* 3(*
4 * Result: Never 4 * Result: Never
diff --git a/tools/memory-model/litmus-tests/SB+mbonceonces.litmus b/tools/memory-model/litmus-tests/SB+fencembonceonces.litmus
index 74b874ffa8da..ed5fff18d223 100644
--- a/tools/memory-model/litmus-tests/SB+mbonceonces.litmus
+++ b/tools/memory-model/litmus-tests/SB+fencembonceonces.litmus
@@ -1,4 +1,4 @@
1C SB+mbonceonces 1C SB+fencembonceonces
2 2
3(* 3(*
4 * Result: Never 4 * Result: Never
diff --git a/tools/memory-model/litmus-tests/SB+rfionceonce-poonceonces.litmus b/tools/memory-model/litmus-tests/SB+rfionceonce-poonceonces.litmus
new file mode 100644
index 000000000000..04a16603660b
--- /dev/null
+++ b/tools/memory-model/litmus-tests/SB+rfionceonce-poonceonces.litmus
@@ -0,0 +1,32 @@
1C SB+rfionceonce-poonceonces
2
3(*
4 * Result: Sometimes
5 *
6 * This litmus test demonstrates that LKMM is not fully multicopy atomic.
7 *)
8
9{}
10
11P0(int *x, int *y)
12{
13 int r1;
14 int r2;
15
16 WRITE_ONCE(*x, 1);
17 r1 = READ_ONCE(*x);
18 r2 = READ_ONCE(*y);
19}
20
21P1(int *x, int *y)
22{
23 int r3;
24 int r4;
25
26 WRITE_ONCE(*y, 1);
27 r3 = READ_ONCE(*y);
28 r4 = READ_ONCE(*x);
29}
30
31locations [0:r1; 1:r3; x; y] (* Debug aid: Print things not in "exists". *)
32exists (0:r2=0 /\ 1:r4=0)
diff --git a/tools/memory-model/litmus-tests/WRC+pooncerelease+rmbonceonce+Once.litmus b/tools/memory-model/litmus-tests/WRC+pooncerelease+fencermbonceonce+Once.litmus
index ad3448b941e6..e9947250d7de 100644
--- a/tools/memory-model/litmus-tests/WRC+pooncerelease+rmbonceonce+Once.litmus
+++ b/tools/memory-model/litmus-tests/WRC+pooncerelease+fencermbonceonce+Once.litmus
@@ -1,4 +1,4 @@
1C WRC+pooncerelease+rmbonceonce+Once 1C WRC+pooncerelease+fencermbonceonce+Once
2 2
3(* 3(*
4 * Result: Never 4 * Result: Never
diff --git a/tools/memory-model/litmus-tests/Z6.0+pooncerelease+poacquirerelease+mbonceonce.litmus b/tools/memory-model/litmus-tests/Z6.0+pooncerelease+poacquirerelease+fencembonceonce.litmus
index a20fc3fafb53..88e70b87a683 100644
--- a/tools/memory-model/litmus-tests/Z6.0+pooncerelease+poacquirerelease+mbonceonce.litmus
+++ b/tools/memory-model/litmus-tests/Z6.0+pooncerelease+poacquirerelease+fencembonceonce.litmus
@@ -1,4 +1,4 @@
1C Z6.0+pooncerelease+poacquirerelease+mbonceonce 1C Z6.0+pooncerelease+poacquirerelease+fencembonceonce
2 2
3(* 3(*
4 * Result: Sometimes 4 * Result: Sometimes
diff --git a/tools/memory-model/scripts/checkalllitmus.sh b/tools/memory-model/scripts/checkalllitmus.sh
index af0aa15ab84e..ca528f9a24d4 100644..100755
--- a/tools/memory-model/scripts/checkalllitmus.sh
+++ b/tools/memory-model/scripts/checkalllitmus.sh
@@ -9,7 +9,7 @@
9# appended. 9# appended.
10# 10#
11# Usage: 11# Usage:
12# sh checkalllitmus.sh [ directory ] 12# checkalllitmus.sh [ directory ]
13# 13#
14# The LINUX_HERD_OPTIONS environment variable may be used to specify 14# The LINUX_HERD_OPTIONS environment variable may be used to specify
15# arguments to herd, whose default is defined by the checklitmus.sh script. 15# arguments to herd, whose default is defined by the checklitmus.sh script.
diff --git a/tools/memory-model/scripts/checklitmus.sh b/tools/memory-model/scripts/checklitmus.sh
index e2e477472844..bf12a75c0719 100644..100755
--- a/tools/memory-model/scripts/checklitmus.sh
+++ b/tools/memory-model/scripts/checklitmus.sh
@@ -8,7 +8,7 @@
8# with ".out" appended. 8# with ".out" appended.
9# 9#
10# Usage: 10# Usage:
11# sh checklitmus.sh file.litmus 11# checklitmus.sh file.litmus
12# 12#
13# The LINUX_HERD_OPTIONS environment variable may be used to specify 13# The LINUX_HERD_OPTIONS environment variable may be used to specify
14# arguments to herd, which default to "-conf linux-kernel.cfg". Thus, 14# arguments to herd, which default to "-conf linux-kernel.cfg". Thus,
diff --git a/tools/objtool/arch/x86/include/asm/orc_types.h b/tools/objtool/arch/x86/include/asm/orc_types.h
index 9c9dc579bd7d..46f516dd80ce 100644
--- a/tools/objtool/arch/x86/include/asm/orc_types.h
+++ b/tools/objtool/arch/x86/include/asm/orc_types.h
@@ -88,6 +88,7 @@ struct orc_entry {
88 unsigned sp_reg:4; 88 unsigned sp_reg:4;
89 unsigned bp_reg:4; 89 unsigned bp_reg:4;
90 unsigned type:2; 90 unsigned type:2;
91 unsigned end:1;
91} __packed; 92} __packed;
92 93
93/* 94/*
@@ -101,6 +102,7 @@ struct unwind_hint {
101 s16 sp_offset; 102 s16 sp_offset;
102 u8 sp_reg; 103 u8 sp_reg;
103 u8 type; 104 u8 type;
105 u8 end;
104}; 106};
105#endif /* __ASSEMBLY__ */ 107#endif /* __ASSEMBLY__ */
106 108
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index f4a25bd1871f..2928939b98ec 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1157,6 +1157,7 @@ static int read_unwind_hints(struct objtool_file *file)
1157 1157
1158 cfa->offset = hint->sp_offset; 1158 cfa->offset = hint->sp_offset;
1159 insn->state.type = hint->type; 1159 insn->state.type = hint->type;
1160 insn->state.end = hint->end;
1160 } 1161 }
1161 1162
1162 return 0; 1163 return 0;
diff --git a/tools/objtool/check.h b/tools/objtool/check.h
index c6b68fcb926f..95700a2bcb7c 100644
--- a/tools/objtool/check.h
+++ b/tools/objtool/check.h
@@ -31,7 +31,7 @@ struct insn_state {
31 int stack_size; 31 int stack_size;
32 unsigned char type; 32 unsigned char type;
33 bool bp_scratch; 33 bool bp_scratch;
34 bool drap; 34 bool drap, end;
35 int drap_reg, drap_offset; 35 int drap_reg, drap_offset;
36 struct cfi_reg vals[CFI_NUM_REGS]; 36 struct cfi_reg vals[CFI_NUM_REGS];
37}; 37};
diff --git a/tools/objtool/orc_dump.c b/tools/objtool/orc_dump.c
index c3343820916a..faa444270ee3 100644
--- a/tools/objtool/orc_dump.c
+++ b/tools/objtool/orc_dump.c
@@ -203,7 +203,8 @@ int orc_dump(const char *_objname)
203 203
204 print_reg(orc[i].bp_reg, orc[i].bp_offset); 204 print_reg(orc[i].bp_reg, orc[i].bp_offset);
205 205
206 printf(" type:%s\n", orc_type_name(orc[i].type)); 206 printf(" type:%s end:%d\n",
207 orc_type_name(orc[i].type), orc[i].end);
207 } 208 }
208 209
209 elf_end(elf); 210 elf_end(elf);
diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c
index 18384d9be4e1..3f98dcfbc177 100644
--- a/tools/objtool/orc_gen.c
+++ b/tools/objtool/orc_gen.c
@@ -31,6 +31,8 @@ int create_orc(struct objtool_file *file)
31 struct cfi_reg *cfa = &insn->state.cfa; 31 struct cfi_reg *cfa = &insn->state.cfa;
32 struct cfi_reg *bp = &insn->state.regs[CFI_BP]; 32 struct cfi_reg *bp = &insn->state.regs[CFI_BP];
33 33
34 orc->end = insn->state.end;
35
34 if (cfa->base == CFI_UNDEFINED) { 36 if (cfa->base == CFI_UNDEFINED) {
35 orc->sp_reg = ORC_REG_UNDEFINED; 37 orc->sp_reg = ORC_REG_UNDEFINED;
36 continue; 38 continue;
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8
index d39e4ff7d0bf..a6db83a88e85 100644
--- a/tools/power/x86/turbostat/turbostat.8
+++ b/tools/power/x86/turbostat/turbostat.8
@@ -106,7 +106,7 @@ The system configuration dump (if --quiet is not used) is followed by statistics
106\fBC1%, C2%, C3%\fP The residency percentage that Linux requested C1, C2, C3.... The system summary is the average of all CPUs in the system. Note that these are software, reflecting what was requested. The hardware counters reflect what was actually achieved. 106\fBC1%, C2%, C3%\fP The residency percentage that Linux requested C1, C2, C3.... The system summary is the average of all CPUs in the system. Note that these are software, reflecting what was requested. The hardware counters reflect what was actually achieved.
107\fBCPU%c1, CPU%c3, CPU%c6, CPU%c7\fP show the percentage residency in hardware core idle states. These numbers are from hardware residency counters. 107\fBCPU%c1, CPU%c3, CPU%c6, CPU%c7\fP show the percentage residency in hardware core idle states. These numbers are from hardware residency counters.
108\fBCoreTmp\fP Degrees Celsius reported by the per-core Digital Thermal Sensor. 108\fBCoreTmp\fP Degrees Celsius reported by the per-core Digital Thermal Sensor.
109\fBPkgTtmp\fP Degrees Celsius reported by the per-package Package Thermal Monitor. 109\fBPkgTmp\fP Degrees Celsius reported by the per-package Package Thermal Monitor.
110\fBGFX%rc6\fP The percentage of time the GPU is in the "render C6" state, rc6, during the measurement interval. From /sys/class/drm/card0/power/rc6_residency_ms. 110\fBGFX%rc6\fP The percentage of time the GPU is in the "render C6" state, rc6, during the measurement interval. From /sys/class/drm/card0/power/rc6_residency_ms.
111\fBGFXMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz. 111\fBGFXMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz.
112\fBPkg%pc2, Pkg%pc3, Pkg%pc6, Pkg%pc7\fP percentage residency in hardware package idle states. These numbers are from hardware residency counters. 112\fBPkg%pc2, Pkg%pc3, Pkg%pc6, Pkg%pc7\fP percentage residency in hardware package idle states. These numbers are from hardware residency counters.
@@ -114,7 +114,7 @@ The system configuration dump (if --quiet is not used) is followed by statistics
114\fBCorWatt\fP Watts consumed by the core part of the package. 114\fBCorWatt\fP Watts consumed by the core part of the package.
115\fBGFXWatt\fP Watts consumed by the Graphics part of the package -- available only on client processors. 115\fBGFXWatt\fP Watts consumed by the Graphics part of the package -- available only on client processors.
116\fBRAMWatt\fP Watts consumed by the DRAM DIMMS -- available only on server processors. 116\fBRAMWatt\fP Watts consumed by the DRAM DIMMS -- available only on server processors.
117\fBPKG_%\fP percent of the interval that RAPL throttling was active on the Package. 117\fBPKG_%\fP percent of the interval that RAPL throttling was active on the Package. Note that the system summary is the sum of the package throttling time, and thus may be higher than 100% on a multi-package system. Note that the meaning of this field is model specific. For example, some hardware increments this counter when RAPL responds to thermal limits, but does not increment this counter when RAPL responds to power limits. Comparing PkgWatt and PkgTmp to system limits is necessary.
118\fBRAM_%\fP percent of the interval that RAPL throttling was active on DRAM. 118\fBRAM_%\fP percent of the interval that RAPL throttling was active on DRAM.
119.fi 119.fi
120.SH TOO MUCH INFORMATION EXAMPLE 120.SH TOO MUCH INFORMATION EXAMPLE
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 4d14bbbf9b63..980bd9d20646 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -1163,9 +1163,7 @@ void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_
1163 if (!printed || !summary_only) 1163 if (!printed || !summary_only)
1164 print_header("\t"); 1164 print_header("\t");
1165 1165
1166 if (topo.num_cpus > 1) 1166 format_counters(&average.threads, &average.cores, &average.packages);
1167 format_counters(&average.threads, &average.cores,
1168 &average.packages);
1169 1167
1170 printed = 1; 1168 printed = 1;
1171 1169
@@ -1692,7 +1690,7 @@ void get_apic_id(struct thread_data *t)
1692 t->x2apic_id = edx; 1690 t->x2apic_id = edx;
1693 1691
1694 if (debug && (t->apic_id != t->x2apic_id)) 1692 if (debug && (t->apic_id != t->x2apic_id))
1695 fprintf(stderr, "cpu%d: apic 0x%x x2apic 0x%x\n", t->cpu_id, t->apic_id, t->x2apic_id); 1693 fprintf(outf, "cpu%d: apic 0x%x x2apic 0x%x\n", t->cpu_id, t->apic_id, t->x2apic_id);
1696} 1694}
1697 1695
1698/* 1696/*
@@ -2473,55 +2471,43 @@ int get_core_id(int cpu)
2473 2471
2474void set_node_data(void) 2472void set_node_data(void)
2475{ 2473{
2476 char path[80]; 2474 int pkg, node, lnode, cpu, cpux;
2477 FILE *filep; 2475 int cpu_count;
2478 int pkg, node, cpu; 2476
2479 2477 /* initialize logical_node_id */
2480 struct pkg_node_info { 2478 for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu)
2481 int count; 2479 cpus[cpu].logical_node_id = -1;
2482 int min; 2480
2483 } *pni; 2481 cpu_count = 0;
2484 2482 for (pkg = 0; pkg < topo.num_packages; pkg++) {
2485 pni = calloc(topo.num_packages, sizeof(struct pkg_node_info)); 2483 lnode = 0;
2486 if (!pni) 2484 for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu) {
2487 err(1, "calloc pkg_node_count"); 2485 if (cpus[cpu].physical_package_id != pkg)
2488 2486 continue;
2489 for (pkg = 0; pkg < topo.num_packages; pkg++) 2487 /* find a cpu with an unset logical_node_id */
2490 pni[pkg].min = topo.num_cpus; 2488 if (cpus[cpu].logical_node_id != -1)
2491 2489 continue;
2492 for (node = 0; node <= topo.max_node_num; node++) { 2490 cpus[cpu].logical_node_id = lnode;
2493 /* find the "first" cpu in the node */ 2491 node = cpus[cpu].physical_node_id;
2494 sprintf(path, "/sys/bus/node/devices/node%d/cpulist", node); 2492 cpu_count++;
2495 filep = fopen(path, "r"); 2493 /*
2496 if (!filep) 2494 * find all matching cpus on this pkg and set
2497 continue; 2495 * the logical_node_id
2498 fscanf(filep, "%d", &cpu); 2496 */
2499 fclose(filep); 2497 for (cpux = cpu; cpux <= topo.max_cpu_num; cpux++) {
2500 2498 if ((cpus[cpux].physical_package_id == pkg) &&
2501 pkg = cpus[cpu].physical_package_id; 2499 (cpus[cpux].physical_node_id == node)) {
2502 pni[pkg].count++; 2500 cpus[cpux].logical_node_id = lnode;
2503 2501 cpu_count++;
2504 if (node < pni[pkg].min) 2502 }
2505 pni[pkg].min = node; 2503 }
2506 } 2504 lnode++;
2507 2505 if (lnode > topo.nodes_per_pkg)
2508 for (pkg = 0; pkg < topo.num_packages; pkg++) 2506 topo.nodes_per_pkg = lnode;
2509 if (pni[pkg].count > topo.nodes_per_pkg) 2507 }
2510 topo.nodes_per_pkg = pni[0].count; 2508 if (cpu_count >= topo.max_cpu_num)
2511 2509 break;
2512 /* Fake 1 node per pkg for machines that don't
2513 * expose nodes and thus avoid -nan results
2514 */
2515 if (topo.nodes_per_pkg == 0)
2516 topo.nodes_per_pkg = 1;
2517
2518 for (cpu = 0; cpu < topo.num_cpus; cpu++) {
2519 pkg = cpus[cpu].physical_package_id;
2520 node = cpus[cpu].physical_node_id;
2521 cpus[cpu].logical_node_id = node - pni[pkg].min;
2522 } 2510 }
2523 free(pni);
2524
2525} 2511}
2526 2512
2527int get_physical_node_id(struct cpu_topology *thiscpu) 2513int get_physical_node_id(struct cpu_topology *thiscpu)
@@ -4471,7 +4457,9 @@ void process_cpuid()
4471 family = (fms >> 8) & 0xf; 4457 family = (fms >> 8) & 0xf;
4472 model = (fms >> 4) & 0xf; 4458 model = (fms >> 4) & 0xf;
4473 stepping = fms & 0xf; 4459 stepping = fms & 0xf;
4474 if (family == 6 || family == 0xf) 4460 if (family == 0xf)
4461 family += (fms >> 20) & 0xff;
4462 if (family >= 6)
4475 model += ((fms >> 16) & 0xf) << 4; 4463 model += ((fms >> 16) & 0xf) << 4;
4476 4464
4477 if (!quiet) { 4465 if (!quiet) {
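
Restating the decoding change above as a hedged stand-alone helper (the function itself is illustrative, not part of turbostat): the extended family field is now applied when the base family is 0xf, and the extended model field for any family of 6 or above.

	/* Illustrative helper mirroring the new logic. */
	static void decode_fms(unsigned int fms, unsigned int *family,
			       unsigned int *model, unsigned int *stepping)
	{
		*family = (fms >> 8) & 0xf;
		*model = (fms >> 4) & 0xf;
		*stepping = fms & 0xf;
		if (*family == 0xf)
			*family += (fms >> 20) & 0xff;		/* extended family */
		if (*family >= 6)
			*model += ((fms >> 16) & 0xf) << 4;	/* extended model */
	}
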
@@ -4840,16 +4828,8 @@ void topology_probe()
4840 siblings = get_thread_siblings(&cpus[i]); 4828 siblings = get_thread_siblings(&cpus[i]);
4841 if (siblings > max_siblings) 4829 if (siblings > max_siblings)
4842 max_siblings = siblings; 4830 max_siblings = siblings;
4843 if (cpus[i].thread_id != -1) 4831 if (cpus[i].thread_id == 0)
4844 topo.num_cores++; 4832 topo.num_cores++;
4845
4846 if (debug > 1)
4847 fprintf(outf,
4848 "cpu %d pkg %d node %d core %d thread %d\n",
4849 i, cpus[i].physical_package_id,
4850 cpus[i].physical_node_id,
4851 cpus[i].physical_core_id,
4852 cpus[i].thread_id);
4853 } 4833 }
4854 4834
4855 topo.cores_per_node = max_core_id + 1; 4835 topo.cores_per_node = max_core_id + 1;
@@ -4875,6 +4855,20 @@ void topology_probe()
4875 topo.threads_per_core = max_siblings; 4855 topo.threads_per_core = max_siblings;
4876 if (debug > 1) 4856 if (debug > 1)
4877 fprintf(outf, "max_siblings %d\n", max_siblings); 4857 fprintf(outf, "max_siblings %d\n", max_siblings);
4858
4859 if (debug < 1)
4860 return;
4861
4862 for (i = 0; i <= topo.max_cpu_num; ++i) {
4863 fprintf(outf,
4864 "cpu %d pkg %d node %d lnode %d core %d thread %d\n",
4865 i, cpus[i].physical_package_id,
4866 cpus[i].physical_node_id,
4867 cpus[i].logical_node_id,
4868 cpus[i].physical_core_id,
4869 cpus[i].thread_id);
4870 }
4871
4878} 4872}
4879 4873
4880void 4874void
@@ -5102,7 +5096,7 @@ int get_and_dump_counters(void)
5102} 5096}
5103 5097
5104void print_version() { 5098void print_version() {
5105 fprintf(outf, "turbostat version 18.06.20" 5099 fprintf(outf, "turbostat version 18.07.27"
5106 " - Len Brown <lenb@kernel.org>\n"); 5100 " - Len Brown <lenb@kernel.org>\n");
5107} 5101}
5108 5102
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index f2f28b6c8915..810de20e8e26 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -158,6 +158,15 @@ struct bpf_map_def {
158 unsigned int numa_node; 158 unsigned int numa_node;
159}; 159};
160 160
161#define BPF_ANNOTATE_KV_PAIR(name, type_key, type_val) \
162 struct ____btf_map_##name { \
163 type_key key; \
164 type_val value; \
165 }; \
166 struct ____btf_map_##name \
167 __attribute__ ((section(".maps." #name), used)) \
168 ____btf_map_##name = { }
169
161static int (*bpf_skb_load_bytes)(void *ctx, int off, void *to, int len) = 170static int (*bpf_skb_load_bytes)(void *ctx, int off, void *to, int len) =
162 (void *) BPF_FUNC_skb_load_bytes; 171 (void *) BPF_FUNC_skb_load_bytes;
163static int (*bpf_skb_store_bytes)(void *ctx, int off, void *from, int len, int flags) = 172static int (*bpf_skb_store_bytes)(void *ctx, int off, void *from, int len, int flags) =
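
A hedged usage sketch of the new annotation macro, reusing the hypothetical my_map/my_value pair sketched earlier (all names are placeholders; test_btf_haskv.c below shows the real selftest usage):

	#include <linux/bpf.h>
	#include "bpf_helpers.h"

	struct my_value {
		long packets;
		long bytes;
	};

	struct bpf_map_def SEC("maps") my_map = {
		.type = BPF_MAP_TYPE_HASH,
		.key_size = sizeof(int),
		.value_size = sizeof(struct my_value),
		.max_entries = 128,
	};

	/* Expands to struct ____btf_map_my_map { int key; struct my_value value; }
	 * plus a dummy instance, so libbpf can locate the key/value BTF types. */
	BPF_ANNOTATE_KV_PAIR(my_map, int, struct my_value);
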
diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c
index 3619f3023088..ffdd27737c9e 100644
--- a/tools/testing/selftests/bpf/test_btf.c
+++ b/tools/testing/selftests/bpf/test_btf.c
@@ -247,6 +247,34 @@ static struct btf_raw_test raw_tests[] = {
247 .max_entries = 4, 247 .max_entries = 4,
248}, 248},
249 249
250{
251 .descr = "struct test #3 Invalid member offset",
252 .raw_types = {
253 /* int */ /* [1] */
254 BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
255 /* int64 */ /* [2] */
256 BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 64, 8),
257
258 /* struct A { */ /* [3] */
259 BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 16),
260 BTF_MEMBER_ENC(NAME_TBD, 1, 64), /* int m; */
261 BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* int64 n; */
262 /* } */
263 BTF_END_RAW,
264 },
265 .str_sec = "\0A\0m\0n\0",
266 .str_sec_size = sizeof("\0A\0m\0n\0"),
267 .map_type = BPF_MAP_TYPE_ARRAY,
268 .map_name = "struct_test3_map",
269 .key_size = sizeof(int),
270 .value_size = 16,
271 .key_type_id = 1,
272 .value_type_id = 3,
273 .max_entries = 4,
274 .btf_load_err = true,
275 .err_str = "Invalid member bits_offset",
276},
277
250/* Test member exceeds the size of struct. 278/* Test member exceeds the size of struct.
251 * 279 *
252 * struct A { 280 * struct A {
@@ -479,7 +507,7 @@ static struct btf_raw_test raw_tests[] = {
479 .key_size = sizeof(int), 507 .key_size = sizeof(int),
480 .value_size = sizeof(void *) * 4, 508 .value_size = sizeof(void *) * 4,
481 .key_type_id = 1, 509 .key_type_id = 1,
482 .value_type_id = 4, 510 .value_type_id = 5,
483 .max_entries = 4, 511 .max_entries = 4,
484}, 512},
485 513
@@ -1264,6 +1292,88 @@ static struct btf_raw_test raw_tests[] = {
1264 .err_str = "type != 0", 1292 .err_str = "type != 0",
1265}, 1293},
1266 1294
1295{
1296 .descr = "arraymap invalid btf key (a bit field)",
1297 .raw_types = {
1298 /* int */ /* [1] */
1299 BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
1300 /* 32 bit int with 32 bit offset */ /* [2] */
1301 BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 32, 32, 8),
1302 BTF_END_RAW,
1303 },
1304 .str_sec = "",
1305 .str_sec_size = sizeof(""),
1306 .map_type = BPF_MAP_TYPE_ARRAY,
1307 .map_name = "array_map_check_btf",
1308 .key_size = sizeof(int),
1309 .value_size = sizeof(int),
1310 .key_type_id = 2,
1311 .value_type_id = 1,
1312 .max_entries = 4,
1313 .map_create_err = true,
1314},
1315
1316{
1317 .descr = "arraymap invalid btf key (!= 32 bits)",
1318 .raw_types = {
1319 /* int */ /* [1] */
1320 BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
1321 /* 16 bit int with 0 bit offset */ /* [2] */
1322 BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 16, 2),
1323 BTF_END_RAW,
1324 },
1325 .str_sec = "",
1326 .str_sec_size = sizeof(""),
1327 .map_type = BPF_MAP_TYPE_ARRAY,
1328 .map_name = "array_map_check_btf",
1329 .key_size = sizeof(int),
1330 .value_size = sizeof(int),
1331 .key_type_id = 2,
1332 .value_type_id = 1,
1333 .max_entries = 4,
1334 .map_create_err = true,
1335},
1336
1337{
1338 .descr = "arraymap invalid btf value (too small)",
1339 .raw_types = {
1340 /* int */ /* [1] */
1341 BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
1342 BTF_END_RAW,
1343 },
1344 .str_sec = "",
1345 .str_sec_size = sizeof(""),
1346 .map_type = BPF_MAP_TYPE_ARRAY,
1347 .map_name = "array_map_check_btf",
1348 .key_size = sizeof(int),
1349 /* btf_value_size < map->value_size */
1350 .value_size = sizeof(__u64),
1351 .key_type_id = 1,
1352 .value_type_id = 1,
1353 .max_entries = 4,
1354 .map_create_err = true,
1355},
1356
1357{
1358 .descr = "arraymap invalid btf value (too big)",
1359 .raw_types = {
1360 /* int */ /* [1] */
1361 BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
1362 BTF_END_RAW,
1363 },
1364 .str_sec = "",
1365 .str_sec_size = sizeof(""),
1366 .map_type = BPF_MAP_TYPE_ARRAY,
1367 .map_name = "array_map_check_btf",
1368 .key_size = sizeof(int),
1369 /* btf_value_size > map->value_size */
1370 .value_size = sizeof(__u16),
1371 .key_type_id = 1,
1372 .value_type_id = 1,
1373 .max_entries = 4,
1374 .map_create_err = true,
1375},
1376
1267}; /* struct btf_raw_test raw_tests[] */ 1377}; /* struct btf_raw_test raw_tests[] */
1268 1378
1269static const char *get_next_str(const char *start, const char *end) 1379static const char *get_next_str(const char *start, const char *end)
@@ -2023,7 +2133,7 @@ static struct btf_raw_test pprint_test = {
2023 BTF_ENUM_ENC(NAME_TBD, 2), 2133 BTF_ENUM_ENC(NAME_TBD, 2),
2024 BTF_ENUM_ENC(NAME_TBD, 3), 2134 BTF_ENUM_ENC(NAME_TBD, 3),
2025 /* struct pprint_mapv */ /* [16] */ 2135 /* struct pprint_mapv */ /* [16] */
2026 BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 8), 28), 2136 BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 8), 32),
2027 BTF_MEMBER_ENC(NAME_TBD, 11, 0), /* uint32_t ui32 */ 2137 BTF_MEMBER_ENC(NAME_TBD, 11, 0), /* uint32_t ui32 */
2028 BTF_MEMBER_ENC(NAME_TBD, 10, 32), /* uint16_t ui16 */ 2138 BTF_MEMBER_ENC(NAME_TBD, 10, 32), /* uint16_t ui16 */
2029 BTF_MEMBER_ENC(NAME_TBD, 12, 64), /* int32_t si32 */ 2139 BTF_MEMBER_ENC(NAME_TBD, 12, 64), /* int32_t si32 */
diff --git a/tools/testing/selftests/bpf/test_btf_haskv.c b/tools/testing/selftests/bpf/test_btf_haskv.c
index 8c7ca096ecf2..b21b876f475d 100644
--- a/tools/testing/selftests/bpf/test_btf_haskv.c
+++ b/tools/testing/selftests/bpf/test_btf_haskv.c
@@ -10,11 +10,6 @@ struct ipv_counts {
10 unsigned int v6; 10 unsigned int v6;
11}; 11};
12 12
13typedef int btf_map_key;
14typedef struct ipv_counts btf_map_value;
15btf_map_key dumm_key;
16btf_map_value dummy_value;
17
18struct bpf_map_def SEC("maps") btf_map = { 13struct bpf_map_def SEC("maps") btf_map = {
19 .type = BPF_MAP_TYPE_ARRAY, 14 .type = BPF_MAP_TYPE_ARRAY,
20 .key_size = sizeof(int), 15 .key_size = sizeof(int),
@@ -22,6 +17,8 @@ struct bpf_map_def SEC("maps") btf_map = {
22 .max_entries = 4, 17 .max_entries = 4,
23}; 18};
24 19
20BPF_ANNOTATE_KV_PAIR(btf_map, int, struct ipv_counts);
21
25struct dummy_tracepoint_args { 22struct dummy_tracepoint_args {
26 unsigned long long pad; 23 unsigned long long pad;
27 struct sock *sock; 24 struct sock *sock;
diff --git a/tools/testing/selftests/bpf/test_lwt_seg6local.sh b/tools/testing/selftests/bpf/test_lwt_seg6local.sh
index 270fa8f49573..785eabf2a593 100755
--- a/tools/testing/selftests/bpf/test_lwt_seg6local.sh
+++ b/tools/testing/selftests/bpf/test_lwt_seg6local.sh
@@ -115,14 +115,14 @@ ip netns exec ns2 ip -6 route add fb00::6 encap bpf in obj test_lwt_seg6local.o
115ip netns exec ns2 ip -6 route add fd00::1 dev veth3 via fb00::43 scope link 115ip netns exec ns2 ip -6 route add fd00::1 dev veth3 via fb00::43 scope link
116 116
117ip netns exec ns3 ip -6 route add fc42::1 dev veth5 via fb00::65 117ip netns exec ns3 ip -6 route add fc42::1 dev veth5 via fb00::65
118ip netns exec ns3 ip -6 route add fd00::1 encap seg6local action End.BPF obj test_lwt_seg6local.o sec add_egr_x dev veth4 118ip netns exec ns3 ip -6 route add fd00::1 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec add_egr_x dev veth4
119 119
120ip netns exec ns4 ip -6 route add fd00::2 encap seg6local action End.BPF obj test_lwt_seg6local.o sec pop_egr dev veth6 120ip netns exec ns4 ip -6 route add fd00::2 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec pop_egr dev veth6
121ip netns exec ns4 ip -6 addr add fc42::1 dev lo 121ip netns exec ns4 ip -6 addr add fc42::1 dev lo
122ip netns exec ns4 ip -6 route add fd00::3 dev veth7 via fb00::87 122ip netns exec ns4 ip -6 route add fd00::3 dev veth7 via fb00::87
123 123
124ip netns exec ns5 ip -6 route add fd00::4 table 117 dev veth9 via fb00::109 124ip netns exec ns5 ip -6 route add fd00::4 table 117 dev veth9 via fb00::109
125ip netns exec ns5 ip -6 route add fd00::3 encap seg6local action End.BPF obj test_lwt_seg6local.o sec inspect_t dev veth8 125ip netns exec ns5 ip -6 route add fd00::3 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec inspect_t dev veth8
126 126
127ip netns exec ns6 ip -6 addr add fb00::6/16 dev lo 127ip netns exec ns6 ip -6 addr add fb00::6/16 dev lo
128ip netns exec ns6 ip -6 addr add fd00::4/16 dev lo 128ip netns exec ns6 ip -6 addr add fd00::4/16 dev lo
diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index 9e78df207919..0c7d9e556b47 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -354,7 +354,7 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
354 while (s->bytes_recvd < total_bytes) { 354 while (s->bytes_recvd < total_bytes) {
355 if (txmsg_cork) { 355 if (txmsg_cork) {
356 timeout.tv_sec = 0; 356 timeout.tv_sec = 0;
357 timeout.tv_usec = 1000; 357 timeout.tv_usec = 300000;
358 } else { 358 } else {
359 timeout.tv_sec = 1; 359 timeout.tv_sec = 1;
360 timeout.tv_usec = 0; 360 timeout.tv_usec = 0;
diff --git a/tools/testing/selftests/net/tcp_mmap.c b/tools/testing/selftests/net/tcp_mmap.c
index 77f762780199..e8c5dff448eb 100644
--- a/tools/testing/selftests/net/tcp_mmap.c
+++ b/tools/testing/selftests/net/tcp_mmap.c
@@ -402,7 +402,7 @@ int main(int argc, char *argv[])
402 exit(1); 402 exit(1);
403 } 403 }
404 404
405 fd = socket(AF_INET6, SOCK_STREAM, 0); 405 fd = socket(cfg_family, SOCK_STREAM, 0);
406 if (fd == -1) { 406 if (fd == -1) {
407 perror("socket"); 407 perror("socket");
408 exit(1); 408 exit(1);
diff --git a/tools/testing/selftests/rcutorture/bin/configinit.sh b/tools/testing/selftests/rcutorture/bin/configinit.sh
index c15f270e121d..65541c21a544 100755
--- a/tools/testing/selftests/rcutorture/bin/configinit.sh
+++ b/tools/testing/selftests/rcutorture/bin/configinit.sh
@@ -1,6 +1,6 @@
1#!/bin/bash 1#!/bin/bash
2# 2#
3# Usage: configinit.sh config-spec-file [ build output dir ] 3# Usage: configinit.sh config-spec-file build-output-dir results-dir
4# 4#
5# Create a .config file from the spec file. Run from the kernel source tree. 5# Create a .config file from the spec file. Run from the kernel source tree.
6# Exits with 0 if all went well, with 1 if all went well but the config 6# Exits with 0 if all went well, with 1 if all went well but the config
@@ -40,20 +40,18 @@ mkdir $T
 
 c=$1
 buildloc=$2
+resdir=$3
 builddir=
-if test -n $buildloc
+if echo $buildloc | grep -q '^O='
 then
-	if echo $buildloc | grep -q '^O='
+	builddir=`echo $buildloc | sed -e 's/^O=//'`
+	if test ! -d $builddir
 	then
-		builddir=`echo $buildloc | sed -e 's/^O=//'`
-		if test ! -d $builddir
-		then
-			mkdir $builddir
-		fi
-	else
-		echo Bad build directory: \"$buildloc\"
-		exit 2
+		mkdir $builddir
 	fi
+else
+	echo Bad build directory: \"$buildloc\"
+	exit 2
 fi
 
 sed -e 's/^\(CONFIG[0-9A-Z_]*\)=.*$/grep -v "^# \1" |/' < $c > $T/u.sh
@@ -61,12 +59,12 @@ sed -e 's/^\(CONFIG[0-9A-Z_]*=\).*$/grep -v \1 |/' < $c >> $T/u.sh
 grep '^grep' < $T/u.sh > $T/upd.sh
 echo "cat - $c" >> $T/upd.sh
 make mrproper
-make $buildloc distclean > $builddir/Make.distclean 2>&1
-make $buildloc $TORTURE_DEFCONFIG > $builddir/Make.defconfig.out 2>&1
+make $buildloc distclean > $resdir/Make.distclean 2>&1
+make $buildloc $TORTURE_DEFCONFIG > $resdir/Make.defconfig.out 2>&1
 mv $builddir/.config $builddir/.config.sav
 sh $T/upd.sh < $builddir/.config.sav > $builddir/.config
 cp $builddir/.config $builddir/.config.new
-yes '' | make $buildloc oldconfig > $builddir/Make.oldconfig.out 2> $builddir/Make.oldconfig.err
+yes '' | make $buildloc oldconfig > $resdir/Make.oldconfig.out 2> $resdir/Make.oldconfig.err
 
 # verify new config matches specification.
 configcheck.sh $builddir/.config $c
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-build.sh b/tools/testing/selftests/rcutorture/bin/kvm-build.sh
index 34d126734cde..9115fcdb5617 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-build.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-build.sh
@@ -2,7 +2,7 @@
 #
 # Build a kvm-ready Linux kernel from the tree in the current directory.
 #
-# Usage: kvm-build.sh config-template build-dir
+# Usage: kvm-build.sh config-template build-dir resdir
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -29,6 +29,7 @@ then
 	exit 1
 fi
 builddir=${2}
+resdir=${3}
 
 T=${TMPDIR-/tmp}/test-linux.sh.$$
 trap 'rm -rf $T' 0
@@ -41,19 +42,19 @@ CONFIG_VIRTIO_PCI=y
 CONFIG_VIRTIO_CONSOLE=y
 ___EOF___
 
-configinit.sh $T/config O=$builddir
+configinit.sh $T/config O=$builddir $resdir
 retval=$?
 if test $retval -gt 1
 then
 	exit 2
 fi
 ncpus=`cpus2use.sh`
-make O=$builddir -j$ncpus $TORTURE_KMAKE_ARG > $builddir/Make.out 2>&1
+make O=$builddir -j$ncpus $TORTURE_KMAKE_ARG > $resdir/Make.out 2>&1
 retval=$?
-if test $retval -ne 0 || grep "rcu[^/]*": < $builddir/Make.out | egrep -q "Stop|Error|error:|warning:" || egrep -q "Stop|Error|error:" < $builddir/Make.out
+if test $retval -ne 0 || grep "rcu[^/]*": < $resdir/Make.out | egrep -q "Stop|Error|error:|warning:" || egrep -q "Stop|Error|error:" < $resdir/Make.out
 then
 	echo Kernel build error
-	egrep "Stop|Error|error:|warning:" < $builddir/Make.out
+	egrep "Stop|Error|error:|warning:" < $resdir/Make.out
 	echo Run aborted.
 	exit 3
 fi
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
index 477ecb1293ab..0fa8a61ccb7b 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
@@ -70,4 +70,5 @@ else
 	else
 		print_warning $nclosecalls "Reader Batch close calls in" $(($dur/60)) minute run: $i
 	fi
+	echo $nclosecalls "Reader Batch close calls in" $(($dur/60)) minute run: $i > $i/console.log.rcu.diags
 fi
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
index c27e97824163..c9bab57a77eb 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
@@ -39,6 +39,7 @@ do
 		head -1 $resdir/log
 	fi
 	TORTURE_SUITE="`cat $i/../TORTURE_SUITE`"
+	rm -f $i/console.log.*.diags
 	kvm-recheck-${TORTURE_SUITE}.sh $i
 	if test -f "$i/console.log"
 	then
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
index c5b0f94341d9..f7247ee00514 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
@@ -98,14 +98,15 @@ then
 	ln -s $base_resdir/.config $resdir # for kvm-recheck.sh
 	# Arch-independent indicator
 	touch $resdir/builtkernel
-elif kvm-build.sh $T/Kc2 $builddir
+elif kvm-build.sh $T/Kc2 $builddir $resdir
 then
 	# Had to build a kernel for this test.
 	QEMU="`identify_qemu $builddir/vmlinux`"
 	BOOT_IMAGE="`identify_boot_image $QEMU`"
-	cp $builddir/Make*.out $resdir
 	cp $builddir/vmlinux $resdir
 	cp $builddir/.config $resdir
+	cp $builddir/Module.symvers $resdir > /dev/null || :
+	cp $builddir/System.map $resdir > /dev/null || :
 	if test -n "$BOOT_IMAGE"
 	then
 		cp $builddir/$BOOT_IMAGE $resdir
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index 56610dbbdf73..5a7a62d76a50 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -347,7 +347,7 @@ function dump(first, pastlast, batchnum)
 	print "needqemurun="
 	jn=1
 	for (j = first; j < pastlast; j++) {
-		builddir=KVM "/b" jn
+		builddir=KVM "/b1"
 		cpusr[jn] = cpus[j];
 		if (cfrep[cf[j]] == "") {
 			cfr[jn] = cf[j];
diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh
index 17293436f551..84933f6aed77 100755
--- a/tools/testing/selftests/rcutorture/bin/parse-console.sh
+++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh
@@ -163,6 +163,13 @@ then
 	print_warning Summary: $summary
 	cat $T.diags >> $file.diags
 fi
+for i in $file.*.diags
+do
+	if test -f "$i"
+	then
+		cat $i >> $file.diags
+	fi
+done
 if ! test -s $file.diags
 then
 	rm -f $file.diags
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot
index 5d2cc0bd50a0..5c3213cc3ad7 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot
@@ -1,5 +1,5 @@
-rcutorture.onoff_interval=1 rcutorture.onoff_holdoff=30
-rcutree.gp_preinit_delay=3
+rcutorture.onoff_interval=200 rcutorture.onoff_holdoff=30
+rcutree.gp_preinit_delay=12
 rcutree.gp_init_delay=3
 rcutree.gp_cleanup_delay=3
 rcutree.kthread_prio=2
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE08-T.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE08-T.boot
deleted file mode 100644
index 883149b5f2d1..000000000000
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE08-T.boot
+++ /dev/null
@@ -1 +0,0 @@
-rcutree.rcu_fanout_exact=1
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh
index 24ec91041957..7bab8246392b 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh
+++ b/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh
@@ -39,7 +39,7 @@ rcutorture_param_onoff () {
 	if ! bootparam_hotplug_cpu "$1" && configfrag_hotplug_cpu "$2"
 	then
 		echo CPU-hotplug kernel, adding rcutorture onoff. 1>&2
-		echo rcutorture.onoff_interval=3 rcutorture.onoff_holdoff=30
+		echo rcutorture.onoff_interval=1000 rcutorture.onoff_holdoff=30
 	fi
 }
 
diff --git a/tools/testing/selftests/timers/raw_skew.c b/tools/testing/selftests/timers/raw_skew.c
index ca6cd146aafe..dcf73c5dab6e 100644
--- a/tools/testing/selftests/timers/raw_skew.c
+++ b/tools/testing/selftests/timers/raw_skew.c
@@ -134,6 +134,11 @@ int main(int argv, char **argc)
 	printf(" %lld.%i(act)", ppm/1000, abs((int)(ppm%1000)));
 
 	if (llabs(eppm - ppm) > 1000) {
+		if (tx1.offset || tx2.offset ||
+		    tx1.freq != tx2.freq || tx1.tick != tx2.tick) {
+			printf(" [SKIP]\n");
+			return ksft_exit_skip("The clock was adjusted externally. Shutdown NTPd or other time sync daemons\n");
+		}
 		printf(" [FAILED]\n");
 		return ksft_exit_fail();
 	}
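Aside (not part of the patch): the new bail-out works because adjtimex(2) with modes == 0 is a pure read, so a non-zero PLL offset or frequency correction means something like NTP is steering the clock during the measurement. A minimal, self-contained userspace sketch of that check, using the hypothetical helper name clock_is_disciplined():

    #include <stdio.h>
    #include <string.h>
    #include <sys/timex.h>

    /* Hypothetical helper: report whether a time-sync daemon appears to be
     * steering the clock (non-zero PLL offset or frequency correction). */
    static int clock_is_disciplined(void)
    {
    	struct timex tx;

    	memset(&tx, 0, sizeof(tx));
    	tx.modes = 0;		/* read-only query, changes nothing */
    	if (adjtimex(&tx) < 0)
    		return 0;	/* cannot tell; assume not disciplined */
    	return tx.offset != 0 || tx.freq != 0;
    }

    int main(void)
    {
    	printf("clock is%s being adjusted externally\n",
    	       clock_is_disciplined() ? "" : " not");
    	return 0;
    }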
diff --git a/tools/virtio/asm/barrier.h b/tools/virtio/asm/barrier.h
index 0ac3caf90877..d0351f83aebe 100644
--- a/tools/virtio/asm/barrier.h
+++ b/tools/virtio/asm/barrier.h
@@ -13,8 +13,8 @@
 } while (0);
 /* Weak barriers should be used. If not - it's a bug */
 # define mb() abort()
-# define rmb() abort()
-# define wmb() abort()
+# define dma_rmb() abort()
+# define dma_wmb() abort()
 #else
 #error Please fill in barrier macros
 #endif
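Aside (not part of the patch): dma_rmb()/dma_wmb() are the kernel's weak barriers for ordering accesses to memory shared with a device, which is the pairing virtio ring code depends on. The sketch below only illustrates that pairing; it stands the two macros in with C11 fences (an assumption made for the example, not how the kernel or this shim defines them) so the producer/consumer ordering can be shown in plain userspace C:

    #include <stdatomic.h>
    #include <stdint.h>

    /* Illustration-only stand-ins for the kernel's dma_wmb()/dma_rmb(). */
    #define dma_wmb()	atomic_thread_fence(memory_order_release)
    #define dma_rmb()	atomic_thread_fence(memory_order_acquire)

    struct desc { uint64_t addr; uint32_t len; };

    static struct desc ring[256];		/* hypothetical descriptor ring */
    static _Atomic uint16_t avail_idx;	/* index published to the consumer */

    /* Producer: fill the descriptor, then publish its index. */
    static void publish(uint16_t head, uint64_t addr, uint32_t len)
    {
    	ring[head].addr = addr;
    	ring[head].len = len;
    	dma_wmb();		/* descriptor stores before the index store */
    	atomic_store_explicit(&avail_idx, head + 1, memory_order_relaxed);
    }

    /* Consumer: observe the index, then read the descriptor it covers. */
    static int consume(struct desc *out)
    {
    	uint16_t idx = atomic_load_explicit(&avail_idx, memory_order_relaxed);

    	if (!idx)
    		return 0;
    	dma_rmb();		/* index load before descriptor loads */
    	*out = ring[idx - 1];
    	return 1;
    }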
diff --git a/tools/virtio/linux/kernel.h b/tools/virtio/linux/kernel.h
index fca8381bbe04..fb22bccfbc8a 100644
--- a/tools/virtio/linux/kernel.h
+++ b/tools/virtio/linux/kernel.h
@@ -52,6 +52,11 @@ static inline void *kmalloc(size_t s, gfp_t gfp)
 		return __kmalloc_fake;
 	return malloc(s);
 }
+static inline void *kmalloc_array(unsigned n, size_t s, gfp_t gfp)
+{
+	return kmalloc(n * s, gfp);
+}
+
 static inline void *kzalloc(size_t s, gfp_t gfp)
 {
 	void *p = kmalloc(s, gfp);
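Aside (not part of the patch): this userspace stub multiplies n * s unchecked, whereas the in-kernel kmalloc_array() returns NULL when the size computation would overflow. A hedged sketch of what an overflow-checked variant of the stub could look like, with kmalloc_array_checked() as a hypothetical name that is not in the tree:

    #include <stdlib.h>

    /* Hypothetical checked variant of the stub: fail instead of wrapping. */
    static inline void *kmalloc_array_checked(unsigned n, size_t s)
    {
    	if (s != 0 && n > (size_t)-1 / s)
    		return NULL;	/* n * s would overflow */
    	return malloc(n * s);
    }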
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 04e554cae3a2..108250e4d376 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -604,7 +604,7 @@ void kvm_arm_resume_guest(struct kvm *kvm)
 
 	kvm_for_each_vcpu(i, vcpu, kvm) {
 		vcpu->arch.pause = false;
-		swake_up(kvm_arch_vcpu_wq(vcpu));
+		swake_up_one(kvm_arch_vcpu_wq(vcpu));
 	}
 }
 
@@ -612,7 +612,7 @@ static void vcpu_req_sleep(struct kvm_vcpu *vcpu)
 {
 	struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu);
 
-	swait_event_interruptible(*wq, ((!vcpu->arch.power_off) &&
+	swait_event_interruptible_exclusive(*wq, ((!vcpu->arch.power_off) &&
 				       (!vcpu->arch.pause)));
 
 	if (vcpu->arch.power_off || vcpu->arch.pause) {
diff --git a/virt/kvm/arm/psci.c b/virt/kvm/arm/psci.c
index c95ab4c5a475..9b73d3ad918a 100644
--- a/virt/kvm/arm/psci.c
+++ b/virt/kvm/arm/psci.c
@@ -155,7 +155,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
 	smp_mb(); /* Make sure the above is visible */
 
 	wq = kvm_arch_vcpu_wq(vcpu);
-	swake_up(wq);
+	swake_up_one(wq);
 
 	return PSCI_RET_SUCCESS;
 }
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 57bcb27dcf30..23c2519c5b32 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -107,7 +107,7 @@ static void async_pf_execute(struct work_struct *work)
 	trace_kvm_async_pf_completed(addr, gva);
 
 	if (swq_has_sleeper(&vcpu->wq))
-		swake_up(&vcpu->wq);
+		swake_up_one(&vcpu->wq);
 
 	mmput(mm);
 	kvm_put_kvm(vcpu->kvm);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 8b47507faab5..3d233ebfbee9 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2172,7 +2172,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 	kvm_arch_vcpu_blocking(vcpu);
 
 	for (;;) {
-		prepare_to_swait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
+		prepare_to_swait_exclusive(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
 
 		if (kvm_vcpu_check_block(vcpu) < 0)
 			break;
@@ -2214,7 +2214,7 @@ bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu)
 
 	wqp = kvm_arch_vcpu_wq(vcpu);
 	if (swq_has_sleeper(wqp)) {
-		swake_up(wqp);
+		swake_up_one(wqp);
 		++vcpu->stat.halt_wakeup;
 		return true;
 	}
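Aside (not part of the patch): after the swait rename, an exclusive wait (prepare_to_swait_exclusive() or swait_event_interruptible_exclusive()) is paired with swake_up_one(), which wakes at most one exclusive waiter. A minimal kernel-style sketch of that pairing, using hypothetical names (my_wq, my_cond) and intended only to show the shape of the API rather than compile outside a kernel tree:

    #include <linux/sched.h>
    #include <linux/swait.h>

    static DECLARE_SWAIT_QUEUE_HEAD(my_wq);	/* hypothetical wait queue */
    static bool my_cond;			/* hypothetical condition */

    /* Waiter: sleep as an exclusive waiter until my_cond becomes true. */
    static void waiter(void)
    {
    	swait_event_interruptible_exclusive(my_wq, my_cond);
    }

    /* Waker: make the condition true, then wake a single exclusive waiter. */
    static void waker(void)
    {
    	my_cond = true;
    	swake_up_one(&my_wq);
    }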