-rw-r--r--Documentation/ABI/testing/sysfs-bus-css23
-rw-r--r--Documentation/ABI/testing/sysfs-devices-system-cpu23
-rw-r--r--Documentation/RCU/rcuref.txt21
-rw-r--r--Documentation/RCU/stallwarn.txt2
-rw-r--r--Documentation/RCU/whatisRCU.txt8
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt21
-rw-r--r--Documentation/arm64/elf_hwcaps.txt8
-rw-r--r--Documentation/atomic_t.txt26
-rw-r--r--Documentation/core-api/circular-buffers.rst2
-rw-r--r--Documentation/core-api/timekeeping.rst12
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/amazon,al-fic.txt29
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/amlogic,meson-gpio-intc.txt1
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/csky,mpintc.txt20
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/renesas,rza1-irqc.txt43
-rw-r--r--Documentation/devicetree/bindings/perf/fsl-imx-ddr.txt21
-rw-r--r--Documentation/devicetree/bindings/riscv/cpus.yaml26
-rw-r--r--Documentation/devicetree/bindings/timer/nxp,sysctr-timer.txt25
-rw-r--r--Documentation/driver-api/s390-drivers.rst4
-rw-r--r--Documentation/locking/lockdep-design.txt112
-rw-r--r--Documentation/memory-barriers.txt2
-rw-r--r--Documentation/process/changes.rst6
-rw-r--r--Documentation/s390/3270.rst (renamed from Documentation/s390/3270.txt)85
-rw-r--r--Documentation/s390/Debugging390.txt2142
-rw-r--r--Documentation/s390/cds.rst (renamed from Documentation/s390/cds.txt)368
-rw-r--r--Documentation/s390/common_io.rst (renamed from Documentation/s390/CommonIO)49
-rw-r--r--Documentation/s390/dasd.rst (renamed from Documentation/s390/DASD)33
-rw-r--r--Documentation/s390/debugging390.rst2613
-rw-r--r--Documentation/s390/driver-model.rst (renamed from Documentation/s390/driver-model.txt)179
-rw-r--r--Documentation/s390/index.rst30
-rw-r--r--Documentation/s390/monreader.rst (renamed from Documentation/s390/monreader.txt)85
-rw-r--r--Documentation/s390/qeth.rst (renamed from Documentation/s390/qeth.txt)36
-rw-r--r--Documentation/s390/s390dbf.rst487
-rw-r--r--Documentation/s390/s390dbf.txt667
-rw-r--r--Documentation/s390/text_files.rst11
-rw-r--r--Documentation/s390/vfio-ap.rst (renamed from Documentation/s390/vfio-ap.txt)499
-rw-r--r--Documentation/s390/vfio-ccw.rst (renamed from Documentation/s390/vfio-ccw.txt)92
-rw-r--r--Documentation/s390/zfcpdump.rst (renamed from Documentation/s390/zfcpdump.txt)2
-rw-r--r--Documentation/sysctl/kernel.txt16
-rw-r--r--Documentation/translations/ko_KR/memory-barriers.txt2
-rw-r--r--MAINTAINERS50
-rw-r--r--Makefile4
-rw-r--r--arch/alpha/include/asm/atomic.h20
-rw-r--r--arch/alpha/kernel/smp.c19
-rw-r--r--arch/alpha/oprofile/common.c6
-rw-r--r--arch/arc/Makefile4
-rw-r--r--arch/arc/include/asm/atomic.h41
-rw-r--r--arch/arc/plat-hsdk/platform.c161
-rw-r--r--arch/arm/boot/dts/armada-xp-98dx3236.dtsi8
-rw-r--r--arch/arm/boot/dts/gemini-dlink-dir-685.dts2
-rw-r--r--arch/arm/boot/dts/gemini-dlink-dns-313.dts2
-rw-r--r--arch/arm/boot/dts/imx6ul.dtsi8
-rw-r--r--arch/arm/boot/dts/meson8.dtsi5
-rw-r--r--arch/arm/boot/dts/meson8b.dtsi11
-rw-r--r--arch/arm/common/bL_switcher.c6
-rw-r--r--arch/arm/include/asm/arch_timer.h10
-rw-r--r--arch/arm/include/asm/atomic.h50
-rw-r--r--arch/arm/mach-davinci/board-da830-evm.c5
-rw-r--r--arch/arm/mach-davinci/board-omapl138-hawk.c3
-rw-r--r--arch/arm/mach-omap2/prm3xxx.c2
-rw-r--r--arch/arm64/Kconfig38
-rw-r--r--arch/arm64/Makefile23
-rw-r--r--arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi18
-rw-r--r--arch/arm64/configs/defconfig2
-rw-r--r--arch/arm64/include/asm/acpi.h3
-rw-r--r--arch/arm64/include/asm/arch_gicv3.h4
-rw-r--r--arch/arm64/include/asm/arch_timer.h13
-rw-r--r--arch/arm64/include/asm/atomic_ll_sc.h20
-rw-r--r--arch/arm64/include/asm/atomic_lse.h34
-rw-r--r--arch/arm64/include/asm/cache.h5
-rw-r--r--arch/arm64/include/asm/cacheflush.h3
-rw-r--r--arch/arm64/include/asm/cpufeature.h6
-rw-r--r--arch/arm64/include/asm/daifflags.h75
-rw-r--r--arch/arm64/include/asm/elf.h14
-rw-r--r--arch/arm64/include/asm/fpsimd.h5
-rw-r--r--arch/arm64/include/asm/hwcap.h2
-rw-r--r--arch/arm64/include/asm/irqflags.h79
-rw-r--r--arch/arm64/include/asm/kvm_host.h7
-rw-r--r--arch/arm64/include/asm/pgtable-hwdef.h3
-rw-r--r--arch/arm64/include/asm/pgtable-prot.h1
-rw-r--r--arch/arm64/include/asm/pgtable.h56
-rw-r--r--arch/arm64/include/asm/ptrace.h10
-rw-r--r--arch/arm64/include/asm/signal32.h46
-rw-r--r--arch/arm64/include/asm/simd.h10
-rw-r--r--arch/arm64/include/asm/sysreg.h1
-rw-r--r--arch/arm64/include/asm/thread_info.h5
-rw-r--r--arch/arm64/include/asm/unistd.h5
-rw-r--r--arch/arm64/include/asm/vdso.h3
-rw-r--r--arch/arm64/include/asm/vdso/compat_barrier.h44
-rw-r--r--arch/arm64/include/asm/vdso/compat_gettimeofday.h126
-rw-r--r--arch/arm64/include/asm/vdso/gettimeofday.h103
-rw-r--r--arch/arm64/include/asm/vdso/vsyscall.h53
-rw-r--r--arch/arm64/include/uapi/asm/hwcap.h2
-rw-r--r--arch/arm64/include/uapi/asm/ptrace.h3
-rw-r--r--arch/arm64/kernel/Makefile6
-rw-r--r--arch/arm64/kernel/acpi.c10
-rw-r--r--arch/arm64/kernel/asm-offsets.c34
-rw-r--r--arch/arm64/kernel/cacheinfo.c9
-rw-r--r--arch/arm64/kernel/cpufeature.c8
-rw-r--r--arch/arm64/kernel/cpuinfo.c2
-rw-r--r--arch/arm64/kernel/entry.S84
-rw-r--r--arch/arm64/kernel/fpsimd.c139
-rw-r--r--arch/arm64/kernel/image.h6
-rw-r--r--arch/arm64/kernel/irq.c26
-rw-r--r--arch/arm64/kernel/module.c10
-rw-r--r--arch/arm64/kernel/probes/kprobes.c4
-rw-r--r--arch/arm64/kernel/process.c2
-rw-r--r--arch/arm64/kernel/ptrace.c6
-rw-r--r--arch/arm64/kernel/signal32.c72
-rw-r--r--arch/arm64/kernel/sleep.S2
-rw-r--r--arch/arm64/kernel/smp.c27
-rw-r--r--arch/arm64/kernel/traps.c23
-rw-r--r--arch/arm64/kernel/vdso.c356
-rw-r--r--arch/arm64/kernel/vdso/Makefile41
-rw-r--r--arch/arm64/kernel/vdso/gettimeofday.S323
-rw-r--r--arch/arm64/kernel/vdso/vgettimeofday.c27
-rw-r--r--arch/arm64/kernel/vdso32/.gitignore2
-rw-r--r--arch/arm64/kernel/vdso32/Makefile186
-rw-r--r--arch/arm64/kernel/vdso32/note.c15
-rw-r--r--arch/arm64/kernel/vdso32/sigreturn.S62
-rw-r--r--arch/arm64/kernel/vdso32/vdso.S19
-rw-r--r--arch/arm64/kernel/vdso32/vdso.lds.S82
-rw-r--r--arch/arm64/kernel/vdso32/vgettimeofday.c59
-rw-r--r--arch/arm64/kvm/fpsimd.c4
-rw-r--r--arch/arm64/kvm/guest.c2
-rw-r--r--arch/arm64/kvm/hyp/switch.c2
-rw-r--r--arch/arm64/mm/dma-mapping.c12
-rw-r--r--arch/arm64/mm/fault.c61
-rw-r--r--arch/arm64/mm/hugetlbpage.c12
-rw-r--r--arch/arm64/mm/init.c5
-rw-r--r--arch/arm64/mm/mmu.c14
-rw-r--r--arch/arm64/mm/pageattr.c48
-rw-r--r--arch/arm64/net/bpf_jit_comp.c2
-rw-r--r--arch/csky/kernel/signal.c5
-rw-r--r--arch/ia64/include/asm/atomic.h20
-rw-r--r--arch/ia64/kernel/perfmon.c12
-rw-r--r--arch/ia64/kernel/uncached.c8
-rw-r--r--arch/m68k/Kconfig3
-rw-r--r--arch/m68k/configs/amiga_defconfig17
-rw-r--r--arch/m68k/configs/apollo_defconfig17
-rw-r--r--arch/m68k/configs/atari_defconfig17
-rw-r--r--arch/m68k/configs/bvme6000_defconfig17
-rw-r--r--arch/m68k/configs/hp300_defconfig17
-rw-r--r--arch/m68k/configs/mac_defconfig17
-rw-r--r--arch/m68k/configs/multi_defconfig17
-rw-r--r--arch/m68k/configs/mvme147_defconfig17
-rw-r--r--arch/m68k/configs/mvme16x_defconfig17
-rw-r--r--arch/m68k/configs/q40_defconfig17
-rw-r--r--arch/m68k/configs/sun3_defconfig17
-rw-r--r--arch/m68k/configs/sun3x_defconfig17
-rw-r--r--arch/m68k/kernel/dma.c57
-rw-r--r--arch/mips/Makefile3
-rw-r--r--arch/mips/boot/compressed/Makefile2
-rw-r--r--arch/mips/boot/compressed/calc_vmlinuz_load_addr.c2
-rw-r--r--arch/mips/include/asm/atomic.h22
-rw-r--r--arch/mips/include/asm/mach-ath79/ar933x_uart.h4
-rw-r--r--arch/mips/include/asm/mips-gic.h30
-rw-r--r--arch/mips/mm/mmap.c2
-rw-r--r--arch/mips/mm/tlbex.c29
-rw-r--r--arch/parisc/kernel/module.c4
-rw-r--r--arch/powerpc/include/asm/atomic.h44
-rw-r--r--arch/powerpc/include/asm/processor.h2
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S2
-rw-r--r--arch/powerpc/kernel/ptrace.c1
-rw-r--r--arch/powerpc/kernel/rtas.c3
-rw-r--r--arch/powerpc/mm/book3s64/mmu_context.c55
-rw-r--r--arch/riscv/boot/dts/sifive/fu540-c000.dtsi6
-rw-r--r--arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts13
-rw-r--r--arch/riscv/configs/defconfig5
-rw-r--r--arch/riscv/include/asm/atomic.h44
-rw-r--r--arch/riscv/mm/fault.c3
-rw-r--r--arch/s390/Kconfig41
-rw-r--r--arch/s390/configs/debug_defconfig2
-rw-r--r--arch/s390/configs/defconfig600
-rw-r--r--arch/s390/configs/performance_defconfig678
-rw-r--r--arch/s390/configs/zfcpdump_defconfig1
-rw-r--r--arch/s390/crypto/ghash_s390.c2
-rw-r--r--arch/s390/crypto/prng.c4
-rw-r--r--arch/s390/crypto/sha1_s390.c2
-rw-r--r--arch/s390/crypto/sha256_s390.c2
-rw-r--r--arch/s390/crypto/sha512_s390.c2
-rw-r--r--arch/s390/include/asm/airq.h2
-rw-r--r--arch/s390/include/asm/atomic.h38
-rw-r--r--arch/s390/include/asm/ccwdev.h4
-rw-r--r--arch/s390/include/asm/cio.h41
-rw-r--r--arch/s390/include/asm/ctl_reg.h9
-rw-r--r--arch/s390/include/asm/debug.h153
-rw-r--r--arch/s390/include/asm/facility.h21
-rw-r--r--arch/s390/include/asm/idals.h3
-rw-r--r--arch/s390/include/asm/kvm_host.h7
-rw-r--r--arch/s390/include/asm/mem_encrypt.h17
-rw-r--r--arch/s390/include/asm/pci.h5
-rw-r--r--arch/s390/include/asm/percpu.h2
-rw-r--r--arch/s390/include/asm/processor.h7
-rw-r--r--arch/s390/include/asm/smp.h35
-rw-r--r--arch/s390/include/asm/spinlock.h4
-rw-r--r--arch/s390/include/asm/tlbflush.h17
-rw-r--r--arch/s390/include/asm/unwind.h19
-rw-r--r--arch/s390/include/uapi/asm/runtime_instr.h2
-rw-r--r--arch/s390/kernel/Makefile2
-rw-r--r--arch/s390/kernel/debug.c105
-rw-r--r--arch/s390/kernel/dis.c5
-rw-r--r--arch/s390/kernel/dumpstack.c2
-rw-r--r--arch/s390/kernel/entry.S4
-rw-r--r--arch/s390/kernel/jump_label.c23
-rw-r--r--arch/s390/kernel/machine_kexec.c3
-rw-r--r--arch/s390/kernel/processor.c19
-rw-r--r--arch/s390/kernel/setup.c2
-rw-r--r--arch/s390/kernel/smp.c21
-rw-r--r--arch/s390/kernel/swsusp.S2
-rw-r--r--arch/s390/kernel/traps.c10
-rw-r--r--arch/s390/kernel/unwind_bc.c16
-rw-r--r--arch/s390/kvm/kvm-s390.c3
-rw-r--r--arch/s390/kvm/priv.c86
-rw-r--r--arch/s390/lib/Makefile3
-rw-r--r--arch/s390/mm/init.c47
-rw-r--r--arch/s390/mm/maccess.c9
-rw-r--r--arch/s390/mm/mmap.c2
-rw-r--r--arch/s390/pci/pci.c15
-rw-r--r--arch/s390/pci/pci_clp.c2
-rw-r--r--arch/s390/pci/pci_debug.c2
-rw-r--r--arch/s390/purgatory/.gitignore3
-rw-r--r--arch/s390/tools/Makefile7
-rw-r--r--arch/s390/tools/opcodes.txt51
-rw-r--r--arch/sparc/include/asm/atomic_64.h8
-rw-r--r--arch/x86/Kconfig38
-rw-r--r--arch/x86/Kconfig.cpu13
-rw-r--r--arch/x86/entry/common.c17
-rw-r--r--arch/x86/entry/entry_32.S24
-rw-r--r--arch/x86/entry/entry_64.S36
-rw-r--r--arch/x86/entry/vdso/Makefile9
-rw-r--r--arch/x86/entry/vdso/vclock_gettime.c256
-rw-r--r--arch/x86/entry/vdso/vdso.lds.S2
-rw-r--r--arch/x86/entry/vdso/vdso32/vdso32.lds.S2
-rw-r--r--arch/x86/entry/vdso/vdsox32.lds.S1
-rw-r--r--arch/x86/entry/vdso/vma.c2
-rw-r--r--arch/x86/entry/vsyscall/Makefile2
-rw-r--r--arch/x86/entry/vsyscall/vsyscall_64.c37
-rw-r--r--arch/x86/entry/vsyscall/vsyscall_gtod.c83
-rw-r--r--arch/x86/events/core.c18
-rw-r--r--arch/x86/events/intel/ds.c9
-rw-r--r--arch/x86/events/intel/uncore.c1
-rw-r--r--arch/x86/events/perf_event.h21
-rw-r--r--arch/x86/hyperv/hv_init.c91
-rw-r--r--arch/x86/include/asm/apic.h5
-rw-r--r--arch/x86/include/asm/atomic.h8
-rw-r--r--arch/x86/include/asm/atomic64_32.h66
-rw-r--r--arch/x86/include/asm/atomic64_64.h46
-rw-r--r--arch/x86/include/asm/barrier.h4
-rw-r--r--arch/x86/include/asm/cpufeature.h4
-rw-r--r--arch/x86/include/asm/cpufeatures.h21
-rw-r--r--arch/x86/include/asm/fpu/xstate.h1
-rw-r--r--arch/x86/include/asm/hpet.h7
-rw-r--r--arch/x86/include/asm/hw_irq.h5
-rw-r--r--arch/x86/include/asm/hyperv-tlfs.h6
-rw-r--r--arch/x86/include/asm/intel-family.h2
-rw-r--r--arch/x86/include/asm/irq_regs.h4
-rw-r--r--arch/x86/include/asm/jump_label.h2
-rw-r--r--arch/x86/include/asm/mshyperv.h81
-rw-r--r--arch/x86/include/asm/msr-index.h9
-rw-r--r--arch/x86/include/asm/mwait.h4
-rw-r--r--arch/x86/include/asm/percpu.h236
-rw-r--r--arch/x86/include/asm/processor.h3
-rw-r--r--arch/x86/include/asm/pvclock.h2
-rw-r--r--arch/x86/include/asm/smp.h3
-rw-r--r--arch/x86/include/asm/text-patching.h15
-rw-r--r--arch/x86/include/asm/time.h1
-rw-r--r--arch/x86/include/asm/vdso/gettimeofday.h261
-rw-r--r--arch/x86/include/asm/vdso/vsyscall.h44
-rw-r--r--arch/x86/include/asm/vgtod.h75
-rw-r--r--arch/x86/include/asm/vsyscall.h6
-rw-r--r--arch/x86/include/asm/vvar.h7
-rw-r--r--arch/x86/include/uapi/asm/perf_regs.h3
-rw-r--r--arch/x86/kernel/acpi/cstate.c15
-rw-r--r--arch/x86/kernel/alternative.c154
-rw-r--r--arch/x86/kernel/apic/apic.c90
-rw-r--r--arch/x86/kernel/apic/apic_flat_64.c4
-rw-r--r--arch/x86/kernel/apic/io_apic.c50
-rw-r--r--arch/x86/kernel/apic/msi.c4
-rw-r--r--arch/x86/kernel/apic/vector.c4
-rw-r--r--arch/x86/kernel/apic/x2apic_cluster.c2
-rw-r--r--arch/x86/kernel/cpu/Makefile2
-rw-r--r--arch/x86/kernel/cpu/aperfmperf.c12
-rw-r--r--arch/x86/kernel/cpu/bugs.c11
-rw-r--r--arch/x86/kernel/cpu/cacheinfo.c3
-rw-r--r--arch/x86/kernel/cpu/common.c58
-rw-r--r--arch/x86/kernel/cpu/cpuid-deps.c9
-rw-r--r--arch/x86/kernel/cpu/intel.c27
-rw-r--r--arch/x86/kernel/cpu/mce/amd.c92
-rw-r--r--arch/x86/kernel/cpu/mce/core.c177
-rw-r--r--arch/x86/kernel/cpu/mce/inject.c37
-rw-r--r--arch/x86/kernel/cpu/mce/internal.h12
-rw-r--r--arch/x86/kernel/cpu/mce/severity.c14
-rw-r--r--arch/x86/kernel/cpu/microcode/core.c15
-rw-r--r--arch/x86/kernel/cpu/mshyperv.c8
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c15
-rw-r--r--arch/x86/kernel/cpu/resctrl/rdtgroup.c35
-rw-r--r--arch/x86/kernel/cpu/scattered.c4
-rw-r--r--arch/x86/kernel/cpu/umwait.c200
-rw-r--r--arch/x86/kernel/cpu/vmware.c2
-rw-r--r--arch/x86/kernel/cpu/zhaoxin.c167
-rw-r--r--arch/x86/kernel/fpu/core.c52
-rw-r--r--arch/x86/kernel/fpu/init.c19
-rw-r--r--arch/x86/kernel/fpu/xstate.c11
-rw-r--r--arch/x86/kernel/ftrace.c10
-rw-r--r--arch/x86/kernel/head64.c20
-rw-r--r--arch/x86/kernel/hpet.c935
-rw-r--r--arch/x86/kernel/i8253.c25
-rw-r--r--arch/x86/kernel/idt.c3
-rw-r--r--arch/x86/kernel/irq.c2
-rw-r--r--arch/x86/kernel/jailhouse.c2
-rw-r--r--arch/x86/kernel/jump_label.c121
-rw-r--r--arch/x86/kernel/perf_regs.c7
-rw-r--r--arch/x86/kernel/ptrace.c21
-rw-r--r--arch/x86/kernel/pvclock.c1
-rw-r--r--arch/x86/kernel/smp.c2
-rw-r--r--arch/x86/kernel/time.c7
-rw-r--r--arch/x86/kernel/tls.c9
-rw-r--r--arch/x86/kernel/tsc.c57
-rw-r--r--arch/x86/kernel/tsc_msr.c4
-rw-r--r--arch/x86/kernel/unwind_orc.c26
-rw-r--r--arch/x86/kvm/cpuid.h2
-rw-r--r--arch/x86/kvm/lapic.c2
-rw-r--r--arch/x86/kvm/pmu.c4
-rw-r--r--arch/x86/kvm/vmx/nested.c30
-rw-r--r--arch/x86/kvm/x86.c19
-rw-r--r--arch/x86/lib/cache-smp.c3
-rw-r--r--arch/x86/mm/fault.c18
-rw-r--r--arch/x86/mm/init_64.c24
-rw-r--r--arch/x86/platform/efi/quirks.c2
-rw-r--r--arch/x86/ras/Kconfig10
-rw-r--r--block/bfq-iosched.c2
-rw-r--r--block/blk-mq-debugfs.c7
-rw-r--r--crypto/cryptd.c1
-rw-r--r--crypto/crypto_user_base.c3
-rw-r--r--drivers/acpi/acpi_pad.c1
-rw-r--r--drivers/acpi/irq.c26
-rw-r--r--drivers/acpi/pptt.c61
-rw-r--r--drivers/acpi/processor_idle.c1
-rw-r--r--drivers/auxdisplay/cfag12864bfb.c5
-rw-r--r--drivers/auxdisplay/ht16k33.c4
-rw-r--r--drivers/base/cacheinfo.c5
-rw-r--r--drivers/char/agp/generic.c3
-rw-r--r--drivers/clk/clk.c2
-rw-r--r--drivers/clk/meson/g12a.c4
-rw-r--r--drivers/clk/meson/g12a.h2
-rw-r--r--drivers/clk/meson/meson8b.c10
-rw-r--r--drivers/clk/socfpga/clk-s10.c4
-rw-r--r--drivers/clk/tegra/clk-tegra210.c2
-rw-r--r--drivers/clk/ti/clkctrl.c7
-rw-r--r--drivers/clocksource/Kconfig14
-rw-r--r--drivers/clocksource/Makefile5
-rw-r--r--drivers/clocksource/arc_timer.c3
-rw-r--r--drivers/clocksource/arm_arch_timer.c15
-rw-r--r--drivers/clocksource/exynos_mct.c4
-rw-r--r--drivers/clocksource/hyperv_timer.c339
-rw-r--r--drivers/clocksource/timer-davinci.c369
-rw-r--r--drivers/clocksource/timer-imx-sysctr.c145
-rw-r--r--drivers/clocksource/timer-ixp4xx.c16
-rw-r--r--drivers/clocksource/timer-meson6.c5
-rw-r--r--drivers/clocksource/timer-tegra.c416
-rw-r--r--drivers/clocksource/timer-tegra20.c379
-rw-r--r--drivers/crypto/nx/nx-842-pseries.c6
-rw-r--r--drivers/dma/dma-jz4780.c5
-rw-r--r--drivers/dma/imx-sdma.c52
-rw-r--r--drivers/dma/qcom/bam_dma.c3
-rw-r--r--drivers/firmware/efi/efi-bgrt.c5
-rw-r--r--drivers/firmware/efi/efi.c12
-rw-r--r--drivers/firmware/efi/efibc.c12
-rw-r--r--drivers/gpio/gpio-mb86s7x.c51
-rw-r--r--drivers/gpio/gpiolib-of.c9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c19
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_chardev.c2
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c2
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c4
-rw-r--r--drivers/gpu/drm/amd/powerplay/inc/hwmgr.h1
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c4
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_gpu.c7
-rw-r--r--drivers/gpu/drm/i915/intel_ringbuffer.c6
-rw-r--r--drivers/gpu/drm/imx/ipuv3-crtc.c6
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_drv.c2
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_vq.c2
-rw-r--r--drivers/hid/hid-ids.h3
-rw-r--r--drivers/hid/hid-logitech-dj.c4
-rw-r--r--drivers/hid/hid-multitouch.c4
-rw-r--r--drivers/hid/hid-quirks.c1
-rw-r--r--drivers/hid/hid-uclogic-core.c2
-rw-r--r--drivers/hid/hid-uclogic-params.c2
-rw-r--r--drivers/hid/intel-ish-hid/ishtp-fw-loader.c2
-rw-r--r--drivers/hid/intel-ish-hid/ishtp-hid-client.c4
-rw-r--r--drivers/hid/intel-ish-hid/ishtp/bus.c15
-rw-r--r--drivers/hv/Kconfig3
-rw-r--r--drivers/hv/hv.c156
-rw-r--r--drivers/hv/hv_util.c1
-rw-r--r--drivers/hv/hyperv_vmbus.h3
-rw-r--r--drivers/hv/vmbus_drv.c42
-rw-r--r--drivers/iio/humidity/dht11.c8
-rw-r--r--drivers/iio/industrialio-core.c4
-rw-r--r--drivers/infiniband/core/device.c2
-rw-r--r--drivers/infiniband/hw/mlx4/alias_GUID.c6
-rw-r--r--drivers/irqchip/Kconfig32
-rw-r--r--drivers/irqchip/Makefile2
-rw-r--r--drivers/irqchip/irq-al-fic.c278
-rw-r--r--drivers/irqchip/irq-csky-mpintc.c101
-rw-r--r--drivers/irqchip/irq-gic-v2m.c85
-rw-r--r--drivers/irqchip/irq-gic-v3-its.c35
-rw-r--r--drivers/irqchip/irq-gic-v3.c10
-rw-r--r--drivers/irqchip/irq-mbigen.c3
-rw-r--r--drivers/irqchip/irq-meson-gpio.c1
-rw-r--r--drivers/irqchip/irq-mips-gic.c4
-rw-r--r--drivers/irqchip/irq-renesas-intc-irqpin.c3
-rw-r--r--drivers/irqchip/irq-renesas-irqc.c91
-rw-r--r--drivers/irqchip/irq-renesas-rza1.c283
-rw-r--r--drivers/irqchip/irq-sni-exiu.c142
-rw-r--r--drivers/irqchip/irq-ti-sci-inta.c4
-rw-r--r--drivers/irqchip/qcom-irq-combiner.c5
-rw-r--r--drivers/leds/trigger/ledtrig-activity.c2
-rw-r--r--drivers/md/dm-init.c10
-rw-r--r--drivers/md/dm-log-writes.c23
-rw-r--r--drivers/md/dm-table.c2
-rw-r--r--drivers/md/dm-verity-target.c4
-rw-r--r--drivers/mfd/stmfx.c12
-rw-r--r--drivers/mtd/nand/raw/ingenic/Kconfig2
-rw-r--r--drivers/mtd/nand/raw/ingenic/Makefile4
-rw-r--r--drivers/mtd/nand/raw/ingenic/ingenic_ecc.c9
-rw-r--r--drivers/mtd/nand/raw/ingenic/ingenic_nand_drv.c (renamed from drivers/mtd/nand/raw/ingenic/ingenic_nand.c)0
-rw-r--r--drivers/mtd/nand/raw/nand_base.c3
-rw-r--r--drivers/mtd/nand/raw/sunxi_nand.c40
-rw-r--r--drivers/mtd/nand/spi/gigadevice.c2
-rw-r--r--drivers/mtd/nand/spi/macronix.c4
-rw-r--r--drivers/mtd/spi-nor/spi-nor.c119
-rw-r--r--drivers/net/bonding/bond_main.c2
-rw-r--r--drivers/net/dsa/microchip/ksz_common.c6
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/aq_filters.c10
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/aq_nic.c1
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/aq_nic.h1
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c19
-rw-r--r--drivers/net/ethernet/cadence/macb_main.c2
-rw-r--r--drivers/net/ethernet/emulex/benet/be_ethtool.c28
-rw-r--r--drivers/net/ethernet/sis/sis900.c16
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_main.c22
-rw-r--r--drivers/net/ppp/ppp_mppe.c1
-rw-r--r--drivers/net/team/team.c2
-rw-r--r--drivers/net/usb/qmi_wwan.c2
-rw-r--r--drivers/net/vrf.c2
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c2
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/rx.c2
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c2
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/utils.c2
-rw-r--r--drivers/net/wireless/mac80211_hwsim.c2
-rw-r--r--drivers/net/wireless/ti/wlcore/main.c2
-rw-r--r--drivers/net/wireless/ti/wlcore/rx.c2
-rw-r--r--drivers/net/wireless/ti/wlcore/tx.c2
-rw-r--r--drivers/net/wireless/virt_wifi.c2
-rw-r--r--drivers/pci/pci-driver.c8
-rw-r--r--drivers/perf/Kconfig8
-rw-r--r--drivers/perf/Makefile1
-rw-r--r--drivers/perf/arm_pmu_acpi.c72
-rw-r--r--drivers/perf/arm_spe_pmu.c12
-rw-r--r--drivers/perf/fsl_imx8_ddr_perf.c554
-rw-r--r--drivers/pinctrl/mediatek/mtk-eint.c34
-rw-r--r--drivers/pinctrl/pinctrl-mcp23s08.c8
-rw-r--r--drivers/pinctrl/pinctrl-ocelot.c18
-rw-r--r--drivers/ras/cec.c132
-rw-r--r--drivers/s390/block/Kconfig2
-rw-r--r--drivers/s390/block/dasd_devmap.c2
-rw-r--r--drivers/s390/char/Kconfig22
-rw-r--r--drivers/s390/char/Makefile1
-rw-r--r--drivers/s390/char/sclp_async.c189
-rw-r--r--drivers/s390/char/zcore.c2
-rw-r--r--drivers/s390/cio/airq.c37
-rw-r--r--drivers/s390/cio/ccwreq.c9
-rw-r--r--drivers/s390/cio/chsc.c30
-rw-r--r--drivers/s390/cio/cio.h3
-rw-r--r--drivers/s390/cio/css.c187
-rw-r--r--drivers/s390/cio/device.c68
-rw-r--r--drivers/s390/cio/device_fsm.c49
-rw-r--r--drivers/s390/cio/device_id.c20
-rw-r--r--drivers/s390/cio/device_ops.c21
-rw-r--r--drivers/s390/cio/device_pgid.c22
-rw-r--r--drivers/s390/cio/device_status.c24
-rw-r--r--drivers/s390/cio/io_sch.h20
-rw-r--r--drivers/s390/cio/qdio_main.c1
-rw-r--r--drivers/s390/cio/qdio_setup.c2
-rw-r--r--drivers/s390/cio/qdio_thinint.c6
-rw-r--r--drivers/s390/cio/vfio_ccw_cp.c524
-rw-r--r--drivers/s390/cio/vfio_ccw_cp.h7
-rw-r--r--drivers/s390/cio/vfio_ccw_drv.c13
-rw-r--r--drivers/s390/crypto/pkey_api.c8
-rw-r--r--drivers/s390/crypto/vfio_ap_drv.c34
-rw-r--r--drivers/s390/crypto/vfio_ap_ops.c380
-rw-r--r--drivers/s390/crypto/vfio_ap_private.h15
-rw-r--r--drivers/s390/crypto/zcrypt_msgtype6.c4
-rw-r--r--drivers/s390/net/Kconfig8
-rw-r--r--drivers/s390/virtio/virtio_ccw.c246
-rw-r--r--drivers/scsi/vmw_pvscsi.c6
-rw-r--r--drivers/soc/Makefile2
-rw-r--r--drivers/soc/ti/Kconfig4
-rw-r--r--drivers/target/iscsi/iscsi_target_auth.c16
-rw-r--r--drivers/target/target_core_iblock.c2
-rw-r--r--drivers/tty/tty_ldisc.c8
-rw-r--r--fs/Kconfig1
-rw-r--r--fs/afs/callback.c4
-rw-r--r--fs/afs/inode.c31
-rw-r--r--fs/afs/internal.h8
-rw-r--r--fs/afs/volume.c1
-rw-r--r--fs/aio.c28
-rw-r--r--fs/binfmt_flat.c23
-rw-r--r--fs/ceph/mds_client.c3
-rw-r--r--fs/cifs/smb2ops.c64
-rw-r--r--fs/cifs/smb2pdu.h14
-rw-r--r--fs/dax.c11
-rw-r--r--fs/eventpoll.c4
-rw-r--r--fs/inode.c2
-rw-r--r--fs/io_uring.c12
-rw-r--r--fs/namespace.c7
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayoutdev.c2
-rw-r--r--fs/nfsd/nfs4state.c2
-rw-r--r--fs/proc/array.c2
-rw-r--r--fs/proc/base.c3
-rw-r--r--fs/select.c18
-rw-r--r--fs/userfaultfd.c42
-rw-r--r--include/asm-generic/atomic64.h20
-rw-r--r--include/asm-generic/vdso/vsyscall.h50
-rw-r--r--include/clocksource/hyperv_timer.h107
-rw-r--r--include/clocksource/timer-davinci.h44
-rw-r--r--include/dt-bindings/clock/g12a-clkc.h2
-rw-r--r--include/dt-bindings/clock/sifive-fu540-prci.h2
-rw-r--r--include/linux/acpi.h12
-rw-r--r--include/linux/cacheinfo.h2
-rw-r--r--include/linux/cpuhotplug.h2
-rw-r--r--include/linux/device.h3
-rw-r--r--include/linux/hrtimer.h16
-rw-r--r--include/linux/hrtimer_defs.h27
-rw-r--r--include/linux/intel-ish-client-if.h1
-rw-r--r--include/linux/irqchip/arm-gic-common.h5
-rw-r--r--include/linux/irqchip/arm-gic.h3
-rw-r--r--include/linux/jump_label.h3
-rw-r--r--include/linux/kernel.h3
-rw-r--r--include/linux/lockdep.h43
-rw-r--r--include/linux/module.h5
-rw-r--r--include/linux/mtd/spi-nor.h3
-rw-r--r--include/linux/pagemap.h13
-rw-r--r--include/linux/percpu-rwsem.h14
-rw-r--r--include/linux/perf/arm_pmu.h2
-rw-r--r--include/linux/perf_event.h1
-rw-r--r--include/linux/perf_regs.h8
-rw-r--r--include/linux/pfn_t.h2
-rw-r--r--include/linux/processor.h9
-rw-r--r--include/linux/rcu_sync.h40
-rw-r--r--include/linux/rcupdate.h21
-rw-r--r--include/linux/rwsem.h16
-rw-r--r--include/linux/sched.h6
-rw-r--r--include/linux/sched/wake_q.h5
-rw-r--r--include/linux/signal.h2
-rw-r--r--include/linux/smp.h52
-rw-r--r--include/linux/srcutree.h14
-rw-r--r--include/linux/stop_machine.h1
-rw-r--r--include/linux/suspend.h26
-rw-r--r--include/linux/timekeeping.h32
-rw-r--r--include/linux/timer.h27
-rw-r--r--include/linux/torture.h2
-rw-r--r--include/linux/types.h2
-rw-r--r--include/linux/xarray.h1
-rw-r--r--include/net/cfg80211.h2
-rw-r--r--include/net/ip6_route.h4
-rw-r--r--include/net/route.h1
-rw-r--r--include/net/tls.h15
-rw-r--r--include/vdso/datapage.h89
-rw-r--r--include/vdso/helpers.h56
-rw-r--r--include/vdso/vsyscall.h11
-rw-r--r--init/init_task.c2
-rw-r--r--init/initramfs.c4
-rw-r--r--kernel/bpf/syscall.c2
-rw-r--r--kernel/cgroup/cgroup.c3
-rw-r--r--kernel/cpu.c15
-rw-r--r--kernel/events/core.c27
-rw-r--r--kernel/events/uprobes.c4
-rw-r--r--kernel/fork.c70
-rw-r--r--kernel/futex.c69
-rw-r--r--kernel/irq/Makefile3
-rw-r--r--kernel/irq/affinity.c12
-rw-r--r--kernel/irq/autoprobe.c6
-rw-r--r--kernel/irq/chip.c10
-rw-r--r--kernel/irq/cpuhotplug.c2
-rw-r--r--kernel/irq/internals.h26
-rw-r--r--kernel/irq/irqdesc.c16
-rw-r--r--kernel/irq/irqdomain.c4
-rw-r--r--kernel/irq/manage.c90
-rw-r--r--kernel/irq/timings.c453
-rw-r--r--kernel/jump_label.c64
-rw-r--r--kernel/locking/Makefile2
-rw-r--r--kernel/locking/lock_events.h45
-rw-r--r--kernel/locking/lock_events_list.h12
-rw-r--r--kernel/locking/lockdep.c742
-rw-r--r--kernel/locking/lockdep_internals.h36
-rw-r--r--kernel/locking/locktorture.c2
-rw-r--r--kernel/locking/percpu-rwsem.c2
-rw-r--r--kernel/locking/rwsem-xadd.c745
-rw-r--r--kernel/locking/rwsem.c1453
-rw-r--r--kernel/locking/rwsem.h306
-rw-r--r--kernel/module.c5
-rw-r--r--kernel/power/suspend.c3
-rw-r--r--kernel/ptrace.c7
-rw-r--r--kernel/rcu/rcu.h5
-rw-r--r--kernel/rcu/rcutorture.c96
-rw-r--r--kernel/rcu/srcutree.c69
-rw-r--r--kernel/rcu/sync.c214
-rw-r--r--kernel/rcu/tree.c164
-rw-r--r--kernel/rcu/tree.h6
-rw-r--r--kernel/rcu/tree_exp.h53
-rw-r--r--kernel/rcu/tree_plugin.h195
-rw-r--r--kernel/rcu/tree_stall.h4
-rw-r--r--kernel/rcu/update.c13
-rw-r--r--kernel/sched/fair.c5
-rw-r--r--kernel/signal.c5
-rw-r--r--kernel/smp.c12
-rw-r--r--kernel/softirq.c2
-rw-r--r--kernel/stop_machine.c19
-rw-r--r--kernel/time/Makefile1
-rw-r--r--kernel/time/alarmtimer.c1
-rw-r--r--kernel/time/clocksource.c4
-rw-r--r--kernel/time/hrtimer.c8
-rw-r--r--kernel/time/ntp.c4
-rw-r--r--kernel/time/posix-timers.c13
-rw-r--r--kernel/time/time.c4
-rw-r--r--kernel/time/timekeeping.c2
-rw-r--r--kernel/time/timer_list.c36
-rw-r--r--kernel/time/vsyscall.c133
-rw-r--r--kernel/torture.c23
-rw-r--r--kernel/trace/ftrace.c10
-rw-r--r--kernel/trace/trace.c24
-rw-r--r--kernel/up.c3
-rw-r--r--lib/Kconfig5
-rw-r--r--lib/Kconfig.debug28
-rw-r--r--lib/atomic64.c32
-rw-r--r--lib/debugobjects.c321
-rw-r--r--lib/devres.c3
-rw-r--r--lib/idr.c14
-rw-r--r--lib/mpi/mpi-pow.c6
-rw-r--r--lib/raid6/s390vx.uc2
-rw-r--r--lib/reed_solomon/Makefile2
-rw-r--r--lib/reed_solomon/decode_rs.c115
-rw-r--r--lib/reed_solomon/reed_solomon.c12
-rw-r--r--lib/reed_solomon/test_rslib.c518
-rw-r--r--lib/test_xarray.c38
-rw-r--r--lib/vdso/Kconfig36
-rw-r--r--lib/vdso/Makefile22
-rw-r--r--lib/vdso/gettimeofday.c239
-rw-r--r--lib/xarray.c12
-rw-r--r--mm/filemap.c146
-rw-r--r--mm/huge_memory.c3
-rw-r--r--mm/hugetlb.c29
-rw-r--r--mm/khugepaged.c4
-rw-r--r--mm/memfd.c2
-rw-r--r--mm/memory-failure.c7
-rw-r--r--mm/mempolicy.c2
-rw-r--r--mm/migrate.c2
-rw-r--r--mm/oom_kill.c12
-rw-r--r--mm/page_alloc.c3
-rw-r--r--mm/page_idle.c4
-rw-r--r--mm/page_io.c20
-rw-r--r--mm/shmem.c2
-rw-r--r--mm/swap_state.c4
-rw-r--r--mm/vmalloc.c15
-rw-r--r--mm/vmscan.c27
-rw-r--r--net/bluetooth/6lowpan.c4
-rw-r--r--net/bluetooth/l2cap_core.c2
-rw-r--r--net/ipv4/ip_output.c12
-rw-r--r--net/ipv4/raw.c2
-rw-r--r--net/ipv4/route.c33
-rw-r--r--net/ipv6/ip6_output.c2
-rw-r--r--net/ipv6/route.c5
-rw-r--r--net/netfilter/nf_flow_table_ip.c2
-rw-r--r--net/packet/af_packet.c23
-rw-r--r--net/packet/internal.h1
-rw-r--r--net/sched/sch_cbs.c9
-rw-r--r--net/sctp/endpointola.c8
-rw-r--r--net/smc/af_smc.c5
-rw-r--r--net/smc/smc_core.c3
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c7
-rw-r--r--net/sunrpc/xprtsock.c16
-rw-r--r--net/tipc/core.c12
-rw-r--r--net/tipc/netlink_compat.c18
-rw-r--r--net/tls/tls_main.c3
-rw-r--r--samples/pidfd/pidfd-metadata.c8
-rwxr-xr-xscripts/atomic/check-atomics.sh2
-rw-r--r--security/apparmor/label.c8
-rw-r--r--sound/core/seq/oss/seq_oss_ioctl.c2
-rw-r--r--sound/core/seq/oss/seq_oss_rw.c2
-rw-r--r--sound/firewire/amdtp-am824.c2
-rw-r--r--sound/hda/hdac_device.c18
-rw-r--r--sound/pci/hda/patch_realtek.c8
-rw-r--r--sound/usb/line6/pcm.c5
-rw-r--r--sound/usb/mixer_quirks.c4
-rw-r--r--tools/arch/x86/include/uapi/asm/perf_regs.h3
-rw-r--r--tools/include/linux/rcu.h4
-rw-r--r--tools/memory-model/linux-kernel.bell6
-rw-r--r--tools/memory-model/linux-kernel.cat102
-rw-r--r--tools/memory-model/linux-kernel.def1
-rw-r--r--tools/memory-model/litmus-tests/MP+poonceonces.litmus2
-rw-r--r--tools/memory-model/litmus-tests/README2
-rw-r--r--tools/memory-model/lock.cat2
-rw-r--r--tools/memory-model/scripts/README4
-rwxr-xr-xtools/memory-model/scripts/checkalllitmus.sh2
-rwxr-xr-xtools/memory-model/scripts/checklitmus.sh2
-rw-r--r--tools/memory-model/scripts/parseargs.sh2
-rw-r--r--tools/memory-model/scripts/runlitmushist.sh2
-rw-r--r--tools/perf/arch/x86/include/perf_regs.h1
-rw-r--r--tools/perf/arch/x86/util/perf_regs.c4
-rw-r--r--tools/testing/radix-tree/idr-test.c46
-rw-r--r--tools/testing/radix-tree/linux/rcupdate.h2
-rw-r--r--tools/testing/selftests/kvm/x86_64/evmcs_test.c1
-rw-r--r--tools/testing/selftests/powerpc/mm/.gitignore3
-rw-r--r--tools/testing/selftests/powerpc/mm/Makefile4
-rw-r--r--tools/testing/selftests/powerpc/mm/large_vm_fork_separation.c87
-rw-r--r--tools/testing/selftests/rcutorture/Makefile3
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/configinit.sh39
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/cpus2use.sh5
-rw-r--r--tools/testing/selftests/rcutorture/bin/functions.sh13
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/jitter.sh13
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-build.sh9
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-find-errors.sh3
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck.sh13
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh23
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm.sh14
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/parse-build.sh2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/parse-console.sh1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/CFcommon3
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL14
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL.boot3
-rw-r--r--tools/testing/selftests/timers/freq-step.c6
-rw-r--r--tools/testing/selftests/x86/Makefile5
-rw-r--r--tools/testing/selftests/x86/fsgsbase.c223
-rw-r--r--tools/testing/selftests/x86/syscall_arg_fault.c112
-rw-r--r--tools/testing/selftests/x86/test_vsyscall.c120
737 files changed, 21674 insertions, 12142 deletions
diff --git a/Documentation/ABI/testing/sysfs-bus-css b/Documentation/ABI/testing/sysfs-bus-css
index 2979c40c10e9..966f8504bd7b 100644
--- a/Documentation/ABI/testing/sysfs-bus-css
+++ b/Documentation/ABI/testing/sysfs-bus-css
@@ -33,3 +33,26 @@ Description: Contains the PIM/PAM/POM values, as reported by the
 		in sync with the values current in the channel subsystem).
 		Note: This is an I/O-subchannel specific attribute.
 Users:		s390-tools, HAL
+
+What:		/sys/bus/css/devices/.../driver_override
+Date:		June 2019
+Contact:	Cornelia Huck <cohuck@redhat.com>
+		linux-s390@vger.kernel.org
+Description:	This file allows the driver for a device to be specified. When
+		specified, only a driver with a name matching the value written
+		to driver_override will have an opportunity to bind to the
+		device. The override is specified by writing a string to the
+		driver_override file (echo vfio-ccw > driver_override) and
+		may be cleared with an empty string (echo > driver_override).
+		This returns the device to standard matching rules binding.
+		Writing to driver_override does not automatically unbind the
+		device from its current driver or make any attempt to
+		automatically load the specified driver. If no driver with a
+		matching name is currently loaded in the kernel, the device
+		will not bind to any driver. This also allows devices to
+		opt-out of driver binding using a driver_override name such as
+		"none". Only a single driver may be specified in the override,
+		there is no support for parsing delimiters.
+		Note that unlike the mechanism of the same name for pci, this
+		file does not allow to override basic matching rules. I.e.,
+		the driver must still match the subchannel type of the device.
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index 1528239f69b2..923fe2001472 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -538,3 +538,26 @@ Description: Intel Energy and Performance Bias Hint (EPB)
 
 		This attribute is present for all online CPUs supporting the
 		Intel EPB feature.
+
+What:		/sys/devices/system/cpu/umwait_control
+		/sys/devices/system/cpu/umwait_control/enable_c02
+		/sys/devices/system/cpu/umwait_control/max_time
+Date:		May 2019
+Contact:	Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description:	Umwait control
+
+		enable_c02: Read/write interface to control umwait C0.2 state
+			Read returns C0.2 state status:
+				0: C0.2 is disabled
+				1: C0.2 is enabled
+
+			Write 'y' or '1' or 'on' to enable C0.2 state.
+			Write 'n' or '0' or 'off' to disable C0.2 state.
+
+			The interface is case insensitive.
+
+		max_time: Read/write interface to control umwait maximum time
+			  in TSC-quanta that the CPU can reside in either C0.1
+			  or C0.2 state. The time is an unsigned 32-bit number.
+			  Note that a value of zero means there is no limit.
+			  Low order two bits must be zero.
diff --git a/Documentation/RCU/rcuref.txt b/Documentation/RCU/rcuref.txt
index 613033ff2b9b..5e6429d66c24 100644
--- a/Documentation/RCU/rcuref.txt
+++ b/Documentation/RCU/rcuref.txt
@@ -12,6 +12,7 @@ please read on.
 Reference counting on elements of lists which are protected by traditional
 reader/writer spinlocks or semaphores are straightforward:
 
+CODE LISTING A:
 1.					2.
 add()					search_and_reference()
 {					{
@@ -28,7 +29,8 @@ add()					search_and_reference()
 release_referenced()			delete()
 {					{
     ...					    write_lock(&list_lock);
-    atomic_dec(&el->rc, relfunc)	    ...
+    if(atomic_dec_and_test(&el->rc))	    ...
+	kfree(el);
     ...					    remove_element
 }					    write_unlock(&list_lock);
 					...
@@ -44,6 +46,7 @@ search_and_reference() could potentially hold reference to an element which
 has already been deleted from the list/array.  Use atomic_inc_not_zero()
 in this scenario as follows:
 
+CODE LISTING B:
 1.					2.
 add()					search_and_reference()
 {					{
@@ -79,6 +82,7 @@ search_and_reference() code path.  In such cases, the
 atomic_dec_and_test() may be moved from delete() to el_free()
 as follows:
 
+CODE LISTING C:
 1.					2.
 add()					search_and_reference()
 {					{
@@ -114,6 +118,17 @@ element can therefore safely be freed.  This in turn guarantees that if
 any reader finds the element, that reader may safely acquire a reference
 without checking the value of the reference counter.
 
+A clear advantage of the RCU-based pattern in listing C over the one
+in listing B is that any call to search_and_reference() that locates
+a given object will succeed in obtaining a reference to that object,
+even given a concurrent invocation of delete() for that same object.
+Similarly, a clear advantage of both listings B and C over listing A is
+that a call to delete() is not delayed even if there are an arbitrarily
+large number of calls to search_and_reference() searching for the same
+object that delete() was invoked on.  Instead, all that is delayed is
+the eventual invocation of kfree(), which is usually not a problem on
+modern computer systems, even the small ones.
+
 In cases where delete() can sleep, synchronize_rcu() can be called from
 delete(), so that el_free() can be subsumed into delete as follows:
 
@@ -130,3 +145,7 @@ delete()
 	kfree(el);
 	...
 }
+
+As additional examples in the kernel, the pattern in listing C is used by
+reference counting of struct pid, while the pattern in listing B is used by
+struct posix_acl.
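
For readers who want to see the "listing B" pattern that the hunk above annotates in actual kernel C, here is a minimal illustrative sketch (not part of the patch; the struct, field and function names are invented), assuming an element with an embedded atomic_t reference count on an RCU-protected list:

#include <linux/atomic.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>

struct element {
	struct list_head list;
	atomic_t rc;
	int key;
};

/* Reader side: look up an element and take a reference, listing-B style. */
static struct element *search_and_reference(struct list_head *head, int key)
{
	struct element *el;

	rcu_read_lock();
	list_for_each_entry_rcu(el, head, list) {
		/* Only succeed if the refcount has not already hit zero. */
		if (el->key == key && atomic_inc_not_zero(&el->rc)) {
			rcu_read_unlock();
			return el;	/* caller must drop the reference later */
		}
	}
	rcu_read_unlock();
	return NULL;
}

The corresponding delete() would unlink the element under the list lock, drop the initial reference with atomic_dec_and_test(), and defer the kfree() past a grace period exactly as in the listings above.
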
diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt
index 1ab70c37921f..13e88fc00f01 100644
--- a/Documentation/RCU/stallwarn.txt
+++ b/Documentation/RCU/stallwarn.txt
@@ -153,7 +153,7 @@ rcupdate.rcu_task_stall_timeout
 	This boot/sysfs parameter controls the RCU-tasks stall warning
 	interval.  A value of zero or less suppresses RCU-tasks stall
 	warnings.  A positive value sets the stall-warning interval
-	in jiffies.  An RCU-tasks stall warning starts with the line:
+	in seconds.  An RCU-tasks stall warning starts with the line:
 
 	INFO: rcu_tasks detected stalls on tasks:
 
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index 981651a8b65d..7e1a8721637a 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -212,7 +212,7 @@ synchronize_rcu()
 
 rcu_assign_pointer()
 
-	typeof(p) rcu_assign_pointer(p, typeof(p) v);
+	void rcu_assign_pointer(p, typeof(p) v);
 
 	Yes, rcu_assign_pointer() -is- implemented as a macro, though it
 	would be cool to be able to declare a function in this manner.
@@ -220,9 +220,9 @@ rcu_assign_pointer()
 
 	The updater uses this function to assign a new value to an
 	RCU-protected pointer, in order to safely communicate the change
-	in value from the updater to the reader.  This function returns
-	the new value, and also executes any memory-barrier instructions
-	required for a given CPU architecture.
+	in value from the updater to the reader.  This macro does not
+	evaluate to an rvalue, but it does execute any memory-barrier
+	instructions required for a given CPU architecture.
 
 	Perhaps just as important, it serves to document (1) which
 	pointers are protected by RCU and (2) the point at which a
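
A minimal sketch of the updater/reader pairing this hunk describes (illustrative only; the struct, the global pointer and both helpers are invented, and updates are assumed to be serialized by the caller):

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo {
	int a;
};

static struct foo __rcu *gp;

/* Update side: publish a new version; note the macro's value is not used. */
static void update_foo(struct foo *newp)
{
	struct foo *old;

	old = rcu_dereference_protected(gp, 1);	/* updates serialized by caller */
	rcu_assign_pointer(gp, newp);
	synchronize_rcu();			/* wait for pre-existing readers */
	kfree(old);
}

/* Read side: classic rcu_read_lock()/rcu_dereference() reader. */
static int read_foo(void)
{
	struct foo *p;
	int val = -1;

	rcu_read_lock();
	p = rcu_dereference(gp);
	if (p)
		val = p->a;
	rcu_read_unlock();
	return val;
}
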
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 138f6664b2e2..e6e806285703 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -478,7 +478,7 @@
 			others).
 
 	ccw_timeout_log	[S390]
-			See Documentation/s390/CommonIO for details.
+			See Documentation/s390/common_io.rst for details.
 
 	cgroup_disable=	[KNL] Disable a particular controller
 			Format: {name of the controller(s) to disable}
@@ -516,7 +516,7 @@
 			/selinux/checkreqprot.
 
 	cio_ignore=	[S390]
-			See Documentation/s390/CommonIO for details.
+			See Documentation/s390/common_io.rst for details.
 	clk_ignore_unused
 			[CLK]
 			Prevents the clock framework from automatically gating
@@ -3752,6 +3752,12 @@
 			the propagation of recent CPU-hotplug changes up
 			the rcu_node combining tree.
 
+	rcutree.use_softirq=	[KNL]
+			If set to zero, move all RCU_SOFTIRQ processing to
+			per-CPU rcuc kthreads.  Defaults to a non-zero
+			value, meaning that RCU_SOFTIRQ is used by default.
+			Specify rcutree.use_softirq=0 to use rcuc kthreads.
+
 	rcutree.rcu_fanout_exact= [KNL]
 			Disable autobalancing of the rcu_node combining
 			tree.  This is used by rcutorture, and might
@@ -5100,13 +5106,12 @@
 			targets for exploits that can control RIP.
 
 			emulate     [default] Vsyscalls turn into traps and are
-				    emulated reasonably safely.
+				    emulated reasonably safely.  The vsyscall
+				    page is readable.
 
-			native      Vsyscalls are native syscall instructions.
-				    This is a little bit faster than trapping
-				    and makes a few dynamic recompilers work
-				    better than they would in emulation mode.
-				    It also makes exploits much easier to write.
+			xonly       Vsyscalls turn into traps and are
+				    emulated reasonably safely.  The vsyscall
+				    page is not readable.
 
 			none        Vsyscalls don't work at all.  This makes
 				    them quite hard to use for exploits but
diff --git a/Documentation/arm64/elf_hwcaps.txt b/Documentation/arm64/elf_hwcaps.txt
index b73a2519ecf2..5ae2ef2c12f3 100644
--- a/Documentation/arm64/elf_hwcaps.txt
+++ b/Documentation/arm64/elf_hwcaps.txt
@@ -207,6 +207,10 @@ HWCAP_FLAGM
 
     Functionality implied by ID_AA64ISAR0_EL1.TS == 0b0001.
 
+HWCAP2_FLAGM2
+
+    Functionality implied by ID_AA64ISAR0_EL1.TS == 0b0010.
+
 HWCAP_SSBS
 
     Functionality implied by ID_AA64PFR1_EL1.SSBS == 0b0010.
@@ -223,6 +227,10 @@ HWCAP_PACG
     ID_AA64ISAR1_EL1.GPI == 0b0001, as described by
     Documentation/arm64/pointer-authentication.txt.
 
+HWCAP2_FRINT
+
+    Functionality implied by ID_AA64ISAR1_EL1.FRINTTS == 0b0001.
+
 
 4. Unused AT_HWCAP bits
 -----------------------
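
Userspace discovers the new HWCAP2 bits documented above through the AT_HWCAP2 auxiliary vector entry. A minimal sketch, assuming a toolchain whose arm64 headers already define HWCAP2_FLAGM2 and HWCAP2_FRINT:

#include <stdio.h>
#include <sys/auxv.h>
#include <asm/hwcap.h>		/* HWCAP2_FLAGM2, HWCAP2_FRINT on arm64 */

int main(void)
{
	unsigned long hwcap2 = getauxval(AT_HWCAP2);

	printf("FLAGM2: %s\n", (hwcap2 & HWCAP2_FLAGM2) ? "yes" : "no");
	printf("FRINT:  %s\n", (hwcap2 & HWCAP2_FRINT) ? "yes" : "no");
	return 0;
}
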
diff --git a/Documentation/atomic_t.txt b/Documentation/atomic_t.txt
index dca3fb0554db..0ab747e0d5ac 100644
--- a/Documentation/atomic_t.txt
+++ b/Documentation/atomic_t.txt
@@ -81,9 +81,11 @@ Non-RMW ops:
 
 The non-RMW ops are (typically) regular LOADs and STOREs and are canonically
 implemented using READ_ONCE(), WRITE_ONCE(), smp_load_acquire() and
-smp_store_release() respectively.
+smp_store_release() respectively. Therefore, if you find yourself only using
+the Non-RMW operations of atomic_t, you do not in fact need atomic_t at all
+and are doing it wrong.
 
-The one detail to this is that atomic_set{}() should be observable to the RMW
+A subtle detail of atomic_set{}() is that it should be observable to the RMW
 ops. That is:
 
   C atomic-set
@@ -187,13 +189,22 @@ The barriers:
 
   smp_mb__{before,after}_atomic()
 
-only apply to the RMW ops and can be used to augment/upgrade the ordering
-inherent to the used atomic op. These barriers provide a full smp_mb().
+only apply to the RMW atomic ops and can be used to augment/upgrade the
+ordering inherent to the op. These barriers act almost like a full smp_mb():
+smp_mb__before_atomic() orders all earlier accesses against the RMW op
+itself and all accesses following it, and smp_mb__after_atomic() orders all
+later accesses against the RMW op and all accesses preceding it. However,
+accesses between the smp_mb__{before,after}_atomic() and the RMW op are not
+ordered, so it is advisable to place the barrier right next to the RMW atomic
+op whenever possible.
 
 These helper barriers exist because architectures have varying implicit
 ordering on their SMP atomic primitives. For example our TSO architectures
 provide full ordered atomics and these barriers are no-ops.
 
+NOTE: when the atomic RmW ops are fully ordered, they should also imply a
+compiler barrier.
+
 Thus:
 
   atomic_fetch_add();
@@ -212,7 +223,9 @@ Further, while something like:
   atomic_dec(&X);
 
 is a 'typical' RELEASE pattern, the barrier is strictly stronger than
-a RELEASE. Similarly for something like:
+a RELEASE because it orders preceding instructions against both the read
+and write parts of the atomic_dec(), and against all following instructions
+as well. Similarly, something like:
 
   atomic_inc(&X);
   smp_mb__after_atomic();
@@ -244,7 +257,8 @@ strictly stronger than ACQUIRE. As illustrated:
 
 This should not happen; but a hypothetical atomic_inc_acquire() --
 (void)atomic_fetch_inc_acquire() for instance -- would allow the outcome,
-since then:
+because it would not order the W part of the RMW against the following
+WRITE_ONCE. Thus:
 
   P1			P2
 
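
A small sketch of the barrier-placement advice added above (illustrative only; the variables are invented): keep smp_mb__{before,after}_atomic() directly adjacent to the RMW op it upgrades.

#include <linux/atomic.h>
#include <linux/compiler.h>

static atomic_t X = ATOMIC_INIT(0);
static int x, y;

static void release_like(void)
{
	WRITE_ONCE(x, 1);		/* must be visible before the dec... */
	smp_mb__before_atomic();	/* ...so the barrier sits right next to it */
	atomic_dec(&X);			/* unordered RMW on its own */
}

static void acquire_like(void)
{
	atomic_inc(&X);			/* unordered RMW on its own */
	smp_mb__after_atomic();		/* orders the inc before the store below */
	WRITE_ONCE(y, 1);
}
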
diff --git a/Documentation/core-api/circular-buffers.rst b/Documentation/core-api/circular-buffers.rst
index 53e51caa3347..50966f66e398 100644
--- a/Documentation/core-api/circular-buffers.rst
+++ b/Documentation/core-api/circular-buffers.rst
@@ -3,7 +3,7 @@ Circular Buffers
 ================
 
 :Author: David Howells <dhowells@redhat.com>
-:Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+:Author: Paul E. McKenney <paulmck@linux.ibm.com>
 
 
 Linux provides a number of features that can be used to implement circular
diff --git a/Documentation/core-api/timekeeping.rst b/Documentation/core-api/timekeeping.rst
index 93cbeb9daec0..20ee447a50f3 100644
--- a/Documentation/core-api/timekeeping.rst
+++ b/Documentation/core-api/timekeeping.rst
@@ -65,7 +65,7 @@ different format depending on what is required by the user:
 .. c:function:: u64 ktime_get_ns( void )
 		u64 ktime_get_boottime_ns( void )
 		u64 ktime_get_real_ns( void )
-		u64 ktime_get_tai_ns( void )
+		u64 ktime_get_clocktai_ns( void )
 		u64 ktime_get_raw_ns( void )
 
    Same as the plain ktime_get functions, but returning a u64 number
@@ -99,16 +99,20 @@ Coarse and fast_ns access
 
 Some additional variants exist for more specialized cases:
 
-.. c:function:: ktime_t ktime_get_coarse_boottime( void )
+.. c:function:: ktime_t ktime_get_coarse( void )
+		ktime_t ktime_get_coarse_boottime( void )
 		ktime_t ktime_get_coarse_real( void )
 		ktime_t ktime_get_coarse_clocktai( void )
-		ktime_t ktime_get_coarse_raw( void )
+
+.. c:function:: u64 ktime_get_coarse_ns( void )
+		u64 ktime_get_coarse_boottime_ns( void )
+		u64 ktime_get_coarse_real_ns( void )
+		u64 ktime_get_coarse_clocktai_ns( void )
 
 .. c:function:: void ktime_get_coarse_ts64( struct timespec64 * )
 		void ktime_get_coarse_boottime_ts64( struct timespec64 * )
 		void ktime_get_coarse_real_ts64( struct timespec64 * )
 		void ktime_get_coarse_clocktai_ts64( struct timespec64 * )
-		void ktime_get_coarse_raw_ts64( struct timespec64 * )
 
    These are quicker than the non-coarse versions, but less accurate,
    corresponding to CLOCK_MONONOTNIC_COARSE and CLOCK_REALTIME_COARSE
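
As an illustrative (invented) use of the coarse accessors listed above, code that only needs tick-resolution timestamps could read the coarse clock instead of the precise one:

#include <linux/timekeeping.h>
#include <linux/types.h>

static u64 last_event_ns;

static void note_event(void)
{
	/* Coarse clock: cheap to read, but only updated once per tick. */
	last_event_ns = ktime_get_coarse_ns();
}
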
diff --git a/Documentation/devicetree/bindings/interrupt-controller/amazon,al-fic.txt b/Documentation/devicetree/bindings/interrupt-controller/amazon,al-fic.txt
new file mode 100644
index 000000000000..4e82fd575cec
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/amazon,al-fic.txt
@@ -0,0 +1,29 @@
+Amazon's Annapurna Labs Fabric Interrupt Controller
+
+Required properties:
+
+- compatible: should be "amazon,al-fic"
+- reg: physical base address and size of the registers
+- interrupt-controller: identifies the node as an interrupt controller
+- #interrupt-cells: must be 2.
+  First cell defines the index of the interrupt within the controller.
+  Second cell is used to specify the trigger type and must be one of the
+  following:
+    - bits[3:0] trigger type and level flags
+	1 = low-to-high edge triggered
+	4 = active high level-sensitive
+- interrupt-parent: specifies the parent interrupt controller.
+- interrupts: describes which input line in the interrupt parent, this
+  fic's output is connected to. This field property depends on the parent's
+  binding
+
+Example:
+
+amazon_fic: interrupt-controller@0xfd8a8500 {
+	compatible = "amazon,al-fic";
+	interrupt-controller;
+	#interrupt-cells = <2>;
+	reg = <0x0 0xfd8a8500 0x0 0x1000>;
+	interrupt-parent = <&gic>;
+	interrupts = <GIC_SPI 0x0 IRQ_TYPE_LEVEL_HIGH>;
+};
diff --git a/Documentation/devicetree/bindings/interrupt-controller/amlogic,meson-gpio-intc.txt b/Documentation/devicetree/bindings/interrupt-controller/amlogic,meson-gpio-intc.txt
index 1502a51548bb..7d531d5fff29 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/amlogic,meson-gpio-intc.txt
+++ b/Documentation/devicetree/bindings/interrupt-controller/amlogic,meson-gpio-intc.txt
@@ -15,6 +15,7 @@ Required properties:
     "amlogic,meson-gxbb-gpio-intc" for GXBB SoCs (S905) or
     "amlogic,meson-gxl-gpio-intc" for GXL SoCs (S905X, S912)
     "amlogic,meson-axg-gpio-intc" for AXG SoCs (A113D, A113X)
+    "amlogic,meson-g12a-gpio-intc" for G12A SoCs (S905D2, S905X2, S905Y2)
 - reg : Specifies base physical address and size of the registers.
 - interrupt-controller : Identifies the node as an interrupt controller.
 - #interrupt-cells : Specifies the number of cells needed to encode an
diff --git a/Documentation/devicetree/bindings/interrupt-controller/csky,mpintc.txt b/Documentation/devicetree/bindings/interrupt-controller/csky,mpintc.txt
index ab921f1698fb..e13405355166 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/csky,mpintc.txt
+++ b/Documentation/devicetree/bindings/interrupt-controller/csky,mpintc.txt
@@ -6,11 +6,16 @@ C-SKY Multi-processors Interrupt Controller is designed for ck807/ck810/ck860
 SMP soc, and it also could be used in non-SMP system.
 
 Interrupt number definition:
-
  0-15  : software irq, and we use 15 as our IPI_IRQ.
  16-31 : private  irq, and we use 16 as the co-processor timer.
  31-1024: common irq for soc ip.
 
+Interrupt triger mode: (Defined in dt-bindings/interrupt-controller/irq.h)
+ IRQ_TYPE_LEVEL_HIGH (default)
+ IRQ_TYPE_LEVEL_LOW
+ IRQ_TYPE_EDGE_RISING
+ IRQ_TYPE_EDGE_FALLING
+
 =============================
 intc node bindings definition
 =============================
@@ -26,15 +31,22 @@ intc node bindings definition
 	- #interrupt-cells
 		Usage: required
 		Value type: <u32>
-		Definition: must be <1>
+		Definition: <2>
 	- interrupt-controller:
 		Usage: required
 
-Examples:
+Examples: ("interrupts = <irq_num IRQ_TYPE_XXX>")
 ---------
+#include <dt-bindings/interrupt-controller/irq.h>
 
 	intc: interrupt-controller {
 		compatible = "csky,mpintc";
-		#interrupt-cells = <1>;
+		#interrupt-cells = <2>;
 		interrupt-controller;
 	};
+
+	device: device-example {
+		...
+		interrupts = <34 IRQ_TYPE_EDGE_RISING>;
+		interrupt-parent = <&intc>;
+	};
diff --git a/Documentation/devicetree/bindings/interrupt-controller/renesas,rza1-irqc.txt b/Documentation/devicetree/bindings/interrupt-controller/renesas,rza1-irqc.txt
new file mode 100644
index 000000000000..727b7e4cd6e0
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/renesas,rza1-irqc.txt
@@ -0,0 +1,43 @@
+DT bindings for the Renesas RZ/A1 Interrupt Controller
+
+The RZ/A1 Interrupt Controller is a front-end for the GIC found on Renesas
+RZ/A1 and RZ/A2 SoCs:
+  - IRQ sense select for 8 external interrupts, 1:1-mapped to 8 GIC SPI
+    interrupts,
+  - NMI edge select.
+
+Required properties:
+  - compatible: Must be "renesas,<soctype>-irqc", and "renesas,rza1-irqc" as
+		fallback.
+		Examples with soctypes are:
+		  - "renesas,r7s72100-irqc" (RZ/A1H)
+		  - "renesas,r7s9210-irqc" (RZ/A2M)
+  - #interrupt-cells: Must be 2 (an interrupt index and flags, as defined
+		      in interrupts.txt in this directory)
+  - #address-cells: Must be zero
+  - interrupt-controller: Marks the device as an interrupt controller
+  - reg: Base address and length of the memory resource used by the interrupt
+	 controller
+  - interrupt-map: Specifies the mapping from external interrupts to GIC
+		   interrupts
+  - interrupt-map-mask: Must be <7 0>
+
+Example:
+
+	irqc: interrupt-controller@fcfef800 {
+		compatible = "renesas,r7s72100-irqc", "renesas,rza1-irqc";
+		#interrupt-cells = <2>;
+		#address-cells = <0>;
+		interrupt-controller;
+		reg = <0xfcfef800 0x6>;
+		interrupt-map =
+			<0 0 &gic GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>,
+			<1 0 &gic GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>,
+			<2 0 &gic GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>,
+			<3 0 &gic GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>,
+			<4 0 &gic GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>,
+			<5 0 &gic GIC_SPI 5 IRQ_TYPE_LEVEL_HIGH>,
+			<6 0 &gic GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>,
+			<7 0 &gic GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>;
+		interrupt-map-mask = <7 0>;
+	};
diff --git a/Documentation/devicetree/bindings/perf/fsl-imx-ddr.txt b/Documentation/devicetree/bindings/perf/fsl-imx-ddr.txt
new file mode 100644
index 000000000000..d77e3f26f9e6
--- /dev/null
+++ b/Documentation/devicetree/bindings/perf/fsl-imx-ddr.txt
@@ -0,0 +1,21 @@
 1* Freescale (NXP) IMX8 DDR performance monitor
2
3Required properties:
4
5- compatible: should be one of:
6 "fsl,imx8-ddr-pmu"
7 "fsl,imx8m-ddr-pmu"
8
9- reg: physical address and size
10
11- interrupts: single interrupt
12 generated by the control block
13
14Example:
15
16 ddr-pmu@5c020000 {
17 compatible = "fsl,imx8-ddr-pmu";
18 reg = <0x5c020000 0x10000>;
19 interrupt-parent = <&gic>;
20 interrupts = <GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>;
21 };
diff --git a/Documentation/devicetree/bindings/riscv/cpus.yaml b/Documentation/devicetree/bindings/riscv/cpus.yaml
index 27f02ec4bb45..f97a4ecd7b91 100644
--- a/Documentation/devicetree/bindings/riscv/cpus.yaml
+++ b/Documentation/devicetree/bindings/riscv/cpus.yaml
@@ -152,17 +152,19 @@ examples:
152 - | 152 - |
153 // Example 2: Spike ISA Simulator with 1 Hart 153 // Example 2: Spike ISA Simulator with 1 Hart
154 cpus { 154 cpus {
155 cpu@0 { 155 #address-cells = <1>;
156 device_type = "cpu"; 156 #size-cells = <0>;
157 reg = <0>; 157 cpu@0 {
158 compatible = "riscv"; 158 device_type = "cpu";
159 riscv,isa = "rv64imafdc"; 159 reg = <0>;
160 mmu-type = "riscv,sv48"; 160 compatible = "riscv";
161 interrupt-controller { 161 riscv,isa = "rv64imafdc";
162 #interrupt-cells = <1>; 162 mmu-type = "riscv,sv48";
163 interrupt-controller; 163 interrupt-controller {
164 compatible = "riscv,cpu-intc"; 164 #interrupt-cells = <1>;
165 }; 165 interrupt-controller;
166 }; 166 compatible = "riscv,cpu-intc";
167 };
168 };
167 }; 169 };
168... 170...
diff --git a/Documentation/devicetree/bindings/timer/nxp,sysctr-timer.txt b/Documentation/devicetree/bindings/timer/nxp,sysctr-timer.txt
new file mode 100644
index 000000000000..d57659996d62
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/nxp,sysctr-timer.txt
@@ -0,0 +1,25 @@
 1NXP System Counter Module (sys_ctr)
2
 3The system counter (sys_ctr) is a programmable system counter which provides
 4a shared time base to Cortex A15, A7, A53, A73, etc. It is intended for use in
 5applications where the counter is always powered and supports multiple,
 6unrelated clocks. The compare frame inside can be used for timer purposes.
7
8Required properties:
9
10- compatible : should be "nxp,sysctr-timer"
11- reg : Specifies the base physical address and size of the compare
12 frame and the counter control, read & compare.
13- interrupts : should be the first compare frame's interrupt
14- clocks : Specifies the counter clock.
15- clock-names: Specifies the clock name of this module
16
17Example:
18
19 system_counter: timer@306a0000 {
20 compatible = "nxp,sysctr-timer";
21 reg = <0x306a0000 0x20000>;/* system-counter-rd & compare */
22 clocks = <&clk_8m>;
23 clock-names = "per";
24 interrupts = <GIC_SPI 47 IRQ_TYPE_LEVEL_HIGH>;
25 };
diff --git a/Documentation/driver-api/s390-drivers.rst b/Documentation/driver-api/s390-drivers.rst
index 30e6aa7e160b..5158577bc29b 100644
--- a/Documentation/driver-api/s390-drivers.rst
+++ b/Documentation/driver-api/s390-drivers.rst
@@ -27,7 +27,7 @@ not strictly considered I/O devices. They are considered here as well,
27although they are not the focus of this document. 27although they are not the focus of this document.
28 28
29Some additional information can also be found in the kernel source under 29Some additional information can also be found in the kernel source under
30Documentation/s390/driver-model.txt. 30Documentation/s390/driver-model.rst.
31 31
32The css bus 32The css bus
33=========== 33===========
@@ -38,7 +38,7 @@ into several categories:
38* Standard I/O subchannels, for use by the system. They have a child 38* Standard I/O subchannels, for use by the system. They have a child
39 device on the ccw bus and are described below. 39 device on the ccw bus and are described below.
40* I/O subchannels bound to the vfio-ccw driver. See 40* I/O subchannels bound to the vfio-ccw driver. See
41 Documentation/s390/vfio-ccw.txt. 41 Documentation/s390/vfio-ccw.rst.
42* Message subchannels. No Linux driver currently exists. 42* Message subchannels. No Linux driver currently exists.
43* CHSC subchannels (at most one). The chsc subchannel driver can be used 43* CHSC subchannels (at most one). The chsc subchannel driver can be used
44 to send asynchronous chsc commands. 44 to send asynchronous chsc commands.
diff --git a/Documentation/locking/lockdep-design.txt b/Documentation/locking/lockdep-design.txt
index 39fae143c9cb..f189d130e543 100644
--- a/Documentation/locking/lockdep-design.txt
+++ b/Documentation/locking/lockdep-design.txt
@@ -15,34 +15,48 @@ tens of thousands of) instantiations. For example a lock in the inode
15struct is one class, while each inode has its own instantiation of that 15struct is one class, while each inode has its own instantiation of that
16lock class. 16lock class.
17 17
18The validator tracks the 'state' of lock-classes, and it tracks 18The validator tracks the 'usage state' of lock-classes, and it tracks
19dependencies between different lock-classes. The validator maintains a 19the dependencies between different lock-classes. Lock usage indicates
20rolling proof that the state and the dependencies are correct. 20how a lock is used with regard to its IRQ contexts, while lock
21 21dependency can be understood as lock order, where L1 -> L2 suggests that
22Unlike an lock instantiation, the lock-class itself never goes away: when 22a task is attempting to acquire L2 while holding L1. From lockdep's
23a lock-class is used for the first time after bootup it gets registered, 23perspective, the two locks (L1 and L2) are not necessarily related; that
24and all subsequent uses of that lock-class will be attached to this 24dependency just means that ordering has happened at least once. The
25lock-class. 25validator maintains a continuing effort to prove that lock usages and
26dependencies are correct, and it emits a splat when it finds they are not.
27
28A lock-class's behavior is constructed by its instances collectively:
29when the first instance of a lock-class is used after bootup the class
30gets registered, then all (subsequent) instances will be mapped to the
31class and hence their usages and dependencies will contribute to those of
32the class. A lock-class does not go away when a lock instance does, but
33it can be removed if the memory space of the lock class (static or
34dynamic) is reclaimed; this happens, for example, when a module is
35unloaded or a workqueue is destroyed.
26 36
27State 37State
28----- 38-----
29 39
30The validator tracks lock-class usage history into 4 * nSTATEs + 1 separate 40The validator tracks lock-class usage history and divides the usage into
31state bits: 41(4 usages * n STATEs + 1) categories:
32 42
43where the 4 usages can be:
33- 'ever held in STATE context' 44- 'ever held in STATE context'
34- 'ever held as readlock in STATE context' 45- 'ever held as readlock in STATE context'
35- 'ever held with STATE enabled' 46- 'ever held with STATE enabled'
36- 'ever held as readlock with STATE enabled' 47- 'ever held as readlock with STATE enabled'
37 48
38Where STATE can be either one of (kernel/locking/lockdep_states.h) 49where the n STATEs are coded in kernel/locking/lockdep_states.h and as of
39 - hardirq 50now they include:
40 - softirq 51- hardirq
52- softirq
41 53
54where the last 1 category is:
42- 'ever used' [ == !unused ] 55- 'ever used' [ == !unused ]
43 56
44When locking rules are violated, these state bits are presented in the 57When locking rules are violated, these usage bits are presented in the
45locking error messages, inside curlies. A contrived example: 58locking error messages, inside curlies, with a total of 2 * n STATEs bits.
59A contrived example:
46 60
47 modprobe/2287 is trying to acquire lock: 61 modprobe/2287 is trying to acquire lock:
48 (&sio_locks[i].lock){-.-.}, at: [<c02867fd>] mutex_lock+0x21/0x24 62 (&sio_locks[i].lock){-.-.}, at: [<c02867fd>] mutex_lock+0x21/0x24
@@ -51,28 +65,67 @@ locking error messages, inside curlies. A contrived example:
51 (&sio_locks[i].lock){-.-.}, at: [<c02867fd>] mutex_lock+0x21/0x24 65 (&sio_locks[i].lock){-.-.}, at: [<c02867fd>] mutex_lock+0x21/0x24
52 66
53 67
54The bit position indicates STATE, STATE-read, for each of the states listed 68For a given lock, the bit positions from left to right indicate the usage
55above, and the character displayed in each indicates: 69of the lock and readlock (if it exists), for each of the n STATEs listed
70above respectively, and the character displayed at each bit position
71indicates:
56 72
57 '.' acquired while irqs disabled and not in irq context 73 '.' acquired while irqs disabled and not in irq context
58 '-' acquired in irq context 74 '-' acquired in irq context
59 '+' acquired with irqs enabled 75 '+' acquired with irqs enabled
60 '?' acquired in irq context with irqs enabled. 76 '?' acquired in irq context with irqs enabled.
61 77
62Unused mutexes cannot be part of the cause of an error. 78The bits are illustrated with an example:
79
80 (&sio_locks[i].lock){-.-.}, at: [<c02867fd>] mutex_lock+0x21/0x24
81 ||||
82 ||| \-> softirq disabled and not in softirq context
83 || \--> acquired in softirq context
84 | \---> hardirq disabled and not in hardirq context
85 \----> acquired in hardirq context
86
87
88For a given STATE, whether the lock is ever acquired in that STATE
89context and whether that STATE is enabled yields four possible cases as
90shown in the table below. The bit character indicates which of these
91cases applies to the lock as of the reporting time.
92
93 -------------------------------------------
94 | | irq enabled | irq disabled |
95 |-------------------------------------------|
96 | ever in irq | ? | - |
97 |-------------------------------------------|
98 | never in irq | + | . |
99 -------------------------------------------
100
101The character '-' suggests irqs are disabled, because otherwise the
102character '?' would have been shown instead. A similar deduction can be
103applied to '+' too.
104
105Unused locks (e.g., mutexes) cannot be part of the cause of an error.
63 106
64 107
65Single-lock state rules: 108Single-lock state rules:
66------------------------ 109------------------------
67 110
111A lock being irq-safe means it was ever used in an irq context, while a lock
112being irq-unsafe means it was ever acquired with irqs enabled.
113
68A softirq-unsafe lock-class is automatically hardirq-unsafe as well. The 114A softirq-unsafe lock-class is automatically hardirq-unsafe as well. The
69following states are exclusive, and only one of them is allowed to be 115following states must be exclusive: only one of them is allowed to be set
70set for any lock-class: 116for any lock-class based on its usage:
117
118 <hardirq-safe> or <hardirq-unsafe>
119 <softirq-safe> or <softirq-unsafe>
71 120
72 <hardirq-safe> and <hardirq-unsafe> 121This is because if a lock can be used in irq context (irq-safe) then it
73 <softirq-safe> and <softirq-unsafe> 122must never be acquired with irqs enabled (irq-unsafe). Otherwise, a
123deadlock may happen. For example, if the lock is held with irqs enabled
124and an interrupt arrives before it is released, the interrupt handler may
125try to acquire the same lock a second time, which creates a deadlock
126referred to as a lock recursion deadlock.
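
As a minimal, hypothetical sketch of that scenario (the lock and function names are invented for this illustration, not taken from any driver), the same lock is taken with irqs enabled in process context and again from a hardirq handler:

  #include <linux/spinlock.h>
  #include <linux/interrupt.h>

  static DEFINE_SPINLOCK(demo_lock);            /* hypothetical lock */

  /* Hypothetical irq handler: the lock is ever held in hardirq context,
   * so lockdep marks it hardirq-safe. */
  static irqreturn_t demo_irq_handler(int irq, void *dev_id)
  {
          spin_lock(&demo_lock);
          /* ... */
          spin_unlock(&demo_lock);
          return IRQ_HANDLED;
  }

  /* Hypothetical process-context path: the same lock is taken with irqs
   * enabled, so lockdep also marks it hardirq-unsafe. */
  static void demo_process_path(void)
  {
          spin_lock(&demo_lock);
          /* If demo_irq_handler() fires on this CPU right here, it spins
           * on demo_lock forever: the lock recursion deadlock above. */
          spin_unlock(&demo_lock);
  }

A conventional fix in such a sketch would be to use spin_lock_irqsave()/spin_unlock_irqrestore() in the process-context path, which keeps the lock from ever being irq-unsafe.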
74 127
75The validator detects and reports lock usage that violate these 128The validator detects and reports lock usage that violates these
76single-lock state rules. 129single-lock state rules.
77 130
78Multi-lock dependency rules: 131Multi-lock dependency rules:
@@ -81,15 +134,18 @@ Multi-lock dependency rules:
81The same lock-class must not be acquired twice, because this could lead 134The same lock-class must not be acquired twice, because this could lead
82to lock recursion deadlocks. 135to lock recursion deadlocks.
83 136
84Furthermore, two locks may not be taken in different order: 137Furthermore, two locks cannot be taken in inverse order:
85 138
86 <L1> -> <L2> 139 <L1> -> <L2>
87 <L2> -> <L1> 140 <L2> -> <L1>
88 141
89because this could lead to lock inversion deadlocks. (The validator 142because this could lead to a deadlock - referred to as lock inversion
90finds such dependencies in arbitrary complexity, i.e. there can be any 143deadlock - as attempts to acquire the two locks form a circle which
91other locking sequence between the acquire-lock operations, the 144could lead to the two contexts waiting for each other permanently. The
92validator will still track all dependencies between locks.) 145validator will find such dependency circle in arbitrary complexity,
146i.e., there can be any other locking sequence between the acquire-lock
147operations; the validator will still find whether these locks can be
148acquired in a circular fashion.
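
To make the circle concrete, here is a hypothetical two-lock sketch (lock and function names invented for this illustration): either path is fine on its own, but together they record L1 -> L2 and L2 -> L1, which is exactly the circle the validator reports:

  #include <linux/spinlock.h>

  static DEFINE_SPINLOCK(lock_l1);          /* plays the role of L1 above */
  static DEFINE_SPINLOCK(lock_l2);          /* plays the role of L2 above */

  static void path_one(void)                /* records the dependency L1 -> L2 */
  {
          spin_lock(&lock_l1);
          spin_lock(&lock_l2);
          /* ... */
          spin_unlock(&lock_l2);
          spin_unlock(&lock_l1);
  }

  static void path_two(void)                /* records L2 -> L1, closing the circle */
  {
          spin_lock(&lock_l2);
          spin_lock(&lock_l1);
          /* ... */
          spin_unlock(&lock_l1);
          spin_unlock(&lock_l2);
  }

If two CPUs run path_one() and path_two() concurrently, each can end up holding one lock while spinning on the other; lockdep complains as soon as it has seen both orders, even if the deadlock never actually triggers.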
93 149
94Furthermore, the following usage based lock dependencies are not allowed 150Furthermore, the following usage based lock dependencies are not allowed
95between any two lock-classes: 151between any two lock-classes:
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index f70ebcdfe592..e4e07c8ab89e 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -3,7 +3,7 @@
3 ============================ 3 ============================
4 4
5By: David Howells <dhowells@redhat.com> 5By: David Howells <dhowells@redhat.com>
6 Paul E. McKenney <paulmck@linux.vnet.ibm.com> 6 Paul E. McKenney <paulmck@linux.ibm.com>
7 Will Deacon <will.deacon@arm.com> 7 Will Deacon <will.deacon@arm.com>
8 Peter Zijlstra <peterz@infradead.org> 8 Peter Zijlstra <peterz@infradead.org>
9 9
diff --git a/Documentation/process/changes.rst b/Documentation/process/changes.rst
index 18735dc460a0..0a18075c485e 100644
--- a/Documentation/process/changes.rst
+++ b/Documentation/process/changes.rst
@@ -31,7 +31,7 @@ you probably needn't concern yourself with isdn4k-utils.
31====================== =============== ======================================== 31====================== =============== ========================================
32GNU C 4.6 gcc --version 32GNU C 4.6 gcc --version
33GNU make 3.81 make --version 33GNU make 3.81 make --version
34binutils 2.20 ld -v 34binutils 2.21 ld -v
35flex 2.5.35 flex --version 35flex 2.5.35 flex --version
36bison 2.0 bison --version 36bison 2.0 bison --version
37util-linux 2.10o fdformat --version 37util-linux 2.10o fdformat --version
@@ -77,9 +77,7 @@ You will need GNU make 3.81 or later to build the kernel.
77Binutils 77Binutils
78-------- 78--------
79 79
80The build system has, as of 4.13, switched to using thin archives (`ar T`) 80Binutils 2.21 or newer is needed to build the kernel.
81rather than incremental linking (`ld -r`) for built-in.a intermediate steps.
82This requires binutils 2.20 or newer.
83 81
84pkg-config 82pkg-config
85---------- 83----------
diff --git a/Documentation/s390/3270.txt b/Documentation/s390/3270.rst
index 7c715de99774..e09e77954238 100644
--- a/Documentation/s390/3270.txt
+++ b/Documentation/s390/3270.rst
@@ -1,13 +1,17 @@
1===============================
1IBM 3270 Display System support 2IBM 3270 Display System support
3===============================
2 4
3This file describes the driver that supports local channel attachment 5This file describes the driver that supports local channel attachment
4of IBM 3270 devices. It consists of three sections: 6of IBM 3270 devices. It consists of three sections:
7
5 * Introduction 8 * Introduction
6 * Installation 9 * Installation
7 * Operation 10 * Operation
8 11
9 12
10INTRODUCTION. 13Introduction
14============
11 15
12This paper describes installing and operating 3270 devices under 16This paper describes installing and operating 3270 devices under
13Linux/390. A 3270 device is a block-mode rows-and-columns terminal of 17Linux/390. A 3270 device is a block-mode rows-and-columns terminal of
@@ -17,12 +21,12 @@ twenty and thirty years ago.
17You may have 3270s in-house and not know it. If you're using the 21You may have 3270s in-house and not know it. If you're using the
18VM-ESA operating system, define a 3270 to your virtual machine by using 22VM-ESA operating system, define a 3270 to your virtual machine by using
19the command "DEF GRAF <hex-address>" This paper presumes you will be 23the command "DEF GRAF <hex-address>" This paper presumes you will be
20defining four 3270s with the CP/CMS commands 24defining four 3270s with the CP/CMS commands:
21 25
22 DEF GRAF 620 26 - DEF GRAF 620
23 DEF GRAF 621 27 - DEF GRAF 621
24 DEF GRAF 622 28 - DEF GRAF 622
25 DEF GRAF 623 29 - DEF GRAF 623
26 30
27Your network connection from VM-ESA allows you to use x3270, tn3270, or 31Your network connection from VM-ESA allows you to use x3270, tn3270, or
28another 3270 emulator, started from an xterm window on your PC or 32another 3270 emulator, started from an xterm window on your PC or
@@ -34,7 +38,8 @@ This paper covers installation of the driver and operation of a
34dialed-in x3270. 38dialed-in x3270.
35 39
36 40
37INSTALLATION. 41Installation
42============
38 43
39You install the driver by installing a patch, doing a kernel build, and 44You install the driver by installing a patch, doing a kernel build, and
40running the configuration script (config3270.sh, in this directory). 45running the configuration script (config3270.sh, in this directory).
@@ -59,13 +64,15 @@ Use #CP TERM CONMODE 3270 to change it to 3270. If you generate only
59at boot time to a 3270 if it is a 3215. 64at boot time to a 3270 if it is a 3215.
60 65
61In brief, these are the steps: 66In brief, these are the steps:
67
62 1. Install the tub3270 patch 68 1. Install the tub3270 patch
63 2. (If a module) add a line to a file in /etc/modprobe.d/*.conf 69 2. (If a module) add a line to a file in `/etc/modprobe.d/*.conf`
64 3. (If VM) define devices with DEF GRAF 70 3. (If VM) define devices with DEF GRAF
65 4. Reboot 71 4. Reboot
66 5. Configure 72 5. Configure
67 73
68To test that everything works, assuming VM and x3270, 74To test that everything works, assuming VM and x3270,
75
69 1. Bring up an x3270 window. 76 1. Bring up an x3270 window.
70 2. Use the DIAL command in that window. 77 2. Use the DIAL command in that window.
71 3. You should immediately see a Linux login screen. 78 3. You should immediately see a Linux login screen.
@@ -74,7 +81,8 @@ Here are the installation steps in detail:
74 81
75 1. The 3270 driver is a part of the official Linux kernel 82 1. The 3270 driver is a part of the official Linux kernel
76 source. Build a tree with the kernel source and any necessary 83 source. Build a tree with the kernel source and any necessary
77 patches. Then do 84 patches. Then do::
85
78 make oldconfig 86 make oldconfig
79 (If you wish to disable 3215 console support, edit 87 (If you wish to disable 3215 console support, edit
80 .config; change CONFIG_TN3215's value to "n"; 88 .config; change CONFIG_TN3215's value to "n";
@@ -84,20 +92,22 @@ Here are the installation steps in detail:
84 make modules_install 92 make modules_install
85 93
86 2. (Perform this step only if you have configured tub3270 as a 94 2. (Perform this step only if you have configured tub3270 as a
87 module.) Add a line to a file /etc/modprobe.d/*.conf to automatically 95 module.) Add a line to a file `/etc/modprobe.d/*.conf` to automatically
88 load the driver when it's needed. With this line added, you will see 96 load the driver when it's needed. With this line added, you will see
89 login prompts appear on your 3270s as soon as boot is complete (or 97 login prompts appear on your 3270s as soon as boot is complete (or
90 with emulated 3270s, as soon as you dial into your vm guest using the 98 with emulated 3270s, as soon as you dial into your vm guest using the
91 command "DIAL <vmguestname>"). Since the line-mode major number is 99 command "DIAL <vmguestname>"). Since the line-mode major number is
92 227, the line to add should be: 100 227, the line to add should be::
101
93 alias char-major-227 tub3270 102 alias char-major-227 tub3270
94 103
95 3. Define graphic devices to your vm guest machine, if you 104 3. Define graphic devices to your vm guest machine, if you
96 haven't already. Define them before you reboot (reipl): 105 haven't already. Define them before you reboot (reipl):
97 DEFINE GRAF 620 106
98 DEFINE GRAF 621 107 - DEFINE GRAF 620
99 DEFINE GRAF 622 108 - DEFINE GRAF 621
100 DEFINE GRAF 623 109 - DEFINE GRAF 622
110 - DEFINE GRAF 623
101 111
102 4. Reboot. The reboot process scans hardware devices, including 112 4. Reboot. The reboot process scans hardware devices, including
103 3270s, and this enables the tub3270 driver once loaded to respond 113 3270s, and this enables the tub3270 driver once loaded to respond
@@ -107,21 +117,23 @@ Here are the installation steps in detail:
107 117
108 5. Run the 3270 configuration script config3270. It is 118 5. Run the 3270 configuration script config3270. It is
109 distributed in this same directory, Documentation/s390, as 119 distributed in this same directory, Documentation/s390, as
110 config3270.sh. Inspect the output script it produces, 120 config3270.sh. Inspect the output script it produces,
111 /tmp/mkdev3270, and then run that script. This will create the 121 /tmp/mkdev3270, and then run that script. This will create the
112 necessary character special device files and make the necessary 122 necessary character special device files and make the necessary
113 changes to /etc/inittab. 123 changes to /etc/inittab.
114 124
115 Then notify /sbin/init that /etc/inittab has changed, by issuing 125 Then notify /sbin/init that /etc/inittab has changed, by issuing
116 the telinit command with the q operand: 126 the telinit command with the q operand::
127
117 cd Documentation/s390 128 cd Documentation/s390
118 sh config3270.sh 129 sh config3270.sh
119 sh /tmp/mkdev3270 130 sh /tmp/mkdev3270
120 telinit q 131 telinit q
121 132
122 This should be sufficient for your first time. If your 3270 133 This should be sufficient for your first time. If your 3270
123 configuration has changed and you're reusing config3270, you 134 configuration has changed and you're reusing config3270, you
124 should follow these steps: 135 should follow these steps::
136
125 Change 3270 configuration 137 Change 3270 configuration
126 Reboot 138 Reboot
127 Run config3270 and /tmp/mkdev3270 139 Run config3270 and /tmp/mkdev3270
@@ -132,8 +144,10 @@ Here are the testing steps in detail:
132 1. Bring up an x3270 window, or use an actual hardware 3278 or 144 1. Bring up an x3270 window, or use an actual hardware 3278 or
133 3279, or use the 3270 emulator of your choice. You would be 145 3279, or use the 3270 emulator of your choice. You would be
134 running the emulator on your PC or workstation. You would use 146 running the emulator on your PC or workstation. You would use
135 the command, for example, 147 the command, for example::
148
136 x3270 vm-esa-domain-name & 149 x3270 vm-esa-domain-name &
150
137 if you wanted a 3278 Model 4 with 43 rows of 80 columns, the 151 if you wanted a 3278 Model 4 with 43 rows of 80 columns, the
138 default model number. The driver does not take advantage of 152 default model number. The driver does not take advantage of
139 extended attributes. 153 extended attributes.
@@ -144,7 +158,8 @@ Here are the testing steps in detail:
144 158
145 2. Use the DIAL command instead of the LOGIN command to connect 159 2. Use the DIAL command instead of the LOGIN command to connect
146 to one of the virtual 3270s you defined with the DEF GRAF 160 to one of the virtual 3270s you defined with the DEF GRAF
147 commands: 161 commands::
162
148 dial my-vm-guest-name 163 dial my-vm-guest-name
149 164
150 3. You should immediately see a login prompt from your 165 3. You should immediately see a login prompt from your
@@ -171,14 +186,17 @@ Here are the testing steps in detail:
171 Wrong major number? Wrong minor number? There's your 186 Wrong major number? Wrong minor number? There's your
172 problem! 187 problem!
173 188
174 D. Do you get the message 189 D. Do you get the message::
190
175 "HCPDIA047E my-vm-guest-name 0620 does not exist"? 191 "HCPDIA047E my-vm-guest-name 0620 does not exist"?
192
176 If so, you must issue the command "DEF GRAF 620" from your VM 193 If so, you must issue the command "DEF GRAF 620" from your VM
177 3215 console and then reboot the system. 194 3215 console and then reboot the system.
178 195
179 196
180 197
181OPERATION. 198OPERATION.
199==========
182 200
183The driver defines three areas on the 3270 screen: the log area, the 201The driver defines three areas on the 3270 screen: the log area, the
184input area, and the status area. 202input area, and the status area.
@@ -203,8 +221,10 @@ which indicates no scrolling will occur. (If you hit ENTER with "Linux
203Running" and nothing typed, the application receives a newline.) 221Running" and nothing typed, the application receives a newline.)
204 222
205You may change the scrolling timeout value. For example, the following 223You may change the scrolling timeout value. For example, the following
206command line: 224command line::
225
207 echo scrolltime=60 > /proc/tty/driver/tty3270 226 echo scrolltime=60 > /proc/tty/driver/tty3270
227
208changes the scrolling timeout value to 60 sec. Set scrolltime to 0 if 228changes the scrolling timeout value to 60 sec. Set scrolltime to 0 if
209you wish to prevent scrolling entirely. 229you wish to prevent scrolling entirely.
210 230
@@ -228,7 +248,8 @@ cause an EOF also by typing "^D" and hitting ENTER.
228No PF key is preassigned to cause a job suspension, but you may cause a 248No PF key is preassigned to cause a job suspension, but you may cause a
229job suspension by typing "^Z" and hitting ENTER. You may wish to 249job suspension by typing "^Z" and hitting ENTER. You may wish to
230assign this function to a PF key. To make PF7 cause job suspension, 250assign this function to a PF key. To make PF7 cause job suspension,
231execute the command: 251execute the command::
252
232 echo pf7=^z > /proc/tty/driver/tty3270 253 echo pf7=^z > /proc/tty/driver/tty3270
233 254
234If the input you type does not end with the two characters "^n", the 255If the input you type does not end with the two characters "^n", the
@@ -243,8 +264,10 @@ command is entered into the stack only when the input area is not made
243invisible (such as for password entry) and it is not identical to the 264invisible (such as for password entry) and it is not identical to the
244current top entry. PF10 rotates backward through the command stack; 265current top entry. PF10 rotates backward through the command stack;
245PF11 rotates forward. You may assign the backward function to any PF 266PF11 rotates forward. You may assign the backward function to any PF
246key (or PA key, for that matter), say, PA3, with the command: 267key (or PA key, for that matter), say, PA3, with the command::
268
247 echo -e pa3=\\033k > /proc/tty/driver/tty3270 269 echo -e pa3=\\033k > /proc/tty/driver/tty3270
270
248This assigns the string ESC-k to PA3. Similarly, the string ESC-j 271This assigns the string ESC-k to PA3. Similarly, the string ESC-j
249performs the forward function. (Rationale: In bash with vi-mode line 272performs the forward function. (Rationale: In bash with vi-mode line
250editing, ESC-k and ESC-j retrieve backward and forward history. 273editing, ESC-k and ESC-j retrieve backward and forward history.
@@ -252,15 +275,19 @@ Suggestions welcome.)
252 275
253Is a stack size of twenty commands not to your liking? Change it on 276Is a stack size of twenty commands not to your liking? Change it on
254the fly. To change to saving the last 100 commands, execute the 277the fly. To change to saving the last 100 commands, execute the
255command: 278command::
279
256 echo recallsize=100 > /proc/tty/driver/tty3270 280 echo recallsize=100 > /proc/tty/driver/tty3270
257 281
258Have a command you issue frequently? Assign it to a PF or PA key! Use 282Have a command you issue frequently? Assign it to a PF or PA key! Use
259the command 283the command::
260 echo pf24="mkdir foobar; cd foobar" > /proc/tty/driver/tty3270 284
285 echo pf24="mkdir foobar; cd foobar" > /proc/tty/driver/tty3270
286
261to execute the commands mkdir foobar and cd foobar immediately when you 287to execute the commands mkdir foobar and cd foobar immediately when you
262hit PF24. Want to see the command line first, before you execute it? 288hit PF24. Want to see the command line first, before you execute it?
263Use the -n option of the echo command: 289Use the -n option of the echo command::
290
264 echo -n pf24="mkdir foo; cd foo" > /proc/tty/driver/tty3270 291 echo -n pf24="mkdir foo; cd foo" > /proc/tty/driver/tty3270
265 292
266 293
diff --git a/Documentation/s390/Debugging390.txt b/Documentation/s390/Debugging390.txt
deleted file mode 100644
index 5ae7f868a007..000000000000
--- a/Documentation/s390/Debugging390.txt
+++ /dev/null
@@ -1,2142 +0,0 @@
1
2 Debugging on Linux for s/390 & z/Architecture
3 by
4 Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com)
5 Copyright (C) 2000-2001 IBM Deutschland Entwicklung GmbH, IBM Corporation
6 Best viewed with fixed width fonts
7
8Overview of Document:
9=====================
10This document is intended to give a good overview of how to debug Linux for
11s/390 and z/Architecture. It is not intended as a complete reference and not a
12tutorial on the fundamentals of C & assembly. It doesn't go into
13390 IO in any detail. It is intended to complement the documents in the
14reference section below & any other worthwhile references you get.
15
16It is intended like the Enterprise Systems Architecture/390 Reference Summary
17to be printed out & used as a quick cheat sheet self help style reference when
18problems occur.
19
20Contents
21========
22Register Set
23Address Spaces on Intel Linux
24Address Spaces on Linux for s/390 & z/Architecture
25The Linux for s/390 & z/Architecture Kernel Task Structure
26Register Usage & Stackframes on Linux for s/390 & z/Architecture
27A sample program with comments
28Compiling programs for debugging on Linux for s/390 & z/Architecture
29Debugging under VM
30s/390 & z/Architecture IO Overview
31Debugging IO on s/390 & z/Architecture under VM
32GDB on s/390 & z/Architecture
33Stack chaining in gdb by hand
34Examining core dumps
35ldd
36Debugging modules
37The proc file system
38SysRq
39References
40Special Thanks
41
42Register Set
43============
44The current architectures have the following registers.
45
4616 General purpose registers, 32 bit on s/390 and 64 bit on z/Architecture,
47r0-r15 (or gpr0-gpr15), used for arithmetic and addressing.
48
4916 Control registers, 32 bit on s/390 and 64 bit on z/Architecture, cr0-cr15,
50kernel usage only, used for memory management, interrupt control, debugging
51control etc.
52
5316 Access registers (ar0-ar15), 32 bit on both s/390 and z/Architecture,
54normally not used by normal programs but potentially could be used as
55temporary storage. These registers have a 1:1 association with general
56purpose registers and are designed to be used in the so-called access
57register mode to select different address spaces.
58Access register 0 (and access register 1 on z/Architecture, which needs a
5964 bit pointer) is currently used by the pthread library as a pointer to
60the current running threads private area.
61
6216 64 bit floating point registers (fp0-fp15 ) IEEE & HFP floating
63point format compliant on G5 upwards & a Floating point control reg (FPC)
644 64 bit registers (fp0,fp2,fp4 & fp6) HFP only on older machines.
65Note:
66Linux (currently) always uses IEEE & emulates G5 IEEE format on older machines,
67( provided the kernel is configured for this ).
68
69
70The PSW is the most important register on the machine it
71is 64 bit on s/390 & 128 bit on z/Architecture & serves the roles of
72a program counter (pc), condition code register,memory space designator.
73In IBM standard notation I am counting bit 0 as the MSB.
74It has several advantages over a normal program counter
75in that you can change address translation & program counter
76in a single instruction. To change address translation,
77e.g. switching address translation off requires that you
78have a logical=physical mapping for the address you are
79currently running at.
80
81 Bit Value
82s/390 z/Architecture
830 0 Reserved ( must be 0 ) otherwise specification exception occurs.
84
851 1 Program Event Recording 1 PER enabled,
86 PER is used to facilitate debugging e.g. single stepping.
87
882-4 2-4 Reserved ( must be 0 ).
89
905 5 Dynamic address translation 1=DAT on.
91
926 6 Input/Output interrupt Mask
93
947 7 External interrupt Mask used primarily for interprocessor
95 signalling and clock interrupts.
96
978-11 8-11 PSW Key used for complex memory protection mechanism
98 (not used under linux)
99
10012 12 1 on s/390 0 on z/Architecture
101
10213 13 Machine Check Mask 1=enable machine check interrupts
103
10414 14 Wait State. Set this to 1 to stop the processor except for
105 interrupts and give time to other LPARS. Used in CPU idle in
106 the kernel to increase overall usage of processor resources.
107
10815 15 Problem state ( if set to 1 certain instructions are disabled )
109 all linux user programs run with this bit 1
110 ( useful info for debugging under VM ).
111
11216-17 16-17 Address Space Control
113
114 00 Primary Space Mode:
115 The register CR1 contains the primary address-space control ele-
116 ment (PASCE), which points to the primary space region/segment
117 table origin.
118
119 01 Access register mode
120
121 10 Secondary Space Mode:
122 The register CR7 contains the secondary address-space control
123 element (SASCE), which points to the secondary space region or
124 segment table origin.
125
126 11 Home Space Mode:
127 The register CR13 contains the home space address-space control
128 element (HASCE), which points to the home space region/segment
129 table origin.
130
131 See "Address Spaces on Linux for s/390 & z/Architecture" below
132 for more information about address space usage in Linux.
133
13418-19 18-19 Condition codes (CC)
135
13620 20 Fixed point overflow mask if 1=FPU exceptions for this event
137 occur ( normally 0 )
138
13921 21 Decimal overflow mask if 1=FPU exceptions for this event occur
140 ( normally 0 )
141
14222 22 Exponent underflow mask if 1=FPU exceptions for this event occur
143 ( normally 0 )
144
14523 23 Significance Mask if 1=FPU exceptions for this event occur
146 ( normally 0 )
147
14824-31 24-30 Reserved Must be 0.
149
150 31 Extended Addressing Mode
151 32 Basic Addressing Mode
152 Used to set addressing mode
153 PSW 31 PSW 32
154 0 0 24 bit
155 0 1 31 bit
156 1 1 64 bit
157
15832 1=31 bit addressing mode 0=24 bit addressing mode (for backward
159 compatibility), linux always runs with this bit set to 1
160
16133-64 Instruction address.
162 33-63 Reserved must be 0
163 64-127 Address
164 In 24 bits mode bits 64-103=0 bits 104-127 Address
165 In 31 bits mode bits 64-96=0 bits 97-127 Address
166 Note: unlike 31 bit mode on s/390 bit 96 must be zero
167 when loading the address with LPSWE otherwise a
168 specification exception occurs, LPSW is fully backward
169 compatible.
170
171
172Prefix Page(s)
173--------------
174This per cpu memory area is too intimately tied to the processor not to mention.
175It exists between the real addresses 0-4096 on s/390 and between 0-8192 on
176z/Architecture and is exchanged with one page on s/390 or two pages on
177z/Architecture in absolute storage by the set prefix instruction during Linux
178startup.
179This page is mapped to a different prefix for each processor in an SMP
180configuration (assuming the OS designer is sane of course).
181Bytes 0-512 (200 hex) on s/390 and 0-512, 4096-4544, 4604-5119 currently on
182z/Architecture are used by the processor itself for holding such information
183as exception indications and entry points for exceptions.
184Bytes after 0xc00 hex are used by linux for per processor globals on s/390 and
185z/Architecture (there is a gap on z/Architecture currently between 0xc00 and
1860x1000, too, which is used by Linux).
187The closest thing to this on traditional architectures is the interrupt
188vector table. This is a good thing & does simplify some of the kernel coding
189however it means that we now cannot catch stray NULL pointers in the
190kernel without hard coded checks.
191
192
193
194Address Spaces on Intel Linux
195=============================
196
197The traditional Intel Linux is approximately mapped as follows forgive
198the ascii art.
1990xFFFFFFFF 4GB Himem *****************
200 * *
201 * Kernel Space *
202 * *
203 ***************** ****************
204User Space Himem * User Stack * * *
205(typically 0xC0000000 3GB ) ***************** * *
206 * Shared Libs * * Next Process *
207 ***************** * to *
208 * * <== * Run * <==
209 * User Program * * *
210 * Data BSS * * *
211 * Text * * *
212 * Sections * * *
2130x00000000 ***************** ****************
214
215Now it is easy to see that on Intel it is quite easy to recognise a kernel
216address as being one greater than user space himem (in this case 0xC0000000),
217and addresses of less than this are the ones in the current running program on
218this processor (if an smp box).
219If using the virtual machine ( VM ) as a debugger it is quite difficult to
220know which user process is running as the address space you are looking at
221could be from any process in the run queue.
222
223The limitation of Intel's addressing technique is that the Linux
224kernel uses a very simple real address to virtual addressing technique
225of Real Address=Virtual Address-User Space Himem.
226This means that on Intel the kernel linux can typically only address
227Himem=0xFFFFFFFF-0xC0000000=1GB & this is all the RAM these machines
228can typically use.
229They can lower User Himem to 2GB or lower & thus be
230able to use 2GB of RAM however this shrinks the maximum size
231of User Space from 3GB to 2GB they have a no win limit of 4GB unless
232they go to 64 Bit.
233
234
235On 390 our limitations & strengths make us slightly different.
236For backward compatibility we are only allowed to use 31 bits (2GB)
237of our 32 bit addresses, however, we use entirely separate address
238spaces for the user & kernel.
239
240This means we can support 2GB of non Extended RAM on s/390, & more
241with the Extended memory management swap device &
242currently 4TB of physical memory on z/Architecture.
243
244
245Address Spaces on Linux for s/390 & z/Architecture
246==================================================
247
248Our addressing scheme is basically as follows:
249
250 Primary Space Home Space
251Himem 0x7fffffff 2GB on s/390 ***************** ****************
252currently 0x3ffffffffff (2^42)-1 * User Stack * * *
253on z/Architecture. ***************** * *
254 * Shared Libs * * *
255 ***************** * *
256 * * * Kernel *
257 * User Program * * *
258 * Data BSS * * *
259 * Text * * *
260 * Sections * * *
2610x00000000 ***************** ****************
262
263This also means that we need to look at the PSW problem state bit and the
264addressing mode to decide whether we are looking at user or kernel space.
265
266User space runs in primary address mode (or access register mode within
267the vdso code).
268
269The kernel usually also runs in home space mode, however when accessing
270user space the kernel switches to primary or secondary address mode if
271the mvcos instruction is not available or if a compare-and-swap (futex)
272instruction on a user space address is performed.
273
274When also looking at the ASCE control registers, this means:
275
276User space:
277- runs in primary or access register mode
278- cr1 contains the user asce
279- cr7 contains the user asce
280- cr13 contains the kernel asce
281
282Kernel space:
283- runs in home space mode
284- cr1 contains the user or kernel asce
285 -> the kernel asce is loaded when a uaccess requires primary or
286 secondary address mode
287- cr7 contains the user or kernel asce, (changed with set_fs())
288- cr13 contains the kernel asce
289
290In case of uaccess the kernel changes to:
291- primary space mode in case of a uaccess (copy_to_user) and uses
292 e.g. the mvcp instruction to access user space. However the kernel
293 will stay in home space mode if the mvcos instruction is available
294- secondary space mode in case of futex atomic operations, so that the
295 instructions come from primary address space and data from secondary
296 space
297
298In case of KVM, the kernel runs in home space mode, but cr1 gets switched
299to contain the gmap asce before the SIE instruction gets executed. When
300the SIE instruction is finished, cr1 will be switched back to contain the
301user asce.
302
303
304Virtual Addresses on s/390 & z/Architecture
305===========================================
306
307A virtual address on s/390 is made up of 3 parts
308The SX (segment index, roughly corresponding to the PGD & PMD in Linux
309terminology) being bits 1-11.
310The PX (page index, corresponding to the page table entry (pte) in Linux
311terminology) being bits 12-19.
312The remaining bits BX (the byte index are the offset in the page )
313i.e. bits 20 to 31.
314
315On z/Architecture in linux we currently make up an address from 4 parts.
316The region index bits (RX) 0-32 we currently use bits 22-32
317The segment index (SX) being bits 33-43
318The page index (PX) being bits 44-51
319The byte index (BX) being bits 52-63
320
321Notes:
3221) s/390 has no PMD so the PMD is really the PGD also.
323A lot of this stuff is defined in pgtable.h.
324
3252) Also seeing as s/390's page indexes are only 1k in size
326(bits 12-19 x 4 bytes per pte ) we use 1 ( page 4k )
327to make the best use of memory by updating 4 segment indices
328entries each time we mess with a PMD & use offsets
3290,1024,2048 & 3072 in this page as for our segment indexes.
330On z/Architecture our page indexes are now 2k in size
331( bits 12-19 x 8 bytes per pte ) we do a similar trick
332but only mess with 2 segment indices each time we mess with
333a PMD.
334
3353) As z/Architecture supports up to a massive 5-level page table lookup we
336can only use 3 currently on Linux ( as this is all the generic kernel
337currently supports ) however this may change in future
338this allows us to access ( according to my sums )
3394TB of virtual storage per process i.e.
3404096*512(PTES)*1024(PMDS)*2048(PGD) = 4398046511104 bytes,
341enough for another 2 or 3 of years I think :-).
342to do this we use a region-third-table designation type in
343our address space control registers.
344
345
346The Linux for s/390 & z/Architecture Kernel Task Structure
347==========================================================
348Each process/thread under Linux for S390 has its own kernel task_struct
349defined in linux/include/linux/sched.h
350The S390 on initialisation & resuming of a process on a cpu sets
351the __LC_KERNEL_STACK variable in the spare prefix area for this cpu
352(which we use for per-processor globals).
353
354The kernel stack pointer is intimately tied with the task structure for
355each processor as follows.
356
357 s/390
358 ************************
359 * 1 page kernel stack *
360 * ( 4K ) *
361 ************************
362 * 1 page task_struct *
363 * ( 4K ) *
3648K aligned ************************
365
366 z/Architecture
367 ************************
368 * 2 page kernel stack *
369 * ( 8K ) *
370 ************************
371 * 2 page task_struct *
372 * ( 8K ) *
37316K aligned ************************
374
375What this means is that we don't need to dedicate any register or global
376variable to point to the current running process & can retrieve it with the
377following very simple construct for s/390 & one very similar for z/Architecture.
378
379static inline struct task_struct * get_current(void)
380{
381 struct task_struct *current;
382 __asm__("lhi %0,-8192\n\t"
383 "nr %0,15"
384 : "=r" (current) );
385 return current;
386}
387
388i.e. just anding the current kernel stack pointer with the mask -8192.
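
As a hypothetical user-space illustration of that masking arithmetic (the 8K block size is the s/390 figure from the picture above; the addresses are invented):

  #include <stdio.h>
  #include <stdint.h>

  int main(void)
  {
          /* Pretend the task_struct plus kernel stack occupy one
           * 8K aligned block, as in the s/390 picture above. */
          uint64_t block_base    = 0x1234000;              /* 8K aligned, invented */
          uint64_t stack_pointer = block_base + 0x1f60;    /* somewhere inside it  */

          /* Anding with -8192 (i.e. ~8191) clears the low 13 bits and
           * recovers the start of the block, i.e. the task_struct. */
          uint64_t recovered = stack_pointer & ~(uint64_t)8191;

          printf("base=%#llx recovered=%#llx\n",
                 (unsigned long long)block_base,
                 (unsigned long long)recovered);
          return 0;
  }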
389Thankfully because Linux doesn't have support for nested IO interrupts
390& our devices have large buffers that can survive interrupts being shut off for
391short amounts of time, we don't need a separate stack for interrupts.
392
393
394
395
396Register Usage & Stackframes on Linux for s/390 & z/Architecture
397=================================================================
398Overview:
399---------
400This is the code that gcc produces at the top & the bottom of
401each function. It usually is fairly consistent & similar from
402function to function & if you know its layout you can probably
403make some headway in finding the ultimate cause of a problem
404after a crash without a source level debugger.
405
406Note: To follow stackframes requires a knowledge of C or Pascal &
407limited knowledge of one assembly language.
408
409It should be noted that there are some differences between the
410s/390 and z/Architecture stack layouts as the z/Architecture stack layout
411didn't have to maintain compatibility with older linkage formats.
412
413Glossary:
414---------
415alloca:
416This is a built in compiler function for runtime allocation
417of extra space on the callers stack which is obviously freed
418up on function exit ( e.g. the caller may choose to allocate nothing
419of a buffer of 4k if required for temporary purposes ), it generates
420very efficient code ( a few cycles ) when compared to alternatives
421like malloc.
422
423automatics: These are local variables on the stack,
424i.e they aren't in registers & they aren't static.
425
426back-chain:
427This is a pointer to the stack pointer before entering a
428framed functions ( see frameless function ) prologue got by
429dereferencing the address of the current stack pointer,
430 i.e. got by accessing the 32 bit value at the stack pointers
431current location.
432
433base-pointer:
434This is a pointer to the back of the literal pool which
435is an area just behind each procedure used to store constants
436in each function.
437
438call-clobbered: The caller probably needs to save these registers if there
439is something of value in them, on the stack or elsewhere before making a
440call to another procedure so that it can restore it later.
441
442epilogue:
443The code generated by the compiler to return to the caller.
444
445frameless-function
446A frameless function in Linux for s390 & z/Architecture is one which doesn't
447need more than the register save area (96 bytes on s/390, 160 on z/Architecture)
448given to it by the caller.
449A frameless function never:
4501) Sets up a back chain.
4512) Calls alloca.
4523) Calls other normal functions
4534) Has automatics.
454
455GOT-pointer:
456This is a pointer to the global-offset-table in ELF
457( Executable and Linkable Format, Linux's most common executable format ),
458all globals & shared library objects are found using this pointer.
459
460lazy-binding
461ELF shared libraries are typically only loaded when routines in the shared
462library are actually first called at runtime. This is lazy binding.
463
464procedure-linkage-table
465This is a table found from the GOT which contains pointers to routines
466in other shared libraries which can't be called to by easier means.
467
468prologue:
469The code generated by the compiler to set up the stack frame.
470
471outgoing-args:
472This is extra area allocated on the stack of the calling function if the
473parameters for the callee's cannot all be put in registers, the same
474area can be reused by each function the caller calls.
475
476routine-descriptor:
477A COFF executable format based concept of a procedure reference
478actually being 8 bytes or more as opposed to a simple pointer to the routine.
479This is typically defined as follows
480Routine Descriptor offset 0=Pointer to Function
481Routine Descriptor offset 4=Pointer to Table of Contents
482The table of contents/TOC is roughly equivalent to a GOT pointer.
483& it means that shared libraries etc. can be shared between several
484environments each with their own TOC.
485
486
487static-chain: This is used in nested functions a concept adopted from pascal
488by gcc not used in ansi C or C++ ( although quite useful ), basically it
489is a pointer used to reference local variables of enclosing functions.
490You might come across this stuff once or twice in your lifetime.
491
492e.g.
493The function below should return 11 though gcc may get upset & toss warnings
494about unused variables.
495int FunctionA(int a)
496{
497 int b;
498 void FunctionC(int c)
499 {
500 b=c+1;
501 }
502 FunctionC(10);
503 return(b);
504}
505
506
507s/390 & z/Architecture Register usage
508=====================================
509r0 used by syscalls/assembly call-clobbered
510r1 used by syscalls/assembly call-clobbered
511r2 argument 0 / return value 0 call-clobbered
512r3 argument 1 / return value 1 (if long long) call-clobbered
513r4 argument 2 call-clobbered
514r5 argument 3 call-clobbered
515r6 argument 4 saved
516r7 pointer-to arguments 5 to ... saved
517r8 this & that saved
518r9 this & that saved
519r10 static-chain ( if nested function ) saved
520r11 frame-pointer ( if function used alloca ) saved
521r12 got-pointer saved
522r13 base-pointer saved
523r14 return-address saved
524r15 stack-pointer saved
525
526f0 argument 0 / return value ( float/double ) call-clobbered
527f2 argument 1 call-clobbered
528f4 z/Architecture argument 2 saved
529f6 z/Architecture argument 3 saved
530The remaining floating points
531f1,f3,f5 f7-f15 are call-clobbered.
532
533Notes:
534------
5351) The only requirement is that registers which are used
536by the callee are saved, e.g. the compiler is perfectly
537capable of using r11 for purposes other than a frame a
538frame pointer if a frame pointer is not needed.
5392) In functions with variable arguments e.g. printf the calling procedure
540is identical to one without variable arguments & the same number of
541parameters. However, the prologue of this function is somewhat more
542hairy owing to it having to move these parameters to the stack to
543get va_start, va_arg & va_end to work.
5443) Access registers are currently unused by gcc but are used in
545the kernel. Possibilities exist to use them at the moment for
546temporary storage but it isn't recommended.
5474) Only 4 of the floating point registers are used for
548parameter passing as older machines such as G3 have only 4
549& it keeps the stack frame compatible with other compilers.
550However with IEEE floating point emulation under linux on the
551older machines you are free to use the other 12.
5525) A long long or double parameter cannot have the
553first 4 bytes in a register & the second four bytes in the
554outgoing args area. It must be purely in the outgoing args
555area if crossing this boundary.
5566) Floating point parameters are mixed with outgoing args
557on the outgoing args area in the order they are passed in as parameters.
5587) Floating point arguments 2 & 3 are saved in the outgoing args area for
559z/Architecture
560
561
562Stack Frame Layout
563------------------
564s/390 z/Architecture
5650 0 back chain ( a 0 here signifies end of back chain )
5664 8 eos ( end of stack, not used on Linux for S390 used in other linkage formats )
5678 16 glue used in other s/390 linkage formats for saved routine descriptors etc.
56812 24 glue used in other s/390 linkage formats for saved routine descriptors etc.
56916 32 scratch area
57020 40 scratch area
57124 48 saved r6 of caller function
57228 56 saved r7 of caller function
57332 64 saved r8 of caller function
57436 72 saved r9 of caller function
57540 80 saved r10 of caller function
57644 88 saved r11 of caller function
57748 96 saved r12 of caller function
57852 104 saved r13 of caller function
57956 112 saved r14 of caller function
58060 120 saved r15 of caller function
58164 128 saved f4 of caller function
58272 132 saved f6 of caller function
58380 undefined
58496 160 outgoing args passed from caller to callee
58596+x 160+x possible stack alignment ( 8 bytes desirable )
58696+x+y 160+x+y alloca space of caller ( if used )
58796+x+y+z 160+x+y+z automatics of caller ( if used )
5880 back-chain
589
590A sample program with comments.
591===============================
592
593Comments on the function test
594-----------------------------
5951) It didn't need to set up a pointer to the constant pool gpr13 as it is not
596used ( :-( ).
5972) This is a frameless function & no stack is bought.
5983) The compiler was clever enough to recognise that it could return the
599value in r2 as well as use it for the passed in parameter ( :-) ).
6004) The basr ( branch relative & save ) trick works as follows: the instruction
601has a special case where, for some instruction operands, r0 is understood as
602the literal value 0 ( some risc architectures also do this ). So now
603we are branching to the next address & the address of the new program counter is
604in r13, so now we subtract the size of the function prologue we have executed
605+ the size of the literal pool to get to the top of the literal pool
6060040037c int test(int b)
607{ # Function prologue below
608 40037c: 90 de f0 34 stm %r13,%r14,52(%r15) # Save registers r13 & r14
609 400380: 0d d0 basr %r13,%r0 # Set up pointer to constant pool using
610 400382: a7 da ff fa ahi %r13,-6 # basr trick
611 return(5+b);
612 # Huge main program
613 400386: a7 2a 00 05 ahi %r2,5 # add 5 to r2
614
615 # Function epilogue below
616 40038a: 98 de f0 34 lm %r13,%r14,52(%r15) # restore registers r13 & 14
617 40038e: 07 fe br %r14 # return
618}
619
620Comments on the function main
621-----------------------------
6221) The compiler did this function optimally ( 8-) )
623
624Literal pool for main.
625400390: ff ff ff ec .long 0xffffffec
626main(int argc,char *argv[])
627{ # Function prologue below
628 400394: 90 bf f0 2c stm %r11,%r15,44(%r15) # Save necessary registers
629 400398: 18 0f lr %r0,%r15 # copy stack pointer to r0
630 40039a: a7 fa ff a0 ahi %r15,-96 # Make area for callee saving
631 40039e: 0d d0 basr %r13,%r0 # Set up r13 to point to
632 4003a0: a7 da ff f0 ahi %r13,-16 # literal pool
633 4003a4: 50 00 f0 00 st %r0,0(%r15) # Save backchain
634
635 return(test(5)); # Main Program Below
636 4003a8: 58 e0 d0 00 l %r14,0(%r13) # load relative address of test from
637 # literal pool
638 4003ac: a7 28 00 05 lhi %r2,5 # Set first parameter to 5
639 4003b0: 4d ee d0 00 bas %r14,0(%r14,%r13) # jump to test setting r14 as return
640 # address using branch & save instruction.
641
642 # Function Epilogue below
643 4003b4: 98 bf f0 8c lm %r11,%r15,140(%r15)# Restore necessary registers.
644 4003b8: 07 fe br %r14 # return to do program exit
645}
646
647
648Compiler updates
649----------------
650
651main(int argc,char *argv[])
652{
653 4004fc: 90 7f f0 1c stm %r7,%r15,28(%r15)
654 400500: a7 d5 00 04 bras %r13,400508 <main+0xc>
655 400504: 00 40 04 f4 .long 0x004004f4
656 # compiler now puts constant pool in code to so it saves an instruction
657 400508: 18 0f lr %r0,%r15
658 40050a: a7 fa ff a0 ahi %r15,-96
659 40050e: 50 00 f0 00 st %r0,0(%r15)
660 return(test(5));
661 400512: 58 10 d0 00 l %r1,0(%r13)
662 400516: a7 28 00 05 lhi %r2,5
663 40051a: 0d e1 basr %r14,%r1
664 # compiler adds 1 extra instruction to epilogue this is done to
665 # avoid processor pipeline stalls owing to data dependencies on g5 &
666 # above as register 14 in the old code was needed directly after being loaded
667 # by the lm %r11,%r15,140(%r15) for the br %14.
668 40051c: 58 40 f0 98 l %r4,152(%r15)
669 400520: 98 7f f0 7c lm %r7,%r15,124(%r15)
670 400524: 07 f4 br %r4
671}
672
673
674Hartmut ( our compiler developer ) also has been threatening to take out the
675stack backchain in optimised code as this also causes pipeline stalls, you
676have been warned.
677
67864 bit z/Architecture code disassembly
679--------------------------------------
680
681If you understand the stuff above you'll understand the stuff
682below too, so I'll avoid repeating myself & just say that
683some of the instructions have g's on the end of them to indicate
684they are 64 bit & the stack offsets are bigger;
685the only other difference you'll find between 32 & 64 bit is that
686we now use f4 & f6 for floating point arguments on 64 bit.
68700000000800005b0 <test>:
688int test(int b)
689{
690 return(5+b);
691 800005b0: a7 2a 00 05 ahi %r2,5
692 800005b4: b9 14 00 22 lgfr %r2,%r2 # downcast to integer
693 800005b8: 07 fe br %r14
694 800005ba: 07 07 bcr 0,%r7
695
696
697}
698
69900000000800005bc <main>:
700main(int argc,char *argv[])
701{
702 800005bc: eb bf f0 58 00 24 stmg %r11,%r15,88(%r15)
703 800005c2: b9 04 00 1f lgr %r1,%r15
704 800005c6: a7 fb ff 60 aghi %r15,-160
705 800005ca: e3 10 f0 00 00 24 stg %r1,0(%r15)
706 return(test(5));
707 800005d0: a7 29 00 05 lghi %r2,5
708 # brasl allows jumps > 64k & is overkill here, bras would do fine
709 800005d4: c0 e5 ff ff ff ee brasl %r14,800005b0 <test>
710 800005da: e3 40 f1 10 00 04 lg %r4,272(%r15)
711 800005e0: eb bf f0 f8 00 04 lmg %r11,%r15,248(%r15)
712 800005e6: 07 f4 br %r4
713}
714
715
716
717Compiling programs for debugging on Linux for s/390 & z/Architecture
718====================================================================
719-gdwarf-2 now works; it should be considered the default debugging
720format for s/390 & z/Architecture as it is more reliable for debugging
721shared libraries. Normal -g debugging works much better now
722thanks to the IBM java compiler developers' bug reports.
723
724This is typically done by adding/appending the flags -g or -gdwarf-2 to the
725CFLAGS & LDFLAGS variables in the Makefile of the program concerned.
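
A minimal sketch, assuming the Makefile uses CFLAGS & LDFLAGS in the
conventional way, would be
CFLAGS  += -g -gdwarf-2
LDFLAGS += -g
or, without editing the Makefile at all,
make CFLAGS="-g -gdwarf-2" LDFLAGS="-g"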
726
727If using gdb & you would like accurate displays of registers &
728stack traces, compile without optimisation, i.e. make sure
729that there is no -O2 or similar on the CFLAGS line of the Makefile &
730in the emitted gcc commands. Obviously this will produce worse code
731( not advisable for shipment ) but it is an aid to the debugging process.
732
733This aids debugging because the compiler will copy parameters passed
734in registers onto the stack so backtracing & looking at passed in
735parameters will work; however, some larger programs which use inline functions
736will not compile without optimisation.
737
738Debugging with optimisation has since been much improved after fixing
739some bugs; please make sure you are using gdb-5.0 or later, developed
740after Nov'2000.
741
742
743
744Debugging under VM
745==================
746
747Notes
748-----
749Addresses & values in the VM debugger are always hex never decimal
750Address ranges are of the format <HexValue1>-<HexValue2> or
751<HexValue1>.<HexValue2>
752For example, the address range 0x2000 to 0x3000 can be described as 2000-3000
753or 2000.1000
754
755The VM Debugger is case insensitive.
756
757VM's strengths are usually other debuggers' weaknesses: you can get at any
758resource no matter how sensitive, e.g. memory management resources, or change
759address translation in the PSW. For kernel hacking you will reap dividends if
760you get good at it.
761
762The VM Debugger displays operators but not operands, and it also
763displays useful information on the same line, as the author of the code probably
764felt that it was a good idea not to go over the 80 columns on the screen.
765This isn't as unintuitive as it may seem as the s/390 instructions are easy to
766decode mentally and you can make a good guess at a lot of them as all the
767operands are nibble ( half byte ) aligned.
768So if you have an objdump listing by hand, it is quite easy to follow, and if
769you don't have an objdump listing keep a copy of the s/390 Reference Summary
770or alternatively the s/390 principles of operation next to you.
771e.g. even I can guess that
7720001AFF8' LR 180F CC 0
773is a ( load register ) lr r0,r15
774
775Also it is very easy to tell the length of a 390 instruction from the 2 most
776significant bits in the instruction (not that this info is really useful except
777if you are trying to make sense of a hexdump of code).
778Here is a table
779Bits Instruction Length
780------------------------------------------
78100 2 Bytes
78201 4 Bytes
78310 4 Bytes
78411 6 Bytes
785
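If you ever want to apply this rule programmatically, e.g. in a little tool
of your own for walking a hexdump of code, a minimal C sketch might look
like this ( an illustration only ):

#include <stdio.h>

/* length in bytes of a s/390 instruction, taken from the two most
   significant bits of its first byte as per the table above */
static int insn_length(unsigned char first_byte)
{
	switch (first_byte >> 6) {
	case 0:  return 2;	/* 00 */
	case 1:
	case 2:  return 4;	/* 01 & 10 */
	default: return 6;	/* 11 */
	}
}

int main(void)
{
	printf("%d\n", insn_length(0xa7));	/* ahi, an RI instruction, 4 bytes */
	return 0;
}
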
786The debugger also displays other useful info on the same line such as the
787addresses being operated on destination addresses of branches & condition codes.
788e.g.
78900019736' AHI A7DAFF0E CC 1
790000198BA' BRC A7840004 -> 000198C2' CC 0
791000198CE' STM 900EF068 >> 0FA95E78 CC 2
792
793
794
795Useful VM debugger commands
796---------------------------
797
798I suppose I'd better mention this before I start
799to list the current active traces do
800Q TR
801there can be a maximum of 255 of these per set
802( more about trace sets later ).
803To stop traces issue a
804TR END.
805To delete a particular breakpoint issue
806TR DEL <breakpoint number>
807
808The PA1 key drops to CP mode so you can issue debugger commands.
809Doing alt c ( on my 3270 console at least ) clears the screen,
810hitting b <enter> comes back to the running operating system
811from cp mode ( in our case linux ).
812It is typically useful to add shortcuts to your profile.exec file
813if you have one ( this is roughly equivalent to autoexec.bat in DOS ).
814Here are a few from mine.
815/* this gives me command history on issuing f12 */
816set pf12 retrieve
817/* this continues */
818set pf8 imm b
819/* goes to trace set a */
820set pf1 imm tr goto a
821/* goes to trace set b */
822set pf2 imm tr goto b
823/* goes to trace set c */
824set pf3 imm tr goto c
825
826
827
828Instruction Tracing
829-------------------
830Setting a simple breakpoint
831TR I PSWA <address>
832To debug a particular function try
833TR I R <function address range>
834TR I on its own will single step.
835TR I DATA <MNEMONIC> <OPTIONAL RANGE> will trace for particular mnemonics
836e.g.
837TR I DATA 4D R 0197BC.4000
838will trace for BAS'es ( opcode 4D ) in the range 0197BC.4000
839if you were inclined you could add traces for all branch instructions &
840suffix them with the run prefix so you would have a backtrace on screen
841when a program crashes.
842TR BR <INTO OR FROM> will trace branches into or out of an address.
843e.g.
844TR BR INTO 0 is often quite useful if a program is getting awkward & deciding
845to branch to 0 & crashing, as this will stop at the address before it jumps to 0.
846TR I R <address range> RUN cmd d g
847single steps a range of addresses but stays running &
848displays the gprs on each step.
849
850
851
852Displaying & modifying Registers
853--------------------------------
854D G will display all the gprs
855Adding an extra G to all the commands is necessary to access the full 64 bit
856content in VM on z/Architecture. Obviously this isn't required for access
857registers as these are still 32 bit.
858e.g. DGG instead of DG
859D X will display all the control registers
860D AR will display all the access registers
861D AR4-7 will display access registers 4 to 7
862CPU ALL D G will display the GPRS of all CPUS in the configuration
863D PSW will display the current PSW
864st PSW 2000 will put the value 2000 into the PSW &
865cause your machine to crash.
866D PREFIX displays the prefix offset
867
868
869Displaying Memory
870-----------------
871To display memory mapped using the current PSW's mapping try
872D <range>
873To make VM display a message each time it hits a particular address and
874continue, suffix a trace command with RUN ( see the Hints section below ).
875D I<range> will disassemble/display a range of instructions.
876ST <addr> <32 bit word> will store a 32 bit word at a 32 bit aligned address
877D T<range> will display the EBCDIC in an address (if you are that way inclined)
878D R<range> will display real addresses ( without DAT ) but with prefixing.
879There are other complex display options but if you need to get at, say, home space
880while you are in primary space the easiest thing to do is to temporarily
881modify the PSW to the other addressing mode, display the stuff & then
882restore it.
883
884
885
886Hints
887-----
888If you want to issue a debugger command without halting your virtual machine
889with the PA1 key try prefixing the command with #CP e.g.
890#cp tr i pswa 2000
891also suffixing most debugger commands with RUN will cause them not
892to stop but just display the mnemonic at the current instruction on the console.
893If you have several breakpoints you want to put into your program &
894you get fed up of cross referencing with System.map
895you can do the following trick for several symbols.
896grep do_signal System.map
897which emits the following among other things
8980001f4e0 T do_signal
899now you can do
900
901TR I PSWA 0001f4e0 cmd msg * do_signal
902This sends a message to your own console each time do_signal is entered.
903( As an aside I wrote a perl script once which automatically generated a REXX
904script with breakpoints on every kernel procedure, this isn't a good idea
905because there are thousands of these routines & VM can only set 255 breakpoints
906at a time so you nearly had to spend as long pruning the file down as you would
907entering the msgs by hand), however, the trick might be useful for a single
908object file. In the 3270 terminal emulator x3270 there is a very useful option
909in the file menu called "Save Screen In File" - this is very good for keeping a
910copy of traces.
911
912From CMS help <command name> will give you online help on a particular command.
913e.g.
914HELP DISPLAY
915
916Also CP has a file called profile.exec which automatically gets called
917on startup of CMS ( like autoexec.bat ); keeping with the DOS analogy,
918CP has a feature similar to doskey & it may be useful for you to
919use profile.exec to define some keystrokes.
920e.g.
921SET PF9 IMM B
922This does a single step in VM on pressing F9.
923SET PF10 ^
924This sets up the ^ key,
925which can be used for ^c (ctrl-c) & ^z (ctrl-z) which can't be typed directly
926into some 3270 consoles.
927SET PF11 ^-
928This types the starting keystrokes for a sysrq see SysRq below.
929SET PF12 RETRIEVE
930This retrieves command history on pressing F12.
931
932
933Sometimes in VM the display is set up to scroll automatically; this
934can be very annoying if there are messages you wish to look at.
935To stop this do
936TERM MORE 255 255
937This will nearly stop automatic screen updates, however it will
938cause a denial of service if lots of messages go to the 3270 console,
939so it would be foolish to use this as the default on a production machine.
940
941
942Tracing particular processes
943----------------------------
944The kernel's text segment is intentionally at an address in memory where it will
945very seldom collide with text segments of user programs ( thanks Martin );
946this simplifies debugging the kernel.
947However it is quite common for user processes to have addresses which collide;
948this can make debugging a particular process under VM painful under normal
949circumstances, as the process may change when doing a
950TR I R <address range>.
951Thankfully after reading VM's online help I figured out how to debug
952a particular process.
953
954Your first problem is to find the STD ( segment table designation )
955of the program you wish to debug.
956There are several ways you can do this, here are a few:
9571) objdump --syms <program to be debugged> | grep main
958To get the address of main in the program.
959tr i pswa <address of main>
960Start the program, if VM drops to CP on what looks like the entry
961point of the main function this is most likely the process you wish to debug.
962Now do a D X13 or D XG13 on z/Architecture.
963On 31 bit the STD is bits 1-19 ( the STO segment table origin )
964& 25-31 ( the STL segment table length ) of CR13.
965now type
966TR I R STD <CR13's value> 0.7fffffff
967e.g.
968TR I R STD 8F32E1FF 0.7fffffff
969Another very useful variation is
970TR STORE INTO STD <CR13's value> <address range>
971for finding out when a particular variable changes.
972
973An alternative way of finding the STD of a currently running process
974is to do the following, ( this method is more complex but
975could be quite convenient if you aren't updating the kernel much &
976so your kernel structures will stay constant for a reasonable period of
977time ).
978
979grep task /proc/<pid>/status
980from this you should see something like
981task: 0f160000 ksp: 0f161de8 pt_regs: 0f161f68
982This now gives you a pointer to the task structure.
983Now make CC:="s390-gcc -g" kernel/sched.s
984To get the task_struct stabinfo.
985( task_struct is defined in include/linux/sched.h ).
986Now we want to look at
987task->active_mm->pgd
988on my machine the active_mm in the task structure stab is
989active_mm:(4,12),672,32
990its offset is 672/8=84=0x54
991the pgd member in the mm_struct stab is
992pgd:(4,6)=*(29,5),96,32
993so its offset is 96/8=12=0xc
994
995so we'll
996hexdump -s 0xf160054 /dev/mem | more
997i.e. task_struct+active_mm offset
998to look at the active_mm member
999f160054 0fee cc60 0019 e334 0000 0000 0000 0011
1000hexdump -s 0x0feecc6c /dev/mem | more
1001i.e. active_mm+pgd offset
1002we get something like
1003feecc6c 0f2c 0000 0000 0001 0000 0001 0000 0010
1004now do
1005TR I R STD <pgd|0x7f> 0.7fffffff
1006i.e. the 0x7f is added because the pgd only
1007gives the page table origin & we need to set the low bits
1008to the maximum possible segment table length.
1009TR I R STD 0f2c007f 0.7fffffff
1010on z/Architecture you'll probably need to do
1011TR I R STD <pgd|0x7> 0.ffffffffffffffff
1012to set the TableType to 0x1 & the Table length to 3.
1013
1014
1015
1016Tracing Program Exceptions
1017--------------------------
1018If you get a crash which says something like
1019illegal operation or specification exception followed by a register dump,
1020you can restart linux & trace these using the tr prog <range or value> trace
1021option.
1022
1023
1024The most common ones you will normally be tracing for are:
10251=operation exception
10262=privileged operation exception
10274=protection exception
10285=addressing exception
10296=specification exception
103010=segment translation exception
103111=page translation exception
1032
1033The full list of these is on page 22 of the current s/390 Reference Summary.
1034e.g.
1035tr prog 10 will trace segment translation exceptions.
1036tr prog on its own will trace all program interruption codes.
1037
1038Trace Sets
1039----------
1040On starting VM you are initially in the INITIAL trace set.
1041You can do a Q TR to verify this.
1042If you have a complex tracing situation where you wish to wait, for instance,
1043till a driver is open before you start tracing IO, but know in your
1044heart that you are going to have to make several runs through the code till you
1045have a clue what's going on,
1046
1047what you can do is the following:
1048TR I PSWA <Driver open address>
1049hit b to continue till breakpoint
1050reach the breakpoint
1051now do your
1052TR GOTO B
1053TR IO 7c08-7c09 inst int run
1054or whatever the IO channels you wish to trace are & hit b
1055
1056To get back to the initial trace set do
1057TR GOTO INITIAL
1058& the TR I PSWA <Driver open address> will be the only active breakpoint again.
1059
1060
1061Tracing linux syscalls under VM
1062-------------------------------
1063Syscalls are implemented on Linux for S390 by the Supervisor call instruction
1064(SVC). There are 256 possibilities of these as the instruction is made up of a 0x0A
1065opcode and the second byte being the syscall number. They are traced using the
1066simple command:
1067TR SVC <Optional value or range>
1068the syscalls are defined in linux/arch/s390/include/asm/unistd.h
1069e.g. to trace all file opens just do
1070TR SVC 5 ( as this is the syscall number of open )
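
If you can't remember a syscall number, grepping the header mentioned above
is usually enough, e.g. ( the exact contents vary between kernel versions )
grep __NR_open linux/arch/s390/include/asm/unistd.h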
1071
1072
1073SMP Specific commands
1074---------------------
1075To find out how many cpus you have
1076Q CPUS displays all the CPU's available to your virtual machine
1077To find the cpu that the VM debugger commands are currently being directed at
1078do Q CPU; to change the cpu that VM debugger commands are directed at do
1079CPU <desired cpu no>
1080
1081On an SMP guest, to issue a command to all CPUs try prefixing the command with cpu
1082all. To issue a command to a particular cpu try cpu <cpu number> e.g.
1083CPU 01 TR I R 2000.3000
1084If you are running on a guest with several cpus & you have an IO related problem
1085& cannot follow the flow of code but you know it isn't smp related,
1086from the bash prompt issue
1087shutdown -h now or halt.
1088do a Q CPUS to find out how many cpus you have
1089detach each one of them from cp except cpu 0
1090by issuing a
1091DETACH CPU 01-(number of cpus in configuration)
1092& boot linux again.
1093TR SIGP will trace inter processor signal processor instructions.
1094DEFINE CPU 01-(number in configuration)
1095will get your guests cpus back.
1096
1097
1098Help for displaying ascii textstrings
1099-------------------------------------
1100On the very latest VM Nucleus'es VM can now display ascii
1101( thanks Neale for the hint ) by doing
1102D TX<lowaddr>.<len>
1103e.g.
1104D TX0.100
1105
1106Alternatively
1107=============
1108Under older VM debuggers ( I love EBCDIC too ) you can use the following little
1109program which converts a command line of hex digits to ascii text. It can be
1110compiled under linux and you can copy the hex digits from your x3270 terminal
1111to your xterm if you are debugging from a linuxbox.
1112
1113This is quite useful when looking at a parameter passed in as a text string
1114under VM ( unless you are good at decoding ASCII in your head ).
1115
1116e.g. consider tracing an open syscall
1117TR SVC 5
1118We have stopped at a breakpoint
1119000151B0' SVC 0A05 -> 0001909A' CC 0
1120
1121D 20.8 to check the SVC old psw in the prefix area and see whether it was from userspace
1122(for the layout of the prefix area consult the "Fixed Storage Locations"
1123chapter of the s/390 Reference Summary if you have it available).
1124V00000020 070C2000 800151B2
1125The problem state bit wasn't set & it's also too early in the boot sequence
1126for it to be a userspace SVC; if it was we would have to temporarily switch the
1127psw to user space addressing so we could get at the first parameter of the open
1128in gpr2.
1129Next do a
1130D G2
1131GPR 2 = 00014CB4
1132Now display what gpr2 is pointing to
1133D 00014CB4.20
1134V00014CB4 2F646576 2F636F6E 736F6C65 00001BF5
1135V00014CC4 FC00014C B4001001 E0001000 B8070707
1136Now copy the text till the first 00 hex ( which is the end of the string )
1137to an xterm & do hex2ascii on it.
1138hex2ascii 2F646576 2F636F6E 736F6C65 00
1139outputs
1140Decoded Hex:=/ d e v / c o n s o l e 0x00
1141We were opening the console device.
1142
1143You can compile the code below yourself for practice :-).
1144/*
1145 * hex2ascii.c
1146 * a useful little tool for converting a hexadecimal command line to ascii
1147 *
1148 * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com)
1149 * (C) 2000 IBM Deutschland Entwicklung GmbH, IBM Corporation.
1150 */
1151#include <stdio.h>
#include <string.h> /* for strcmp & strlen */
1152
1153int main(int argc,char *argv[])
1154{
1155 int cnt1,cnt2,len,toggle=0;
1156 int startcnt=1;
1157 unsigned char c,hex;
1158
1159 if(argc>1&&(strcmp(argv[1],"-a")==0))
1160 startcnt=2;
1161 printf("Decoded Hex:=");
1162 for(cnt1=startcnt;cnt1<argc;cnt1++)
1163 {
1164 len=strlen(argv[cnt1]);
1165 for(cnt2=0;cnt2<len;cnt2++)
1166 {
1167 c=argv[cnt1][cnt2];
1168 if(c>='0'&&c<='9')
1169 c=c-'0';
1170 if(c>='A'&&c<='F')
1171 c=c-'A'+10;
1172 if(c>='a'&&c<='f')
1173 c=c-'a'+10;
1174 switch(toggle)
1175 {
1176 case 0:
1177 hex=c<<4;
1178 toggle=1;
1179 break;
1180 case 1:
1181 hex+=c;
1182 if(hex<32||hex>127)
1183 {
1184 if(startcnt==1)
1185 printf("0x%02X ",(int)hex);
1186 else
1187 printf(".");
1188 }
1189 else
1190 {
1191 printf("%c",hex);
1192 if(startcnt==1)
1193 printf(" ");
1194 }
1195 toggle=0;
1196 break;
1197 }
1198 }
1199 }
1200 printf("\n");
1201}
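
Building it needs nothing special, e.g.
gcc hex2ascii.c -o hex2ascii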
1202
1203
1204
1205
1206Stack tracing under VM
1207----------------------
1208A basic backtrace
1209-----------------
1210
1211Here are the tricks I use; 9 out of 10 times it works pretty well.
1212
1213When your backchain reaches a dead end
1214--------------------------------------
1215This can happen when an exception happens in the kernel and the kernel is
1216entered twice. If you reach the NULL pointer at the end of the back chain you
1217should be able to sniff further back if you follow the following tricks.
12181) A kernel address should be easy to recognise since it is in
1219primary space & the problem state bit isn't set & also
1220the hi bit of the address is set.
12212) Another backchain should also be easy to recognise since it is an
1222address pointing to another address approximately 100 bytes or 0x70 hex
1223behind the current stackpointer.
1224
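For reference, the 31 bit stack frame layout that makes these offsets work
looks roughly like the sketch below ( the field names are my own, see the
ELF Application Binary Interface documents in the references for the
authoritative layout ):

struct stack_frame_31 {
	unsigned int  back_chain;	/* +0  pointer to the caller's frame, 0 ends the chain */
	unsigned int  end_of_stack;	/* +4  */
	unsigned int  glue[2];		/* +8  */
	unsigned int  scratch[2];	/* +16 */
	unsigned int  saved_gprs[10];	/* +24 r6..r15, so the saved r14 lives at +56 (0x38) */
	double        saved_fprs[2];	/* +64 f4 & f6 */
	unsigned char undefined[16];	/* +80 */
};					/* 96 bytes, which is what the prologues above take off r15 */

On z/Architecture the frame grows to 160 bytes & the saved r14 moves to
offset 112, which is why the gdb stack chaining examples further down use
112 instead of 56.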
1225
1226Here is some practice.
1227boot the kernel & hit PA1 at some random time
1228d g to display the gprs, this should display something like
1229GPR 0 = 00000001 00156018 0014359C 00000000
1230GPR 4 = 00000001 001B8888 000003E0 00000000
1231GPR 8 = 00100080 00100084 00000000 000FE000
1232GPR 12 = 00010400 8001B2DC 8001B36A 000FFED8
1233Note that GPR14 is a return address but as we are real men we are going to
1234trace the stack.
1235Display 0x40 bytes after the stack pointer, i.e. d 000FFED8.40
1236
1237V000FFED8 000FFF38 8001B838 80014C8E 000FFF38
1238V000FFEE8 00000000 00000000 000003E0 00000000
1239V000FFEF8 00100080 00100084 00000000 000FE000
1240V000FFF08 00010400 8001B2DC 8001B36A 000FFED8
1241
1242
1243Ah, now look at what's in sp+56 (sp+0x38): this is 8001B36A, our saved r14 if
1244you look above at our stackframe, & it also agrees with GPR14.
1245
1246now backchain
1247d 000FFF38.40
1248we now are taking the contents of SP to get our first backchain.
1249
1250V000FFF38 000FFFA0 00000000 00014995 00147094
1251V000FFF48 00147090 001470A0 000003E0 00000000
1252V000FFF58 00100080 00100084 00000000 001BF1D0
1253V000FFF68 00010400 800149BA 80014CA6 000FFF38
1254
1255This displays a 2nd return address of 80014CA6
1256
1257now do d 000FFFA0.40 for our 3rd backchain
1258
1259V000FFFA0 04B52002 0001107F 00000000 00000000
1260V000FFFB0 00000000 00000000 FF000000 0001107F
1261V000FFFC0 00000000 00000000 00000000 00000000
1262V000FFFD0 00010400 80010802 8001085A 000FFFA0
1263
1264
1265our 3rd return address is 8001085A
1266
1267as the 04B52002 looks suspiciously like rubbish it is fair to assume that the
1268kernel entry routines for the sake of optimisation don't set up a backchain.
1269
1270now look at System.map to see if the addresses make any sense.
1271
1272grep -i 0001b3 System.map
1273outputs among other things
12740001b304 T cpu_idle
1275so 8001B36A
1276is cpu_idle+0x66 ( quiet the cpu is asleep, don't wake it )
1277
1278
1279grep -i 00014 System.map
1280produces among other things
128100014a78 T start_kernel
1282so 80014CA6 is start_kernel+some hex number I can't add in my head.
1283
1284grep -i 00108 System.map
1285this produces
128600010800 T _stext
1287so 8001085A is _stext+0x5a
1288
1289Congrats you've done your first backchain.
1290
1291
1292
1293s/390 & z/Architecture IO Overview
1294==================================
1295
1296I am not going to give a course in 390 IO architecture as this would take me
1297quite a while and I'm no expert. Instead I'll give a 390 IO architecture
1298summary for Dummies. If you have the s/390 principles of operation available
1299read this instead. If nothing else you may find a few useful keywords in here
1300and be able to use them on a web search engine to find more useful information.
1301
1302Unlike other bus architectures modern 390 systems do their IO using mostly
1303fibre optics and devices such as tapes and disks can be shared between several
1304mainframes. Also S390 can support up to 65536 devices while a high end PC based
1305system might be choking with around 64.
1306
1307Here is some of the common IO terminology:
1308
1309Subchannel:
1310This is the logical number most IO commands use to talk to an IO device. There
1311can be up to 0x10000 (65536) of these in a configuration, typically there are a
1312few hundred. Under VM for simplicity they are allocated contiguously, however
1313on the native hardware they are not. They typically stay consistent between
1314boots provided no new hardware is inserted or removed.
1315Under Linux for s390 we use these as IRQ's and also when issuing an IO command
1316(CLEAR SUBCHANNEL, HALT SUBCHANNEL, MODIFY SUBCHANNEL, RESUME SUBCHANNEL,
1317START SUBCHANNEL, STORE SUBCHANNEL and TEST SUBCHANNEL). We use this as the ID
1318of the device we wish to talk to. The most important of these instructions are
1319START SUBCHANNEL (to start IO), TEST SUBCHANNEL (to check whether the IO
1320completed successfully) and HALT SUBCHANNEL (to kill IO). A subchannel can have
1321up to 8 channel paths to a device, this offers redundancy if one is not
1322available.
1323
1324Device Number:
1325This number remains static and is closely tied to the hardware. There are 65536
1326of these, made up of a CHPID (Channel Path ID, the most significant 8 bits) and
1327another 8 least significant bits. These remain static even if more devices are inserted or
1328removed from the hardware. There is a 1 to 1 mapping between subchannels and
1329device numbers, provided devices aren't inserted or removed.
1330
1331Channel Control Words:
1332CCWs are linked lists of instructions initially pointed to by an operation
1333request block (ORB), which is given to the START SUBCHANNEL (SSCH)
1334command along with the subchannel number for the IO subsystem to process
1335while the CPU continues executing normal code.
1336CCWs come in two flavours, Format 0 (24 bit for backward compatibility) and
1337Format 1 (31 bit). These are typically used to issue read and write (and many
1338other) instructions. They consist of a length field and an absolute address
1339field.
1340Each IO typically gets 1 or 2 interrupts, one for channel end (primary status)
1341when the channel is idle, and the second for device end (secondary status).
1342Sometimes you get both concurrently. You check how the IO went on by issuing a
1343TEST SUBCHANNEL at each interrupt, from which you receive an Interruption
1344response block (IRB). If you get channel and device end status in the IRB
1345without channel checks etc. your IO probably went okay. If you didn't you
1346probably need to examine the IRB, extended status word etc.
1347If an error occurs, more sophisticated control units have a facility known as
1348concurrent sense. This means that if an error occurs Extended sense information
1349will be presented in the Extended status word in the IRB. If not you have to
1350issue a subsequent SENSE CCW command after the test subchannel.
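
As a rough C sketch, a format 1 CCW as described above boils down to a
command code, some flag bits, a byte count & a 31 bit absolute data address
( an illustration only, not necessarily the exact definition the kernel uses ):

struct ccw1_sketch {
	unsigned char  cmd_code;	/* what to do, e.g. read, write, sense */
	unsigned char  flags;		/* chaining, indirect data addressing etc. */
	unsigned short count;		/* length of the data area */
	unsigned int   cda;		/* absolute address of the data */
};

The CCW lines in the IO trace output further down show such structures as
raw hex.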
1351
1352
1353TPI (Test pending interrupt) can also be used for polled IO, but in
1354multitasking multiprocessor systems it isn't recommended except for
1355checking special cases (i.e. non looping checks for pending IO etc.).
1356
1357Store Subchannel and Modify Subchannel can be used to examine and modify
1358operating characteristics of a subchannel (e.g. channel paths).
1359
1360Other IO related Terms:
1361Sysplex: S390's Clustering Technology
1362QDIO: S390's new high speed IO architecture to support devices such as gigabit
1363ethernet, this architecture is also designed to be forward compatible with
1364upcoming 64 bit machines.
1365
1366
1367General Concepts
1368
1369Input Output Processors (IOP's) are responsible for communicating between
1370the mainframe CPU's & the channel & relieve the mainframe CPU's from the
1371burden of communicating with IO devices directly, this allows the CPU's to
1372concentrate on data processing.
1373
1374IOP's can use one or more links ( known as channel paths ) to talk to each
1375IO device. It first checks for path availability & chooses an available one,
1376then starts ( & sometimes terminates IO ).
1377There are two types of channel path: ESCON & the Parallel IO interface.
1378
1379IO devices are attached to control units, control units provide the
1380logic to interface the channel paths & channel path IO protocols to
1381the IO devices, they can be integrated with the devices or housed separately
1382& often talk to several similar devices ( typical examples would be raid
1383controllers or a control unit which connects to 1000 3270 terminals ).
1384
1385
1386 +---------------------------------------------------------------+
1387 | +-----+ +-----+ +-----+ +-----+ +----------+ +----------+ |
1388 | | CPU | | CPU | | CPU | | CPU | | Main | | Expanded | |
1389 | | | | | | | | | | Memory | | Storage | |
1390 | +-----+ +-----+ +-----+ +-----+ +----------+ +----------+ |
1391 |---------------------------------------------------------------+
1392 | IOP | IOP | IOP |
1393 |---------------------------------------------------------------
1394 | C | C | C | C | C | C | C | C | C | C | C | C | C | C | C | C |
1395 ----------------------------------------------------------------
1396 || ||
1397 || Bus & Tag Channel Path || ESCON
1398 || ====================== || Channel
1399 || || || || Path
1400 +----------+ +----------+ +----------+
1401 | | | | | |
1402 | CU | | CU | | CU |
1403 | | | | | |
1404 +----------+ +----------+ +----------+
1405 | | | | |
1406+----------+ +----------+ +----------+ +----------+ +----------+
1407|I/O Device| |I/O Device| |I/O Device| |I/O Device| |I/O Device|
1408+----------+ +----------+ +----------+ +----------+ +----------+
1409 CPU = Central Processing Unit
1410 C = Channel
 1411 IOP = Input Output Processor
1412 CU = Control Unit
1413
1414The 390 IO systems come in 2 flavours; the current 390 machines support both:
1415
1416The older 360 & 370 interface, sometimes called the Parallel I/O interface,
1417sometimes called Bus-and-Tag & sometimes the Original Equipment Manufacturers
1418Interface (OEMI).
1419
1420This byte wide Parallel channel path/bus has parity & data on the "Bus" cable
1421and control lines on the "Tag" cable. These can operate in byte multiplex mode
1422for sharing between several slow devices or burst mode and monopolize the
1423channel for the whole burst. Up to 256 devices can be addressed on one of these
1424cables. These cables are about one inch in diameter. The maximum unextended
1425length supported by these cables is 125 Meters but this can be extended up to
14262km with a fibre optic channel extender such as a 3044. The maximum burst speed
1427supported is 4.5 megabytes per second. However, some really old processors
1428support only transfer rates of 3.0, 2.0 & 1.0 MB/sec.
1429One of these paths can be daisy chained to up to 8 control units.
1430
1431
1432ESCON ( if fibre optic it is also called FICON )
1433was introduced by IBM in 1990. It has 2 fibre optic cables and uses either leds or
1434lasers for communication at a signaling rate of up to 200 megabits/sec. As
143510bits are transferred for every 8 bits info this drops to 160 megabits/sec
1436and to 18.6 Megabytes/sec once control info and CRC are added. ESCON only
1437operates in burst mode.
1438
1439ESCONs typical max cable length is 3km for the led version and 20km for the
1440laser version known as XDF (extended distance facility). This can be further
1441extended by using an ESCON director which triples the above mentioned ranges.
1442Unlike Bus & Tag as ESCON is serial it uses a packet switching architecture,
1443the standard Bus & Tag control protocol is however present within the packets.
1444Up to 256 devices can be attached to each control unit that uses one of these
1445interfaces.
1446
1447Common 390 Devices include:
1448Network adapters, typically OSA2, 3172's, 2116's & OSA-E gigabit ethernet adapters.
1449Consoles, 3270 & 3215 ( a teletype emulated under linux for a line mode console ).
1450DASD's direct access storage devices ( otherwise known as hard disks ).
1451Tape Drives.
1452CTC ( Channel to Channel Adapters ),
1453ESCON or Parallel Cables used as a very high speed serial link
1454between 2 machines.
1455
1456
1457Debugging IO on s/390 & z/Architecture under VM
1458===============================================
1459
1460Now we are ready to go on with IO tracing commands under VM
1461
1462A few self explanatory queries:
1463Q OSA
1464Q CTC
1465Q DISK ( This command is CMS specific )
1466Q DASD
1467
1468
1469
1470
1471
1472
1473Q OSA on my machine returns
1474OSA 7C08 ON OSA 7C08 SUBCHANNEL = 0000
1475OSA 7C09 ON OSA 7C09 SUBCHANNEL = 0001
1476OSA 7C14 ON OSA 7C14 SUBCHANNEL = 0002
1477OSA 7C15 ON OSA 7C15 SUBCHANNEL = 0003
1478
1479If you have a guest with certain privileges you may be able to see devices
1480which don't belong to you. To avoid this, add the option V.
1481e.g.
1482Q V OSA
1483
1484Now using the device numbers returned by this command we will
1485trace the io starting up on the first devices 7c08 & 7c09.
1486In the simplest case we can trace the
1487start subchannels,
1488like TR SSCH 7C08-7C09,
1489or the halt subchannels,
1490like TR HSCH 7C08-7C09;
1491MSCH's, STSCH's... I think you can guess the rest.
1492
1493A good trick is tracing all the IO's and CCWS and spooling them into the reader
1494of another VM guest so he can ftp the logfile back to his own machine. I'll do
1495a small bit of this and give you a look at the output.
1496
14971) Spool stdout to VM reader
1498SP PRT TO (another vm guest ) or * for the local vm guest
14992) Fill the reader with the trace
1500TR IO 7c08-7c09 INST INT CCW PRT RUN
15013) Start up linux
1502i 00c
15034) Finish the trace
1504TR END
15055) close the reader
1506C PRT
15076) list reader contents
1508RDRLIST
15097) copy it to linux4's minidisk
1510RECEIVE / LOG TXT A1 ( replace
15118)
1512filel & press F11 to look at it
1513You should see something like:
1514
151500020942' SSCH B2334000 0048813C CC 0 SCH 0000 DEV 7C08
1516 CPA 000FFDF0 PARM 00E2C9C4 KEY 0 FPI C0 LPM 80
1517 CCW 000FFDF0 E4200100 00487FE8 0000 E4240100 ........
1518 IDAL 43D8AFE8
1519 IDAL 0FB76000
152000020B0A' I/O DEV 7C08 -> 000197BC' SCH 0000 PARM 00E2C9C4
152100021628' TSCH B2354000 >> 00488164 CC 0 SCH 0000 DEV 7C08
1522 CCWA 000FFDF8 DEV STS 0C SCH STS 00 CNT 00EC
1523 KEY 0 FPI C0 CC 0 CTLS 4007
152400022238' STSCH B2344000 >> 00488108 CC 0 SCH 0000 DEV 7C08
1525
1526If you don't like messing up your reader ( because you possibly booted from it )
1527you can alternatively spool it to another guest's reader.
1528
1529
1530Other common VM device related commands
1531---------------------------------------------
1532These commands are listed only because they have
1533been of use to me in the past & may be of use to
1534you too. For more complete info on each of the commands
1535type HELP <command> from CMS.
1536detaching devices
1537DET <devno range>
1538ATT <devno range> <guest>
1539attach a device to a guest ( * for your own guest )
1540READY <devno> cause VM to issue a fake interrupt.
1541
1542The VARY command is normally only available to VM administrators.
1543VARY ON PATH <path> TO <devno range>
1544VARY OFF PATH <PATH> FROM <devno range>
1545This is used to switch on or off channel paths to devices.
1546
1547Q CHPID <channel path ID>
1548This displays state of devices using this channel path
1549D SCHIB <subchannel>
1550This displays the subchannel information SCHIB block for the device.
1551this I believe is also only available to administrators.
1552DEFINE CTC <devno>
1553defines a virtual CTC channel to channel connection
15542 need to be defined on each guest for the CTC driver to use.
1555COUPLE devno userid remote devno
1556Joins a local virtual device to a remote virtual device
1557( commonly used for the CTC driver ).
1558
1559Building a VM ramdisk under CMS which linux can use
1560def vfb-<blocksize> <subchannel> <number blocks>
1561blocksize is commonly 4096 for linux.
1562Formatting it
1563format <subchannel> <drive letter e.g. x> (blksize <blocksize>
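e.g. for a roughly 40 MB ramdisk at device number 200 ( the numbers are only
an example )
def vfb-4096 200 10000
format 200 x (blksize 4096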
1564
1565Sharing a disk between multiple guests
1566LINK userid devno1 devno2 mode password
1567
1568
1569
1570GDB on S390
1571===========
1572N.B. if compiling for debugging gdb works better without optimisation
1573( see Compiling programs for debugging )
1574
1575invocation
1576----------
1577gdb <victim program> <optional corefile>
1578
1579Online help
1580-----------
1581help: gives help on commands
1582e.g.
1583help
1584help display
1585Note gdb's online help is very good use it.
1586
1587
1588Assembly
1589--------
1590info registers: displays registers other than floating point.
1591info all-registers: displays floating points as well.
1592disassemble: disassembles
1593e.g.
1594disassemble without parameters will disassemble the current function
1595disassemble $pc $pc+10
1596
1597Viewing & modifying variables
1598-----------------------------
1599print or p: displays variable or register
1600e.g. p/x $sp will display the stack pointer
1601
1602display: prints variable or register each time program stops
1603e.g.
1604display/x $pc will display the program counter
1605display argc
1606
1607undisplay : undo's display's
1608
1609info breakpoints: shows all current breakpoints
1610
1611info stack: shows stack back trace (if this doesn't work too well, I'll show
1612you the stacktrace by hand below).
1613
1614info locals: displays local variables.
1615
1616info args: display current procedure arguments.
1617
1618set args: will set argc & argv each time the victim program is invoked.
1619
1620set <variable>=value
1621set argc=100
1622set $pc=0
1623
1624
1625
1626Modifying execution
1627-------------------
1628step: steps n lines of sourcecode
1629step steps 1 line.
1630step 100 steps 100 lines of code.
1631
1632next: like step except this will not step into subroutines
1633
1634stepi: steps a single machine code instruction.
1635e.g. stepi 100
1636
1637nexti: steps a single machine code instruction but will not step into
1638subroutines.
1639
1640finish: will run until exit of the current routine
1641
1642run: (re)starts a program
1643
1644cont: continues a program
1645
1646quit: exits gdb.
1647
1648
1649breakpoints
1650------------
1651
1652break
1653sets a breakpoint
1654e.g.
1655
1656break main
1657
1658break *$pc
1659
1660break *0x400618
1661
1662Here's a really useful one for large programs
1663rbr
1664Set a breakpoint for all functions matching REGEXP
1665e.g.
1666rbr 390
1667will set a breakpoint with all functions with 390 in their name.
1668
1669info breakpoints
1670lists all breakpoints
1671
1672delete: delete breakpoint by number or delete them all
1673e.g.
1674delete 1 will delete the first breakpoint
1675delete will delete them all
1676
1677watch: This will set a watchpoint ( usually hardware assisted ),
1678This will watch a variable till it changes
1679e.g.
1680watch cnt, will watch the variable cnt till it changes.
1681As an aside, unfortunately gdb's architecture independent watchpoint code
1682is inconsistent & not very good; watchpoints usually work but not always.
1683
1684info watchpoints: Display currently active watchpoints
1685
1686condition: ( another useful one )
1687Specify breakpoint number N to break only if COND is true.
1688Usage is `condition N COND', where N is an integer and COND is an
1689expression to be evaluated whenever breakpoint N is reached.
1690
1691
1692
1693User defined functions/macros
1694-----------------------------
1695define: ( Note this is very very useful,simple & powerful )
1696usage define <name> <list of commands> end
1697
1698examples which you should consider putting into .gdbinit in your home directory
1699define d
1700stepi
1701disassemble $pc $pc+10
1702end
1703
1704define e
1705nexti
1706disassemble $pc $pc+10
1707end
1708
1709
1710Other hard to classify stuff
1711----------------------------
1712signal n:
1713sends the victim program a signal.
1714e.g. signal 3 will send a SIGQUIT.
1715
1716info signals:
1717what gdb does when the victim receives certain signals.
1718
1719list:
1720e.g.
1721list lists current function source
1722list 1,10 list first 10 lines of current file.
1723list test.c:1,10
1724
1725
1726directory:
1727Adds directories to be searched for source if gdb cannot find the source.
1728(note it is a bit sensitive about slashes)
1729e.g. To add the root of the filesystem to the searchpath do
1730directory //
1731
1732
1733call <function>
1734This calls a function in the victim program, this is pretty powerful
1735e.g.
1736(gdb) call printf("hello world")
1737outputs:
1738$1 = 11
1739
1740You might now be thinking that the line above didn't work, something extra had
1741to be done.
1742(gdb) call fflush(stdout)
1743hello world$2 = 0
1744As an aside the debugger also calls malloc & free under the hood
1745to make space for the "hello world" string.
1746
1747
1748
1749hints
1750-----
17511) command completion works just like bash
1752( if you are a bad typist like me this really helps )
1753e.g. hit br <TAB> & cursor up & down :-).
1754
17552) if you have a debugging problem that takes a few steps to recreate,
1756put the steps into a file called .gdbinit in your current working directory;
1757if you have defined a few extra useful user defined commands put these in
1758your home directory & they will be read each time gdb is launched.
1759
1760A typical .gdbinit file might be.
1761break main
1762run
1763break runtime_exception
1764cont
1765
1766
1767stack chaining in gdb by hand
1768-----------------------------
1769This is done using the same trick described for VM:
1770p/x (*($sp+56))&0x7fffffff get the first backchain.
1771
1772For z/Architecture
1773Replace 56 with 112 & ignore the &0x7fffffff
1774in the macros below & do nasty casts to longs like the following
1775as gdb unfortunately deals with printed arguments as ints which
1776messes up everything.
1777i.e. here is a 3rd backchain dereference
1778p/x *(long *)(***(long ***)$sp+112)
1779
1780
1781this outputs
1782$5 = 0x528f18
1783on my machine.
1784Now you can use
1785info symbol (*($sp+56))&0x7fffffff
1786you might see something like.
1787rl_getc + 36 in section .text telling you what is located at address 0x528f18
1788Now do.
1789p/x (*(*$sp+56))&0x7fffffff
1790This outputs
1791$6 = 0x528ed0
1792Now do.
1793info symbol (*(*$sp+56))&0x7fffffff
1794rl_read_key + 180 in section .text
1795now do
1796p/x (*(**$sp+56))&0x7fffffff
1797& so on.
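
If you do this a lot it may be worth wrapping the expressions above in a few
user defined commands in your .gdbinit; a sketch for the 31 bit case only
( the names are my own invention ):

define bc1
p/x (*($sp+56))&0x7fffffff
info symbol (*($sp+56))&0x7fffffff
end

define bc2
p/x (*(*$sp+56))&0x7fffffff
info symbol (*(*$sp+56))&0x7fffffff
end

define bc3
p/x (*(**$sp+56))&0x7fffffff
info symbol (*(**$sp+56))&0x7fffffff
end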
1798
1799Disassembling instructions without debug info
1800---------------------------------------------
1801gdb typically complains if there is a lack of debugging
1802symbols in the disassemble command with
1803"No function contains specified address." To get around
1804this do
1805x/<number lines to disassemble>xi <address>
1806e.g.
1807x/20xi 0x400730
1808
1809
1810
1811Note: Remember gdb has history just like bash you don't need to retype the
1812whole line just use the up & down arrows.
1813
1814
1815
1816For more info
1817-------------
1818From your linuxbox do
1819man gdb or info gdb.
1820
1821core dumps
1822----------
1823What is a core dump?
1824A core dump is a file generated by the kernel (if allowed) which contains the
1825registers and all active pages of the program which has crashed.
1826From this file gdb will allow you to look at the registers, stack trace and
1827memory of the program as if it just crashed on your system. It is usually
1828called core and created in the current working directory.
1829This is very useful in that a customer can mail a core dump to a technical
1830support department and the technical support department can reconstruct what
1831happened, provided they have an identical copy of this program with debugging
1832symbols compiled in and the source base of this build is available.
1833In short it is far more useful than something like a crash log could ever hope
1834to be.
1835
1836Why have I never seen one?
1837Probably because you haven't used the command
1838ulimit -c unlimited in bash
1839to allow core dumps, now do
1840ulimit -a
1841to verify that the limit was accepted.
1842
1843A sample core dump
1844To create this I'm going to do
1845ulimit -c unlimited
1846gdb
1847to launch gdb (my victim app. ) now be bad & do the following from another
1848telnet/xterm session to the same machine
1849ps -aux | grep gdb
1850kill -SIGSEGV <gdb's pid>
1851or alternatively use killall -SIGSEGV gdb if you have the killall command.
1852Now look at the core dump.
1853./gdb core
1854Displays the following
1855GNU gdb 4.18
1856Copyright 1998 Free Software Foundation, Inc.
1857GDB is free software, covered by the GNU General Public License, and you are
1858welcome to change it and/or distribute copies of it under certain conditions.
1859Type "show copying" to see the conditions.
1860There is absolutely no warranty for GDB. Type "show warranty" for details.
1861This GDB was configured as "s390-ibm-linux"...
1862Core was generated by `./gdb'.
1863Program terminated with signal 11, Segmentation fault.
1864Reading symbols from /usr/lib/libncurses.so.4...done.
1865Reading symbols from /lib/libm.so.6...done.
1866Reading symbols from /lib/libc.so.6...done.
1867Reading symbols from /lib/ld-linux.so.2...done.
1868#0 0x40126d1a in read () from /lib/libc.so.6
1869Setting up the environment for debugging gdb.
1870Breakpoint 1 at 0x4dc6f8: file utils.c, line 471.
1871Breakpoint 2 at 0x4d87a4: file top.c, line 2609.
1872(top-gdb) info stack
1873#0 0x40126d1a in read () from /lib/libc.so.6
1874#1 0x528f26 in rl_getc (stream=0x7ffffde8) at input.c:402
1875#2 0x528ed0 in rl_read_key () at input.c:381
1876#3 0x5167e6 in readline_internal_char () at readline.c:454
1877#4 0x5168ee in readline_internal_charloop () at readline.c:507
1878#5 0x51692c in readline_internal () at readline.c:521
1879#6 0x5164fe in readline (prompt=0x7ffff810)
1880 at readline.c:349
1881#7 0x4d7a8a in command_line_input (prompt=0x564420 "(gdb) ", repeat=1,
1882 annotation_suffix=0x4d6b44 "prompt") at top.c:2091
1883#8 0x4d6cf0 in command_loop () at top.c:1345
1884#9 0x4e25bc in main (argc=1, argv=0x7ffffdf4) at main.c:635
1885
1886
1887LDD
1888===
1889This is a program which lists the shared libraries which a program or library needs.
1890Note you also get the relocations of the shared library text segments which
1891help when using objdump --source.
1892e.g.
1893 ldd ./gdb
1894outputs
1895libncurses.so.4 => /usr/lib/libncurses.so.4 (0x40018000)
1896libm.so.6 => /lib/libm.so.6 (0x4005e000)
1897libc.so.6 => /lib/libc.so.6 (0x40084000)
1898/lib/ld-linux.so.2 => /lib/ld-linux.so.2 (0x40000000)
1899
1900
1901Debugging shared libraries
1902==========================
1903Most programs use shared libraries, however it can be very painful
1904when you single step an instruction into a function like printf for the
1905first time & you end up in functions like _dl_runtime_resolve; this is
1906the ld.so doing lazy binding. Lazy binding is a concept in ELF where
1907shared library functions are not loaded into memory unless they are
1908actually used, great for saving memory but a pain to debug.
1909To get around this either relink the program -static, or exit gdb, type
1910export LD_BIND_NOW=true ( this will stop lazy binding ) & restart
1911gdb'ing the program in question.
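e.g. from the shell you are about to run gdb from
export LD_BIND_NOW=true
gdb <victim program>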
1912
1913
1914
1915Debugging modules
1916=================
1917As modules are dynamically loaded into the kernel their address can be
1918anywhere; to get around this use the -m option with insmod to emit a load
1919map which can be piped into a file if required.
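e.g. something like ( the module name is just an example )
insmod -m mymodule.o > mymodule.map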
1920
1921The proc file system
1922====================
1923What is it?
1924It is a filesystem created by the kernel with files which are created on demand
1925by the kernel if read, or can be used to modify kernel parameters,
1926it is a powerful concept.
1927
1928e.g.
1929
1930cat /proc/sys/net/ipv4/ip_forward
1931On my machine outputs
19320
1933telling me ip_forwarding is not on; to switch it on I can do
1934echo 1 > /proc/sys/net/ipv4/ip_forward
1935cat it again
1936cat /proc/sys/net/ipv4/ip_forward
1937On my machine now outputs
19381
1939IP forwarding is on.
1940There is a lot of useful info in here best found by going in and having a look
1941around, so I'll take you through some entries I consider important.
1942
1943All the processes running on the machine have their own entry defined by
1944/proc/<pid>
1945So lets have a look at the init process
1946cd /proc/1
1947
1948cat cmdline
1949emits
1950init [2]
1951
1952cd /proc/1/fd
1953This contains numerical entries of all the open files,
1954some of these you can cat, e.g. stdout ( fd 1 )
1955
1956cat /proc/29/maps
1957on my machine emits
1958
195900400000-00478000 r-xp 00000000 5f:00 4103 /bin/bash
196000478000-0047e000 rw-p 00077000 5f:00 4103 /bin/bash
19610047e000-00492000 rwxp 00000000 00:00 0
196240000000-40015000 r-xp 00000000 5f:00 14382 /lib/ld-2.1.2.so
196340015000-40016000 rw-p 00014000 5f:00 14382 /lib/ld-2.1.2.so
196440016000-40017000 rwxp 00000000 00:00 0
196540017000-40018000 rw-p 00000000 00:00 0
196640018000-4001b000 r-xp 00000000 5f:00 14435 /lib/libtermcap.so.2.0.8
19674001b000-4001c000 rw-p 00002000 5f:00 14435 /lib/libtermcap.so.2.0.8
19684001c000-4010d000 r-xp 00000000 5f:00 14387 /lib/libc-2.1.2.so
19694010d000-40111000 rw-p 000f0000 5f:00 14387 /lib/libc-2.1.2.so
197040111000-40114000 rw-p 00000000 00:00 0
197140114000-4011e000 r-xp 00000000 5f:00 14408 /lib/libnss_files-2.1.2.so
19724011e000-4011f000 rw-p 00009000 5f:00 14408 /lib/libnss_files-2.1.2.so
19737fffd000-80000000 rwxp ffffe000 00:00 0
1974
1975
1976Showing us the shared libraries init uses where they are in memory
1977& memory access permissions for each virtual memory area.
1978
1979/proc/1/cwd is a softlink to the current working directory.
1980/proc/1/root is the root of the filesystem for this process.
1981
1982/proc/1/mem is the current running processes memory which you
1983can read & write to like a file.
1984strace uses this sometimes as it is a bit faster than the
1985rather inefficient ptrace interface for peeking at DATA.
1986
1987
1988cat status
1989
1990Name: init
1991State: S (sleeping)
1992Pid: 1
1993PPid: 0
1994Uid: 0 0 0 0
1995Gid: 0 0 0 0
1996Groups:
1997VmSize: 408 kB
1998VmLck: 0 kB
1999VmRSS: 208 kB
2000VmData: 24 kB
2001VmStk: 8 kB
2002VmExe: 368 kB
2003VmLib: 0 kB
2004SigPnd: 0000000000000000
2005SigBlk: 0000000000000000
2006SigIgn: 7fffffffd7f0d8fc
2007SigCgt: 00000000280b2603
2008CapInh: 00000000fffffeff
2009CapPrm: 00000000ffffffff
2010CapEff: 00000000fffffeff
2011
2012User PSW: 070de000 80414146
2013task: 004b6000 tss: 004b62d8 ksp: 004b7ca8 pt_regs: 004b7f68
2014User GPRS:
201500000400 00000000 0000000b 7ffffa90
201600000000 00000000 00000000 0045d9f4
20170045cafc 7ffffa90 7fffff18 0045cb08
201800010400 804039e8 80403af8 7ffff8b0
2019User ACRS:
202000000000 00000000 00000000 00000000
202100000001 00000000 00000000 00000000
202200000000 00000000 00000000 00000000
202300000000 00000000 00000000 00000000
2024Kernel BackChain CallChain BackChain CallChain
2025 004b7ca8 8002bd0c 004b7d18 8002b92c
2026 004b7db8 8005cd50 004b7e38 8005d12a
2027 004b7f08 80019114
2028Showing among other things memory usage & status of some signals &
2029the process's registers from the kernel task_struct
2030as well as a backchain which may be useful if a process crashes
2031in the kernel for some unknown reason.
2032
2033Some driver debugging techniques
2034================================
2035debug feature
2036-------------
2037Some of our drivers now support a "debug feature" in
2038/proc/s390dbf see s390dbf.txt in the linux/Documentation directory
2039for more info.
2040e.g.
2041to switch on the lcs "debug feature"
2042echo 5 > /proc/s390dbf/lcs/level
2043& then after the error occurred.
2044cat /proc/s390dbf/lcs/sprintf >/logfile
2045the logfile now contains some information which may help
2046tech support resolve a problem in the field.
2047
2048
2049
2050high level debugging network drivers
2051------------------------------------
2052ifconfig is quite a useful command;
2053it gives the current state of network drivers.
2054
2055If you suspect your network device driver is dead
2056one way to check is type
2057ifconfig <network device>
2058e.g. tr0
2059You should see something like
2060tr0 Link encap:16/4 Mbps Token Ring (New) HWaddr 00:04:AC:20:8E:48
2061 inet addr:9.164.185.132 Bcast:9.164.191.255 Mask:255.255.224.0
2062 UP BROADCAST RUNNING MULTICAST MTU:2000 Metric:1
2063 RX packets:246134 errors:0 dropped:0 overruns:0 frame:0
2064 TX packets:5 errors:0 dropped:0 overruns:0 carrier:0
2065 collisions:0 txqueuelen:100
2066
2067if the device doesn't say up
2068try
2069/etc/rc.d/init.d/network start
2070( this starts the network stack & hopefully calls ifconfig tr0 up ).
2071ifconfig looks at the output of /proc/net/dev and presents it in a more
2072presentable form.
2073Now ping the device from a machine in the same subnet.
2074if the RX packets count & TX packets counts don't increment you probably
2075have problems.
2076next
2077cat /proc/net/arp
2078Do you see any hardware addresses in the cache? If not you may have problems.
2079Next try
2080ping -c 5 <broadcast_addr> i.e. the Bcast field above in the output of
2081ifconfig. Do you see any replies from machines other than the local machine?
2082If not you may have problems. Also if the TX packets count in ifconfig
2083hasn't incremented either you have serious problems in your driver
2084(e.g. the txbusy field of the network device being stuck on )
2085or you may have multiple network devices connected.
2086
2087
2088chandev
2089-------
2090There is a new device layer for channel devices, some
2091drivers e.g. lcs are registered with this layer.
2092If the device uses the channel device layer you'll be
2093able to find what interrupts it uses & the current state
2094of the device.
2095See the manpage chandev.8 & type cat /proc/chandev for more info.
2096
2097
2098SysRq
2099=====
2100This is now supported by linux for s/390 & z/Architecture.
2101To enable it compile the kernel with
2102Kernel Hacking -> Magic SysRq Key Enabled & do
2103echo "1" > /proc/sys/kernel/sysrq
2104Also type
2105echo "8" > /proc/sys/kernel/printk
2106to make printk output go to the console.
2107On 390 all commands are prefixed with
2108^-
2109e.g.
2110^-t will show tasks.
2111^-? or some unknown command will display help.
2112The sysrq key reading is very picky ( I have to type the keys in an
2113 xterm session & paste them into the x3270 console )
2114& it may be wise to predefine the keys as described in the VM hints above
2115
2116This is particularly useful for syncing disks unmounting & rebooting
2117if the machine gets partially hung.
2118
2119Read Documentation/admin-guide/sysrq.rst for more info
2120
2121References:
2122===========
2123Enterprise Systems Architecture Reference Summary
2124Enterprise Systems Architecture Principles of Operation
2125Hartmut Penners s390 stack frame sheet.
2126IBM Mainframe Channel Attachment a technology brief from a CISCO webpage
2127Various bits of man & info pages of Linux.
2128Linux & GDB source.
2129Various info & man pages.
2130CMS Help on tracing commands.
2131Linux for s/390 Elf Application Binary Interface
2132Linux for z/Series Elf Application Binary Interface ( Both Highly Recommended )
2133z/Architecture Principles of Operation SA22-7832-00
2134Enterprise Systems Architecture/390 Reference Summary SA22-7209-01 & the
2135Enterprise Systems Architecture/390 Principles of Operation SA22-7201-05
2136
2137Special Thanks
2138==============
2139Special thanks to Neale Ferguson who maintains a much
2140prettier HTML version of this page at
2141http://linuxvm.org/penguinvm/
2142Bob Grainger, Stefan Bader & others for reporting bugs.
diff --git a/Documentation/s390/cds.txt b/Documentation/s390/cds.rst
index 480a78ef5a1e..7006d8209d2e 100644
--- a/Documentation/s390/cds.txt
+++ b/Documentation/s390/cds.rst
@@ -1,14 +1,18 @@
1===========================
1Linux for S/390 and zSeries 2Linux for S/390 and zSeries
3===========================
2 4
3Common Device Support (CDS) 5Common Device Support (CDS)
4Device Driver I/O Support Routines 6Device Driver I/O Support Routines
5 7
6Authors : Ingo Adlung 8Authors:
7 Cornelia Huck 9 - Ingo Adlung
10 - Cornelia Huck
8 11
9Copyright, IBM Corp. 1999-2002 12Copyright, IBM Corp. 1999-2002
10 13
11Introduction 14Introduction
15============
12 16
13This document describes the common device support routines for Linux/390. 17This document describes the common device support routines for Linux/390.
14Different than other hardware architectures, ESA/390 has defined a unified 18Different than other hardware architectures, ESA/390 has defined a unified
@@ -27,18 +31,20 @@ Operation manual (IBM Form. No. SA22-7201).
27 31
28In order to build common device support for ESA/390 I/O interfaces, a 32In order to build common device support for ESA/390 I/O interfaces, a
29functional layer was introduced that provides generic I/O access methods to 33functional layer was introduced that provides generic I/O access methods to
30the hardware. 34the hardware.
31 35
32The common device support layer comprises the I/O support routines defined 36The common device support layer comprises the I/O support routines defined
33below. Some of them implement common Linux device driver interfaces, while 37below. Some of them implement common Linux device driver interfaces, while
34some of them are ESA/390 platform specific. 38some of them are ESA/390 platform specific.
35 39
36Note: 40Note:
37In order to write a driver for S/390, you also need to look into the interface 41 In order to write a driver for S/390, you also need to look into the interface
38described in Documentation/s390/driver-model.txt. 42 described in Documentation/s390/driver-model.rst.
39 43
40Note for porting drivers from 2.4: 44Note for porting drivers from 2.4:
45
41The major changes are: 46The major changes are:
47
42* The functions use a ccw_device instead of an irq (subchannel). 48* The functions use a ccw_device instead of an irq (subchannel).
43* All drivers must define a ccw_driver (see driver-model.txt) and the associated 49* All drivers must define a ccw_driver (see driver-model.txt) and the associated
44 functions. 50 functions.
@@ -57,19 +63,16 @@ The major changes are:
57ccw_device_get_ciw() 63ccw_device_get_ciw()
58 get commands from extended sense data. 64 get commands from extended sense data.
59 65
60ccw_device_start() 66ccw_device_start(), ccw_device_start_timeout(), ccw_device_start_key(), ccw_device_start_key_timeout()
61ccw_device_start_timeout()
62ccw_device_start_key()
63ccw_device_start_key_timeout()
64 initiate an I/O request. 67 initiate an I/O request.
65 68
66ccw_device_resume() 69ccw_device_resume()
67 resume channel program execution. 70 resume channel program execution.
68 71
69ccw_device_halt() 72ccw_device_halt()
70 terminate the current I/O request processed on the device. 73 terminate the current I/O request processed on the device.
71 74
72do_IRQ() 75do_IRQ()
73 generic interrupt routine. This function is called by the interrupt entry 76 generic interrupt routine. This function is called by the interrupt entry
74 routine whenever an I/O interrupt is presented to the system. The do_IRQ() 77 routine whenever an I/O interrupt is presented to the system. The do_IRQ()
75 routine determines the interrupt status and calls the device specific 78 routine determines the interrupt status and calls the device specific
@@ -82,12 +85,15 @@ first level interrupt handler only and does not comprise a device driver
82callable interface. Instead, the functional description of do_IO() also 85callable interface. Instead, the functional description of do_IO() also
83describes the input to the device specific interrupt handler. 86describes the input to the device specific interrupt handler.
84 87
85Note: All explanations apply also to the 64 bit architecture s390x. 88Note:
89 All explanations apply also to the 64 bit architecture s390x.
86 90
87 91
88Common Device Support (CDS) for Linux/390 Device Drivers 92Common Device Support (CDS) for Linux/390 Device Drivers
93========================================================
89 94
90General Information 95General Information
96-------------------
91 97
92The following chapters describe the I/O related interface routines the 98The following chapters describe the I/O related interface routines the
93Linux/390 common device support (CDS) provides to allow for device specific 99Linux/390 common device support (CDS) provides to allow for device specific
@@ -101,6 +107,7 @@ can be found in the architecture specific C header file
101linux/arch/s390/include/asm/irq.h. 107linux/arch/s390/include/asm/irq.h.
102 108
103Overview of CDS interface concepts 109Overview of CDS interface concepts
110----------------------------------
104 111
105Different to other hardware platforms, the ESA/390 architecture doesn't define 112Different to other hardware platforms, the ESA/390 architecture doesn't define
106interrupt lines managed by a specific interrupt controller and bus systems 113interrupt lines managed by a specific interrupt controller and bus systems
@@ -126,7 +133,7 @@ has to call every single device driver registered on this IRQ in order to
126determine the device driver owning the device that raised the interrupt. 133determine the device driver owning the device that raised the interrupt.
127 134
128Up to kernel 2.4, Linux/390 used to provide interfaces via the IRQ (subchannel). 135Up to kernel 2.4, Linux/390 used to provide interfaces via the IRQ (subchannel).
129For internal use of the common I/O layer, these are still there. However, 136For internal use of the common I/O layer, these are still there. However,
130device drivers should use the new calling interface via the ccw_device only. 137device drivers should use the new calling interface via the ccw_device only.
131 138
132During its startup the Linux/390 system checks for peripheral devices. Each 139During its startup the Linux/390 system checks for peripheral devices. Each
@@ -134,7 +141,7 @@ of those devices is uniquely defined by a so called subchannel by the ESA/390
134channel subsystem. While the subchannel numbers are system generated, each 141channel subsystem. While the subchannel numbers are system generated, each
135subchannel also takes a user defined attribute, the so called device number. 142subchannel also takes a user defined attribute, the so called device number.
136Both subchannel number and device number cannot exceed 65535. During sysfs 143Both subchannel number and device number cannot exceed 65535. During sysfs
137initialisation, the information about control unit type and device types that 144initialisation, the information about control unit type and device types that
138imply specific I/O commands (channel command words - CCWs) in order to operate 145imply specific I/O commands (channel command words - CCWs) in order to operate
139the device are gathered. Device drivers can retrieve this set of hardware 146the device are gathered. Device drivers can retrieve this set of hardware
140information during their initialization step to recognize the devices they 147information during their initialization step to recognize the devices they
@@ -164,18 +171,26 @@ get_ciw() - get command information word
164This call enables a device driver to get information about supported commands 171This call enables a device driver to get information about supported commands
165from the extended SenseID data. 172from the extended SenseID data.
166 173
167struct ciw * 174::
168ccw_device_get_ciw(struct ccw_device *cdev, __u32 cmd);
169 175
170cdev - The ccw_device for which the command is to be retrieved. 176 struct ciw *
171cmd - The command type to be retrieved. 177 ccw_device_get_ciw(struct ccw_device *cdev, __u32 cmd);
178
179==== ========================================================
180cdev The ccw_device for which the command is to be retrieved.
181cmd The command type to be retrieved.
182==== ========================================================
172 183
173ccw_device_get_ciw() returns: 184ccw_device_get_ciw() returns:
174NULL - No extended data available, invalid device or command not found.
175!NULL - The command requested.
176 185
186===== ================================================================
187 NULL No extended data available, invalid device or command not found.
188!NULL The command requested.
189===== ================================================================
190
191::
177 192
178ccw_device_start() - Initiate I/O Request 193 ccw_device_start() - Initiate I/O Request
179 194
180The ccw_device_start() routine is the I/O request front-end processor. All 195The ccw_device_start() routine is the I/O request front-end processor. All
181device driver I/O requests must be issued using this routine. A device driver 196device driver I/O requests must be issued using this routine. A device driver
@@ -186,93 +201,105 @@ This description also covers the status information passed to the device
186driver's interrupt handler as this is related to the rules (flags) defined 201driver's interrupt handler as this is related to the rules (flags) defined
187with the associated I/O request when calling ccw_device_start(). 202with the associated I/O request when calling ccw_device_start().
188 203
189int ccw_device_start(struct ccw_device *cdev, 204::
190 struct ccw1 *cpa, 205
191 unsigned long intparm, 206 int ccw_device_start(struct ccw_device *cdev,
192 __u8 lpm, 207 struct ccw1 *cpa,
193 unsigned long flags); 208 unsigned long intparm,
194int ccw_device_start_timeout(struct ccw_device *cdev, 209 __u8 lpm,
195 struct ccw1 *cpa, 210 unsigned long flags);
196 unsigned long intparm, 211 int ccw_device_start_timeout(struct ccw_device *cdev,
197 __u8 lpm, 212 struct ccw1 *cpa,
198 unsigned long flags, 213 unsigned long intparm,
199 int expires); 214 __u8 lpm,
200int ccw_device_start_key(struct ccw_device *cdev, 215 unsigned long flags,
201 struct ccw1 *cpa, 216 int expires);
202 unsigned long intparm, 217 int ccw_device_start_key(struct ccw_device *cdev,
203 __u8 lpm, 218 struct ccw1 *cpa,
204 __u8 key, 219 unsigned long intparm,
205 unsigned long flags); 220 __u8 lpm,
206int ccw_device_start_key_timeout(struct ccw_device *cdev, 221 __u8 key,
207 struct ccw1 *cpa, 222 unsigned long flags);
208 unsigned long intparm, 223 int ccw_device_start_key_timeout(struct ccw_device *cdev,
209 __u8 lpm, 224 struct ccw1 *cpa,
210 __u8 key, 225 unsigned long intparm,
211 unsigned long flags, 226 __u8 lpm,
212 int expires); 227 __u8 key,
213 228 unsigned long flags,
214cdev : ccw_device the I/O is destined for 229 int expires);
215cpa : logical start address of channel program 230
216user_intparm : user specific interrupt information; will be presented 231============= =============================================================
217 back to the device driver's interrupt handler. Allows a 232cdev ccw_device the I/O is destined for
218 device driver to associate the interrupt with a 233cpa logical start address of channel program
219 particular I/O request. 234user_intparm user specific interrupt information; will be presented
220lpm : defines the channel path to be used for a specific I/O 235 back to the device driver's interrupt handler. Allows a
221 request. A value of 0 will make cio use the opm. 236 device driver to associate the interrupt with a
222key : the storage key to use for the I/O (useful for operating on a 237 particular I/O request.
223 storage with a storage key != default key) 238lpm defines the channel path to be used for a specific I/O
224flag : defines the action to be performed for I/O processing 239 request. A value of 0 will make cio use the opm.
225expires : timeout value in jiffies. The common I/O layer will terminate 240key the storage key to use for the I/O (useful for operating on a
226 the running program after this and call the interrupt handler 241 storage with a storage key != default key)
227 with ERR_PTR(-ETIMEDOUT) as irb. 242flag defines the action to be performed for I/O processing
228 243expires timeout value in jiffies. The common I/O layer will terminate
229Possible flag values are : 244 the running program after this and call the interrupt handler
230 245 with ERR_PTR(-ETIMEDOUT) as irb.
231DOIO_ALLOW_SUSPEND - channel program may become suspended 246============= =============================================================
232DOIO_DENY_PREFETCH - don't allow for CCW prefetch; usually 247
233 this implies the channel program might 248Possible flag values are:
234 become modified 249
235DOIO_SUPPRESS_INTER - don't call the handler on intermediate status 250========================= =============================================
236 251DOIO_ALLOW_SUSPEND channel program may become suspended
237The cpa parameter points to the first format 1 CCW of a channel program : 252DOIO_DENY_PREFETCH don't allow for CCW prefetch; usually
238 253 this implies the channel program might
239struct ccw1 { 254 become modified
240 __u8 cmd_code;/* command code */ 255DOIO_SUPPRESS_INTER don't call the handler on intermediate status
241 __u8 flags; /* flags, like IDA addressing, etc. */ 256========================= =============================================
242 __u16 count; /* byte count */ 257
243 __u32 cda; /* data address */ 258The cpa parameter points to the first format 1 CCW of a channel program::
244} __attribute__ ((packed,aligned(8))); 259
245 260 struct ccw1 {
246with the following CCW flags values defined : 261 __u8 cmd_code;/* command code */
247 262 __u8 flags; /* flags, like IDA addressing, etc. */
248CCW_FLAG_DC - data chaining 263 __u16 count; /* byte count */
249CCW_FLAG_CC - command chaining 264 __u32 cda; /* data address */
250CCW_FLAG_SLI - suppress incorrect length 265 } __attribute__ ((packed,aligned(8)));
251CCW_FLAG_SKIP - skip 266
252CCW_FLAG_PCI - PCI 267with the following CCW flags values defined:
253CCW_FLAG_IDA - indirect addressing 268
254CCW_FLAG_SUSPEND - suspend 269=================== =========================
270CCW_FLAG_DC data chaining
271CCW_FLAG_CC command chaining
272CCW_FLAG_SLI suppress incorrect length
273CCW_FLAG_SKIP skip
274CCW_FLAG_PCI PCI
275CCW_FLAG_IDA indirect addressing
276CCW_FLAG_SUSPEND suspend
277=================== =========================
255 278
256 279
257Via ccw_device_set_options(), the device driver may specify the following 280Via ccw_device_set_options(), the device driver may specify the following
258options for the device: 281options for the device:
259 282
260DOIO_EARLY_NOTIFICATION - allow for early interrupt notification 283========================= ======================================
261DOIO_REPORT_ALL - report all interrupt conditions 284DOIO_EARLY_NOTIFICATION allow for early interrupt notification
285DOIO_REPORT_ALL report all interrupt conditions
286========================= ======================================
262 287
263 288
264The ccw_device_start() function returns : 289The ccw_device_start() function returns:
265 290
266 0 - successful completion or request successfully initiated 291======== ======================================================================
267-EBUSY - The device is currently processing a previous I/O request, or there is 292 0 successful completion or request successfully initiated
268 a status pending at the device. 293 -EBUSY The device is currently processing a previous I/O request, or there is
269-ENODEV - cdev is invalid, the device is not operational or the ccw_device is 294 a status pending at the device.
270 not online. 295-ENODEV cdev is invalid, the device is not operational or the ccw_device is
296 not online.
297======== ======================================================================
271 298
272When the I/O request completes, the CDS first level interrupt handler will 299When the I/O request completes, the CDS first level interrupt handler will
273accumulate the status in a struct irb and then call the device interrupt handler. 300accumulate the status in a struct irb and then call the device interrupt handler.
274The intparm field will contain the value the device driver has associated with a 301The intparm field will contain the value the device driver has associated with a
275particular I/O request. If a pending device status was recognized, 302particular I/O request. If a pending device status was recognized,
276intparm will be set to 0 (zero). This may happen during I/O initiation or delayed 303intparm will be set to 0 (zero). This may happen during I/O initiation or delayed
277by an alert status notification. In any case this status is not related to the 304by an alert status notification. In any case this status is not related to the
278current (last) I/O request. In case of a delayed status notification no special 305current (last) I/O request. In case of a delayed status notification no special
@@ -282,9 +309,11 @@ never started, even though ccw_device_start() returned with successful completio
282The irb may contain an error value, and the device driver should check for this 309The irb may contain an error value, and the device driver should check for this
283first: 310first:
284 311
285-ETIMEDOUT: the common I/O layer terminated the request after the specified 312========== =================================================================
286 timeout value 313-ETIMEDOUT the common I/O layer terminated the request after the specified
287-EIO: the common I/O layer terminated the request due to an error state 314 timeout value
315-EIO the common I/O layer terminated the request due to an error state
316========== =================================================================
288 317
289If the concurrent sense flag in the extended status word (esw) in the irb is 318If the concurrent sense flag in the extended status word (esw) in the irb is
290set, the field erw.scnt in the esw describes the number of device specific 319set, the field erw.scnt in the esw describes the number of device specific
@@ -294,6 +323,7 @@ sensing by the device driver itself is required.
294The device interrupt handler can use the following definitions to investigate 323The device interrupt handler can use the following definitions to investigate
295the primary unit check source coded in sense byte 0 : 324the primary unit check source coded in sense byte 0 :
296 325
326======================= ====
297SNS0_CMD_REJECT 0x80 327SNS0_CMD_REJECT 0x80
298SNS0_INTERVENTION_REQ 0x40 328SNS0_INTERVENTION_REQ 0x40
299SNS0_BUS_OUT_CHECK 0x20 329SNS0_BUS_OUT_CHECK 0x20
@@ -301,36 +331,41 @@ SNS0_EQUIPMENT_CHECK 0x10
301SNS0_DATA_CHECK 0x08 331SNS0_DATA_CHECK 0x08
302SNS0_OVERRUN 0x04 332SNS0_OVERRUN 0x04
303SNS0_INCOMPL_DOMAIN 0x01 333SNS0_INCOMPL_DOMAIN 0x01
334======================= ====
304 335
305Depending on the device status, multiple of those values may be set together. 336Depending on the device status, multiple of those values may be set together.
306Please refer to the device specific documentation for details. 337Please refer to the device specific documentation for details.
307 338
308The irb->scsw.cstat field provides the (accumulated) subchannel status : 339The irb->scsw.cstat field provides the (accumulated) subchannel status :
309 340
310SCHN_STAT_PCI - program controlled interrupt 341========================= ============================
311SCHN_STAT_INCORR_LEN - incorrect length 342SCHN_STAT_PCI program controlled interrupt
312SCHN_STAT_PROG_CHECK - program check 343SCHN_STAT_INCORR_LEN incorrect length
313SCHN_STAT_PROT_CHECK - protection check 344SCHN_STAT_PROG_CHECK program check
314SCHN_STAT_CHN_DATA_CHK - channel data check 345SCHN_STAT_PROT_CHECK protection check
315SCHN_STAT_CHN_CTRL_CHK - channel control check 346SCHN_STAT_CHN_DATA_CHK channel data check
316SCHN_STAT_INTF_CTRL_CHK - interface control check 347SCHN_STAT_CHN_CTRL_CHK channel control check
317SCHN_STAT_CHAIN_CHECK - chaining check 348SCHN_STAT_INTF_CTRL_CHK interface control check
349SCHN_STAT_CHAIN_CHECK chaining check
350========================= ============================
318 351
319The irb->scsw.dstat field provides the (accumulated) device status : 352The irb->scsw.dstat field provides the (accumulated) device status :
320 353
321DEV_STAT_ATTENTION - attention 354===================== =================
322DEV_STAT_STAT_MOD - status modifier 355DEV_STAT_ATTENTION attention
323DEV_STAT_CU_END - control unit end 356DEV_STAT_STAT_MOD status modifier
324DEV_STAT_BUSY - busy 357DEV_STAT_CU_END control unit end
325DEV_STAT_CHN_END - channel end 358DEV_STAT_BUSY busy
326DEV_STAT_DEV_END - device end 359DEV_STAT_CHN_END channel end
327DEV_STAT_UNIT_CHECK - unit check 360DEV_STAT_DEV_END device end
328DEV_STAT_UNIT_EXCEP - unit exception 361DEV_STAT_UNIT_CHECK unit check
362DEV_STAT_UNIT_EXCEP unit exception
363===================== =================
329 364
330Please see the ESA/390 Principles of Operation manual for details on the 365Please see the ESA/390 Principles of Operation manual for details on the
331individual flag meanings. 366individual flag meanings.
332 367
333Usage Notes : 368Usage Notes:
334 369
335ccw_device_start() must be called disabled and with the ccw device lock held. 370ccw_device_start() must be called disabled and with the ccw device lock held.
336 371
@@ -374,32 +409,39 @@ secondary status without error (alert status) is presented, this indicates
374successful completion for all overlapping ccw_device_start() requests that have 409successful completion for all overlapping ccw_device_start() requests that have
375been issued since the last secondary (final) status. 410been issued since the last secondary (final) status.
376 411
377Channel programs that intend to set the suspend flag on a channel command word 412Channel programs that intend to set the suspend flag on a channel command word
378(CCW) must start the I/O operation with the DOIO_ALLOW_SUSPEND option or the 413(CCW) must start the I/O operation with the DOIO_ALLOW_SUSPEND option or the
379suspend flag will cause a channel program check. At the time the channel program 414suspend flag will cause a channel program check. At the time the channel program
380becomes suspended an intermediate interrupt will be generated by the channel 415becomes suspended an intermediate interrupt will be generated by the channel
381subsystem. 416subsystem.
382 417
383ccw_device_resume() - Resume Channel Program Execution 418ccw_device_resume() - Resume Channel Program Execution
384 419
385If a device driver chooses to suspend the current channel program execution by 420If a device driver chooses to suspend the current channel program execution by
386setting the CCW suspend flag on a particular CCW, the channel program execution 421setting the CCW suspend flag on a particular CCW, the channel program execution
387is suspended. In order to resume channel program execution the CIO layer 422is suspended. In order to resume channel program execution the CIO layer
388provides the ccw_device_resume() routine. 423provides the ccw_device_resume() routine.
389 424
390int ccw_device_resume(struct ccw_device *cdev); 425::
391 426
392cdev - ccw_device the resume operation is requested for 427 int ccw_device_resume(struct ccw_device *cdev);
428
429==== ================================================
430cdev ccw_device the resume operation is requested for
431==== ================================================
393 432
394The ccw_device_resume() function returns: 433The ccw_device_resume() function returns:
395 434
396 0 - suspended channel program is resumed 435========= ==============================================
397-EBUSY - status pending 436 0 suspended channel program is resumed
398-ENODEV - cdev invalid or not-operational subchannel 437 -EBUSY status pending
399-EINVAL - resume function not applicable 438 -ENODEV cdev invalid or not-operational subchannel
400-ENOTCONN - there is no I/O request pending for completion 439 -EINVAL resume function not applicable
440-ENOTCONN there is no I/O request pending for completion
441========= ==============================================
401 442
402Usage Notes: 443Usage Notes:
444
403Please have a look at the ccw_device_start() usage notes for more details on 445Please have a look at the ccw_device_start() usage notes for more details on
404suspended channel programs. 446suspended channel programs.
405 447
@@ -412,22 +454,28 @@ command is provided.
412 454
413ccw_device_halt() must be called disabled and with the ccw device lock held. 455ccw_device_halt() must be called disabled and with the ccw device lock held.
414 456
415int ccw_device_halt(struct ccw_device *cdev, 457::
416 unsigned long intparm); 458
459 int ccw_device_halt(struct ccw_device *cdev,
460 unsigned long intparm);
417 461
418cdev : ccw_device the halt operation is requested for 462======= =====================================================
419intparm : interruption parameter; value is only used if no I/O 463cdev ccw_device the halt operation is requested for
420 is outstanding, otherwise the intparm associated with 464intparm interruption parameter; value is only used if no I/O
421 the I/O request is returned 465 is outstanding, otherwise the intparm associated with
466 the I/O request is returned
467======= =====================================================
422 468
423The ccw_device_halt() function returns : 469The ccw_device_halt() function returns:
424 470
425 0 - request successfully initiated 471======= ==============================================================
426-EBUSY - the device is currently busy, or status pending. 472 0 request successfully initiated
427-ENODEV - cdev invalid. 473-EBUSY the device is currently busy, or status pending.
428-EINVAL - The device is not operational or the ccw device is not online. 474-ENODEV cdev invalid.
475-EINVAL The device is not operational or the ccw device is not online.
476======= ==============================================================
429 477
430Usage Notes : 478Usage Notes:
431 479
432A device driver may write a never-ending channel program by writing a channel 480A device driver may write a never-ending channel program by writing a channel
433program that at its end loops back to its beginning by means of a transfer in 481program that at its end loops back to its beginning by means of a transfer in
@@ -438,25 +486,34 @@ can then perform an appropriate action. Prior to interrupt of an outstanding
438read to a network device (with or without PCI flag) a ccw_device_halt() 486read to a network device (with or without PCI flag) a ccw_device_halt()
439is required to end the pending operation. 487is required to end the pending operation.
440 488
441ccw_device_clear() - Terminate I/O Request Processing 489::
490
491 ccw_device_clear() - Terminate I/O Request Processing
442 492
443In order to terminate all I/O processing at the subchannel, the clear subchannel 493In order to terminate all I/O processing at the subchannel, the clear subchannel
444(CSCH) command is used. It can be issued via ccw_device_clear(). 494(CSCH) command is used. It can be issued via ccw_device_clear().
445 495
446ccw_device_clear() must be called disabled and with the ccw device lock held. 496ccw_device_clear() must be called disabled and with the ccw device lock held.
447 497
448int ccw_device_clear(struct ccw_device *cdev, unsigned long intparm); 498::
499
500 int ccw_device_clear(struct ccw_device *cdev, unsigned long intparm);
449 501
450cdev: ccw_device the clear operation is requested for 502======= ===============================================
451intparm: interruption parameter (see ccw_device_halt()) 503cdev ccw_device the clear operation is requested for
504intparm interruption parameter (see ccw_device_halt())
505======= ===============================================
452 506
453The ccw_device_clear() function returns: 507The ccw_device_clear() function returns:
454 508
455 0 - request successfully initiated 509======= ==============================================================
456-ENODEV - cdev invalid 510 0 request successfully initiated
457-EINVAL - The device is not operational or the ccw device is not online. 511-ENODEV cdev invalid
512-EINVAL The device is not operational or the ccw device is not online.
513======= ==============================================================
458 514
459Miscellaneous Support Routines 515Miscellaneous Support Routines
516------------------------------
460 517
461This chapter describes various routines to be used in a Linux/390 device 518This chapter describes various routines to be used in a Linux/390 device
462driver programming environment. 519driver programming environment.
@@ -466,7 +523,8 @@ get_ccwdev_lock()
466Get the address of the device specific lock. This is then used in 523Get the address of the device specific lock. This is then used in
467spin_lock() / spin_unlock() calls. 524spin_lock() / spin_unlock() calls.
468 525
526::
469 527
470__u8 ccw_device_get_path_mask(struct ccw_device *cdev); 528 __u8 ccw_device_get_path_mask(struct ccw_device *cdev);
471 529
472Get the mask of the path currently available for cdev. 530Get the mask of the path currently available for cdev.
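
A hedged sketch of the calling convention from the usage notes above
(ccw_device_start() must be called disabled and with the ccw device lock held)
might look like the following; the function name, the intparm cookie and the
header locations are assumptions made for illustration, not part of the
interface description::

  #include <linux/spinlock.h>
  #include <asm/ccwdev.h>

  /*
   * Sketch only: issue one channel program on cdev while holding the
   * ccw device lock, as the ccw_device_start() usage notes require.
   */
  static int my_start_io(struct ccw_device *cdev, struct ccw1 *cpa,
                         unsigned long my_intparm)
  {
          unsigned long flags;
          int ret;

          spin_lock_irqsave(get_ccwdev_lock(cdev), flags);
          ret = ccw_device_start(cdev, cpa, my_intparm,
                                 0 /* lpm: 0 lets cio use the opm */,
                                 0 /* no special flags */);
          spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags);

          return ret;
  }
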
diff --git a/Documentation/s390/CommonIO b/Documentation/s390/common_io.rst
index 6e0f63f343b4..846485681ce7 100644
--- a/Documentation/s390/CommonIO
+++ b/Documentation/s390/common_io.rst
@@ -1,5 +1,9 @@
1S/390 common I/O-Layer - command line parameters, procfs and debugfs entries 1======================
2============================================================================ 2S/390 common I/O-Layer
3======================
4
5command line parameters, procfs and debugfs entries
6===================================================
3 7
4Command line parameters 8Command line parameters
5----------------------- 9-----------------------
@@ -13,7 +17,7 @@ Command line parameters
13 device := {all | [!]ipldev | [!]condev | [!]<devno> | [!]<devno>-<devno>} 17 device := {all | [!]ipldev | [!]condev | [!]<devno> | [!]<devno>-<devno>}
14 18
15 The given devices will be ignored by the common I/O-layer; no detection 19 The given devices will be ignored by the common I/O-layer; no detection
16 and device sensing will be done on any of those devices. The subchannel to 20 and device sensing will be done on any of those devices. The subchannel to
17 which the device in question is attached will be treated as if no device was 21 which the device in question is attached will be treated as if no device was
18 attached. 22 attached.
19 23
@@ -28,14 +32,20 @@ Command line parameters
28 keywords can be used to refer to the CCW based boot device and CCW console 32 keywords can be used to refer to the CCW based boot device and CCW console
29 device respectively (these are probably useful only when combined with the '!' 33 device respectively (these are probably useful only when combined with the '!'
30 operator). The '!' operator will cause the I/O-layer to _not_ ignore a device. 34 operator). The '!' operator will cause the I/O-layer to _not_ ignore a device.
31 The command line is parsed from left to right. 35 The command line
36 is parsed from left to right.
37
38 For example::
32 39
33 For example,
34 cio_ignore=0.0.0023-0.0.0042,0.0.4711 40 cio_ignore=0.0.0023-0.0.0042,0.0.4711
41
35 will ignore all devices ranging from 0.0.0023 to 0.0.0042 and the device 42 will ignore all devices ranging from 0.0.0023 to 0.0.0042 and the device
36 0.0.4711, if detected. 43 0.0.4711, if detected.
37 As another example, 44
45 As another example::
46
38 cio_ignore=all,!0.0.4711,!0.0.fd00-0.0.fd02 47 cio_ignore=all,!0.0.4711,!0.0.fd00-0.0.fd02
48
39 will ignore all devices but 0.0.4711, 0.0.fd00, 0.0.fd01, 0.0.fd02. 49 will ignore all devices but 0.0.4711, 0.0.fd00, 0.0.fd01, 0.0.fd02.
40 50
41 By default, no devices are ignored. 51 By default, no devices are ignored.
@@ -48,40 +58,45 @@ Command line parameters
48 58
49 Lists the ranges of devices (by bus id) which are ignored by common I/O. 59 Lists the ranges of devices (by bus id) which are ignored by common I/O.
50 60
51 You can un-ignore certain or all devices by piping to /proc/cio_ignore. 61 You can un-ignore certain or all devices by piping to /proc/cio_ignore.
52 "free all" will un-ignore all ignored devices, 62 "free all" will un-ignore all ignored devices,
53 "free <device range>, <device range>, ..." will un-ignore the specified 63 "free <device range>, <device range>, ..." will un-ignore the specified
54 devices. 64 devices.
55 65
56 For example, if devices 0.0.0023 to 0.0.0042 and 0.0.4711 are ignored, 66 For example, if devices 0.0.0023 to 0.0.0042 and 0.0.4711 are ignored,
67
57 - echo free 0.0.0030-0.0.0032 > /proc/cio_ignore 68 - echo free 0.0.0030-0.0.0032 > /proc/cio_ignore
58 will un-ignore devices 0.0.0030 to 0.0.0032 and will leave devices 0.0.0023 69 will un-ignore devices 0.0.0030 to 0.0.0032 and will leave devices 0.0.0023
59 to 0.0.002f, 0.0.0033 to 0.0.0042 and 0.0.4711 ignored; 70 to 0.0.002f, 0.0.0033 to 0.0.0042 and 0.0.4711 ignored;
60 - echo free 0.0.0041 > /proc/cio_ignore will furthermore un-ignore device 71 - echo free 0.0.0041 > /proc/cio_ignore will furthermore un-ignore device
61 0.0.0041; 72 0.0.0041;
62 - echo free all > /proc/cio_ignore will un-ignore all remaining ignored 73 - echo free all > /proc/cio_ignore will un-ignore all remaining ignored
63 devices. 74 devices.
64 75
65 When a device is un-ignored, device recognition and sensing is performed and 76 When a device is un-ignored, device recognition and sensing is performed and
66 the device driver will be notified if possible, so the device will become 77 the device driver will be notified if possible, so the device will become
67 available to the system. Note that un-ignoring is performed asynchronously. 78 available to the system. Note that un-ignoring is performed asynchronously.
68 79
69 You can also add ranges of devices to be ignored by piping to 80 You can also add ranges of devices to be ignored by piping to
70 /proc/cio_ignore; "add <device range>, <device range>, ..." will ignore the 81 /proc/cio_ignore; "add <device range>, <device range>, ..." will ignore the
71 specified devices. 82 specified devices.
72 83
73 Note: While already known devices can be added to the list of devices to be 84 Note: While already known devices can be added to the list of devices to be
74 ignored, there will be no effect on them. However, if such a device 85 ignored, there will be no effect on them. However, if such a device
75 disappears and then reappears, it will then be ignored. To make 86 disappears and then reappears, it will then be ignored. To make
76 known devices go away, you need the "purge" command (see below). 87 known devices go away, you need the "purge" command (see below).
77 88
78 For example, 89 For example::
90
79 "echo add 0.0.a000-0.0.accc, 0.0.af00-0.0.afff > /proc/cio_ignore" 91 "echo add 0.0.a000-0.0.accc, 0.0.af00-0.0.afff > /proc/cio_ignore"
92
80 will add 0.0.a000-0.0.accc and 0.0.af00-0.0.afff to the list of ignored 93 will add 0.0.a000-0.0.accc and 0.0.af00-0.0.afff to the list of ignored
81 devices. 94 devices.
82 95
83 You can remove already known but now ignored devices via 96 You can remove already known but now ignored devices via::
97
84 "echo purge > /proc/cio_ignore" 98 "echo purge > /proc/cio_ignore"
99
85 All devices ignored but still registered and not online (= not in use) 100 All devices ignored but still registered and not online (= not in use)
86 will be deregistered and thus removed from the system. 101 will be deregistered and thus removed from the system.
87 102
@@ -115,11 +130,11 @@ debugfs entries
115 Various debug messages from the common I/O-layer. 130 Various debug messages from the common I/O-layer.
116 131
117 - /sys/kernel/debug/s390dbf/cio_trace/hex_ascii 132 - /sys/kernel/debug/s390dbf/cio_trace/hex_ascii
118 Logs the calling of functions in the common I/O-layer and, if applicable, 133 Logs the calling of functions in the common I/O-layer and, if applicable,
119 which subchannel they were called for, as well as dumps of some data 134 which subchannel they were called for, as well as dumps of some data
120 structures (like irb in an error case). 135 structures (like irb in an error case).
121 136
122 The level of logging can be changed to be more or less verbose by piping to 137 The level of logging can be changed to be more or less verbose by piping to
123 /sys/kernel/debug/s390dbf/cio_*/level a number between 0 and 6; see the 138 /sys/kernel/debug/s390dbf/cio_*/level a number between 0 and 6; see the
124 documentation on the S/390 debug feature (Documentation/s390/s390dbf.txt) 139 documentation on the S/390 debug feature (Documentation/s390/s390dbf.rst)
125 for details. 140 for details.
diff --git a/Documentation/s390/DASD b/Documentation/s390/dasd.rst
index 9963f1e9c98a..9e22247285c8 100644
--- a/Documentation/s390/DASD
+++ b/Documentation/s390/dasd.rst
@@ -1,4 +1,6 @@
1==================
1DASD device driver 2DASD device driver
3==================
2 4
3S/390's disk devices (DASDs) are managed by Linux via the DASD device 5S/390's disk devices (DASDs) are managed by Linux via the DASD device
4driver. It is valid for all types of DASDs and represents them to 6driver. It is valid for all types of DASDs and represents them to
@@ -14,14 +16,14 @@ parameters are to be given in hexadecimal notation without a leading
14If you supply kernel parameters the different instances are processed 16If you supply kernel parameters the different instances are processed
15in order of appearance and a minor number is reserved for any device 17in order of appearance and a minor number is reserved for any device
16covered by the supplied range up to 64 volumes. Additional DASDs are 18covered by the supplied range up to 64 volumes. Additional DASDs are
17ignored. If you do not supply the 'dasd=' kernel parameter at all, the 19ignored. If you do not supply the 'dasd=' kernel parameter at all, the
18DASD driver registers all supported DASDs of your system to a minor 20DASD driver registers all supported DASDs of your system to a minor
19number in ascending order of the subchannel number. 21number in ascending order of the subchannel number.
20 22
21The driver currently supports ECKD-devices and there are stubs for 23The driver currently supports ECKD-devices and there are stubs for
22support of the FBA and CKD architectures. For the FBA architecture 24support of the FBA and CKD architectures. For the FBA architecture
23only some smart data structures are missing to make the support 25only some smart data structures are missing to make the support
24complete. 26complete.
25We performed our testing on 3380 and 3390 type disks of different 27We performed our testing on 3380 and 3390 type disks of different
26sizes, under VM and on the bare hardware (LPAR), using internal disks 28sizes, under VM and on the bare hardware (LPAR), using internal disks
27of the multiprise as well as a RAMAC virtual array. Disks exported by 29of the multiprise as well as a RAMAC virtual array. Disks exported by
@@ -34,19 +36,22 @@ accessibility of the DASD from other OSs. In a later stage we will
34provide support of partitions, maybe VTOC oriented or using a kind of 36provide support of partitions, maybe VTOC oriented or using a kind of
35partition table in the label record. 37partition table in the label record.
36 38
37USAGE 39Usage
40=====
38 41
39-Low-level format (?CKD only) 42-Low-level format (?CKD only)
40For using an ECKD-DASD as a Linux harddisk you have to low-level 43For using an ECKD-DASD as a Linux harddisk you have to low-level
41format the tracks by issuing the BLKDASDFORMAT-ioctl on that 44format the tracks by issuing the BLKDASDFORMAT-ioctl on that
42device. This will erase any data on that volume including IBM volume 45device. This will erase any data on that volume including IBM volume
43labels, VTOCs etc. The ioctl may take a 'struct format_data *' or 46labels, VTOCs etc. The ioctl may take a `struct format_data *` or
44'NULL' as an argument. 47'NULL' as an argument::
45typedef struct { 48
49 typedef struct {
46 int start_unit; 50 int start_unit;
47 int stop_unit; 51 int stop_unit;
48 int blksize; 52 int blksize;
49} format_data_t; 53 } format_data_t;
54
50When a NULL argument is passed to the BLKDASDFORMAT ioctl the whole 55When a NULL argument is passed to the BLKDASDFORMAT ioctl the whole
51disk is formatted to a blocksize of 1024 bytes. Otherwise start_unit 56disk is formatted to a blocksize of 1024 bytes. Otherwise start_unit
52and stop_unit are the first and last track to be formatted. If 57and stop_unit are the first and last track to be formatted. If
@@ -56,17 +61,23 @@ up to the last track. blksize can be any power of two between 512 and
561kB blocks anyway and you gain approx. 50% of capacity increasing your 611kB blocks anyway and you gain approx. 50% of capacity increasing your
57blksize from 512 byte to 1kB. 62blksize from 512 byte to 1kB.
58 63
59-Make a filesystem 64Make a filesystem
65=================
66
60Then you can mk??fs the filesystem of your choice on that volume or 67Then you can mk??fs the filesystem of your choice on that volume or
61partition. For reasons of sanity you should build your filesystem on 68partition. For reasons of sanity you should build your filesystem on
62the partition /dev/dd?1 instead of the whole volume. You only lose 3kB 69the partition /dev/dd?1 instead of the whole volume. You only lose 3kB
63but may be sure that you can reuse your data after introduction of a 70but may be sure that you can reuse your data after introduction of a
64real partition table. 71real partition table.
65 72
66BUGS: 73Bugs
74====
75
67- Performance sometimes is rather low because we don't fully exploit clustering 76- Performance sometimes is rather low because we don't fully exploit clustering
68 77
69TODO-List: 78TODO-List
79=========
80
70- Add IBM's Disk layout to genhd 81- Add IBM's Disk layout to genhd
71- Enhance driver to use more than one major number 82- Enhance driver to use more than one major number
72- Enable usage as a module 83- Enable usage as a module
diff --git a/Documentation/s390/debugging390.rst b/Documentation/s390/debugging390.rst
new file mode 100644
index 000000000000..d49305fd5e1a
--- /dev/null
+++ b/Documentation/s390/debugging390.rst
@@ -0,0 +1,2613 @@
1=============================================
2Debugging on Linux for s/390 & z/Architecture
3=============================================
4
5Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com)
6
7Copyright (C) 2000-2001 IBM Deutschland Entwicklung GmbH, IBM Corporation
8
9.. Best viewed with fixed width fonts
10
11Overview of Document:
12=====================
13This document is intended to give a good overview of how to debug Linux for
14s/390 and z/Architecture. It is not intended as a complete reference nor a
15tutorial on the fundamentals of C & assembly. It doesn't go into
16390 IO in any detail. It is intended to complement the documents in the
17reference section below & any other worthwhile references you get.
18
19It is intended like the Enterprise Systems Architecture/390 Reference Summary
20to be printed out & used as a quick cheat sheet self help style reference when
21problems occur.
22
23.. Contents
24 ========
25 Register Set
26 Address Spaces on Intel Linux
27 Address Spaces on Linux for s/390 & z/Architecture
28 The Linux for s/390 & z/Architecture Kernel Task Structure
29 Register Usage & Stackframes on Linux for s/390 & z/Architecture
30 A sample program with comments
31 Compiling programs for debugging on Linux for s/390 & z/Architecture
32 Debugging under VM
33 s/390 & z/Architecture IO Overview
34 Debugging IO on s/390 & z/Architecture under VM
35 GDB on s/390 & z/Architecture
36 Stack chaining in gdb by hand
37 Examining core dumps
38 ldd
39 Debugging modules
40 The proc file system
41 SysRq
42 References
43 Special Thanks
44
45Register Set
46============
47The current architectures have the following registers.
48
4916 General purpose registers, 32 bit on s/390 and 64 bit on z/Architecture,
50r0-r15 (or gpr0-gpr15), used for arithmetic and addressing.
51
5216 Control registers, 32 bit on s/390 and 64 bit on z/Architecture, cr0-cr15,
53kernel usage only, used for memory management, interrupt control, debugging
54control etc.
55
5616 Access registers (ar0-ar15), 32 bit on both s/390 and z/Architecture,
57not normally used by application programs but potentially could be used as
58temporary storage. These registers have a 1:1 association with general
59purpose registers and are designed to be used in the so-called access
60register mode to select different address spaces.
61Access register 0 (and access register 1 on z/Architecture, which needs a
6264 bit pointer) is currently used by the pthread library as a pointer to
63the current running thread's private area.
64
6516 64-bit floating point registers (fp0-fp15) IEEE & HFP floating
66point format compliant on G5 upwards & a Floating point control reg (FPC).
67
684 64-bit registers (fp0,fp2,fp4 & fp6) HFP only on older machines.
69
70Note:
71 Linux (currently) always uses IEEE & emulates G5 IEEE format on older
72 machines, ( provided the kernel is configured for this ).
73
74
75The PSW is the most important register on the machine; it
76is 64 bit on s/390 & 128 bit on z/Architecture & serves the roles of
77a program counter (pc), condition code register & memory space designator.
78In IBM standard notation I am counting bit 0 as the MSB.
79It has several advantages over a normal program counter
80in that you can change address translation & program counter
81in a single instruction. Changing address translation,
82e.g. switching address translation off, requires that you
83have a logical=physical mapping for the address you are
84currently running at.
85
86+-------------------------+-------------------------------------------------+
87| Bit | |
88+--------+----------------+ Value |
89| s/390 | z/Architecture | |
90+========+================+=================================================+
91| 0 | 0 | Reserved (must be 0) otherwise specification |
92| | | exception occurs. |
93+--------+----------------+-------------------------------------------------+
94| 1 | 1 | Program Event Recording 1 PER enabled, |
95| | | PER is used to facilitate debugging e.g. |
96| | | single stepping. |
97+--------+----------------+-------------------------------------------------+
98| 2-4 | 2-4 | Reserved (must be 0). |
99+--------+----------------+-------------------------------------------------+
100| 5 | 5 | Dynamic address translation 1=DAT on. |
101+--------+----------------+-------------------------------------------------+
102| 6 | 6 | Input/Output interrupt Mask |
103+--------+----------------+-------------------------------------------------+
104| 7 | 7 | External interrupt Mask used primarily for |
105| | | interprocessor signalling and clock interrupts. |
106+--------+----------------+-------------------------------------------------+
107| 8-11 | 8-11 | PSW Key used for complex memory protection |
108| | | mechanism (not used under linux) |
109+--------+----------------+-------------------------------------------------+
110| 12 | 12 | 1 on s/390 0 on z/Architecture |
111+--------+----------------+-------------------------------------------------+
112| 13 | 13 | Machine Check Mask 1=enable machine check |
113| | | interrupts |
114+--------+----------------+-------------------------------------------------+
115| 14 | 14 | Wait State. Set this to 1 to stop the processor |
116| | | except for interrupts and give time to other |
117| | | LPARS. Used in CPU idle in the kernel to |
118| | | increase overall usage of processor resources. |
119+--------+----------------+-------------------------------------------------+
120| 15 | 15 | Problem state (if set to 1 certain instructions |
121| | | are disabled). All linux user programs run with |
122| | | this bit 1 (useful info for debugging under VM).|
123+--------+----------------+-------------------------------------------------+
124| 16-17 | 16-17 | Address Space Control |
125| | | |
126| | | 00 Primary Space Mode: |
127| | | |
128| | | The register CR1 contains the primary |
129| | | address-space control element (PASCE), which |
130| | | points to the primary space region/segment |
131| | | table origin. |
132| | | |
133| | | 01 Access register mode |
134| | | |
135| | | 10 Secondary Space Mode: |
136| | | |
137| | | The register CR7 contains the secondary |
138| | | address-space control element (SASCE), which |
139| | | points to the secondary space region or |
140| | | segment table origin. |
141| | | |
142| | | 11 Home Space Mode: |
143| | | |
144| | | The register CR13 contains the home space |
145| | | address-space control element (HASCE), which |
146| | | points to the home space region/segment |
147| | | table origin. |
148| | | |
149| | | See "Address Spaces on Linux for s/390 & |
150| | | z/Architecture" below for more information |
151| | | about address space usage in Linux. |
152+--------+----------------+-------------------------------------------------+
153| 18-19 | 18-19 | Condition codes (CC) |
154+--------+----------------+-------------------------------------------------+
155| 20 | 20 | Fixed point overflow mask if 1=FPU exceptions |
156| | | for this event occur (normally 0) |
157+--------+----------------+-------------------------------------------------+
158| 21 | 21 | Decimal overflow mask if 1=FPU exceptions for |
159| | | this event occur (normally 0) |
160+--------+----------------+-------------------------------------------------+
161| 22 | 22 | Exponent underflow mask if 1=FPU exceptions |
162| | | for this event occur (normally 0) |
163+--------+----------------+-------------------------------------------------+
164| 23 | 23 | Significance Mask if 1=FPU exceptions for this |
165| | | event occur (normally 0) |
166+--------+----------------+-------------------------------------------------+
167| 24-31 | 24-30 | Reserved Must be 0. |
168| +----------------+-------------------------------------------------+
169| | 31 | Extended Addressing Mode |
170| +----------------+-------------------------------------------------+
171| | 32 | Basic Addressing Mode |
172| | | |
173| | | Used to set addressing mode |
174| | | |
175| | | +---------+----------+----------+ |
176| | | | PSW 31 | PSW 32 | | |
177| | | +---------+----------+----------+ |
178| | | | 0 | 0 | 24 bit | |
179| | | +---------+----------+----------+ |
180| | | | 0 | 1 | 31 bit | |
181| | | +---------+----------+----------+ |
182| | | | 1 | 1 | 64 bit | |
183| | | +---------+----------+----------+ |
184+--------+----------------+-------------------------------------------------+
185| 32 | | 1=31 bit addressing mode 0=24 bit addressing |
186| | | mode (for backward compatibility), linux |
187| | | always runs with this bit set to 1 |
188+--------+----------------+-------------------------------------------------+
189| 33-63 | | Instruction address. |
190| +----------------+-------------------------------------------------+
191| | 33-63 | Reserved must be 0 |
192| +----------------+-------------------------------------------------+
193| | 64-127 | Address |
194| | | |
195| | | - In 24 bits mode bits 64-103=0 bits 104-127 |
196| | | Address |
197| | | - In 31 bits mode bits 64-96=0 bits 97-127 |
198| | | Address |
199| | | |
200| | | Note: |
201| | | unlike 31 bit mode on s/390 bit 96 must be |
202| | | zero when loading the address with LPSWE |
203| | | otherwise a specification exception occurs, |
204| | | LPSW is fully backward compatible. |
205+--------+----------------+-------------------------------------------------+
206
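For quick reference, here is a tiny sketch (illustrative only, not kernel code;
the helper name is made up) that maps the extended/basic addressing mode bit
pair from the table above to the resulting address width::

  /*
   * Illustrative only: PSW bits 31 (extended) and 32 (basic) on
   * z/Architecture select the addressing mode, as in the table above.
   */
  static int psw_addressing_mode_bits(int extended, int basic)
  {
          if (!extended && !basic)
                  return 24;      /* 24 bit mode */
          if (!extended && basic)
                  return 31;      /* 31 bit mode */
          if (extended && basic)
                  return 64;      /* 64 bit mode */
          return -1;              /* EA=1, BA=0 is not a valid combination */
  }
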
207Prefix Page(s)
208--------------
209This per-cpu memory area is too intimately tied to the processor not to mention it here.
210It exists between the real addresses 0-4096 on s/390 and between 0-8192 on
211z/Architecture and is exchanged with one page on s/390 or two pages on
212z/Architecture in absolute storage by the set prefix instruction during Linux
213startup.
214
215This page is mapped to a different prefix for each processor in an SMP
216configuration (assuming the OS designer is sane of course).
217
218Bytes 0-512 (200 hex) on s/390 and 0-512, 4096-4544, 4604-5119 currently on
219z/Architecture are used by the processor itself for holding such information
220as exception indications and entry points for exceptions.
221
222Bytes after 0xc00 hex are used by linux for per processor globals on s/390 and
223z/Architecture (there is a gap on z/Architecture currently between 0xc00 and
2240x1000, too, which is used by Linux).
225
226The closest thing to this on traditional architectures is the interrupt
227vector table. This is a good thing & does simplify some of the kernel coding;
228however, it means that we now cannot catch stray NULL pointers in the
229kernel without hard coded checks.
230
231
232
233Address Spaces on Intel Linux
234=============================
235
236The traditional Intel Linux address space is approximately mapped as follows (forgive
237the ascii art)::
238
239 0xFFFFFFFF 4GB Himem *****************
240 * *
241 * Kernel Space *
242 * *
243 ***************** ****************
244 User Space Himem * User Stack * * *
245 (typically 0xC0000000 3GB ) ***************** * *
246 * Shared Libs * * Next Process *
247 ***************** * to *
248 * * <== * Run * <==
249 * User Program * * *
250 * Data BSS * * *
251 * Text * * *
252 * Sections * * *
253 0x00000000 ***************** ****************
254
255Now it is easy to see that on Intel it is quite easy to recognise a kernel
256address as being one greater than user space himem (in this case 0xC0000000),
257and addresses of less than this are the ones in the current running program on
258this processor (if an smp box).
259
260If using the virtual machine ( VM ) as a debugger it is quite difficult to
261know which user process is running as the address space you are looking at
262could be from any process in the run queue.
263
264The limitation of Intel's addressing technique is that the linux
265kernel uses a very simple real address to virtual address mapping
266of Real Address=Virtual Address-User Space Himem.
267This means that on Intel the linux kernel can typically only address
268Himem=0xFFFFFFFF-0xC0000000=1GB & this is all the RAM these machines
269can typically use.
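
As a trivial illustration of the mapping just described (a sketch only; the
macro and function names below are made up, the real kernel uses its own
helpers for this), the calculation looks like::

  /*
   * Sketch only: the simple mapping described above for 32 bit Intel,
   * with "User Space Himem" at the typical 3GB mark.
   */
  #define USER_SPACE_HIMEM 0xC0000000UL

  static unsigned long kernel_virt_to_real(unsigned long virt)
  {
          return virt - USER_SPACE_HIMEM; /* Real Address = Virtual - Himem */
  }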
270
271They can lower User Himem to 2GB or lower & thus be
272able to use 2GB of RAM; however, this shrinks the maximum size
273of User Space from 3GB to 2GB. They have a no-win limit of 4GB unless
274they go to 64 Bit.
275
276
277On 390 our limitations & strengths make us slightly different.
278For backward compatibility we are only allowed to use 31 bits (2GB)
279of our 32 bit addresses, however, we use entirely separate address
280spaces for the user & kernel.
281
282This means we can support 2GB of non Extended RAM on s/390, & more
283with the Extended memory management swap device &
284currently 4TB of physical memory on z/Architecture.
285
286
287Address Spaces on Linux for s/390 & z/Architecture
288==================================================
289
290Our addressing scheme is basically as follows::
291
292 Primary Space Home Space
293 Himem 0x7fffffff 2GB on s/390 ***************** ****************
294 currently 0x3ffffffffff (2^42)-1 * User Stack * * *
295 on z/Architecture. ***************** * *
296 * Shared Libs * * *
297 ***************** * *
298 * * * Kernel *
299 * User Program * * *
300 * Data BSS * * *
301 * Text * * *
302 * Sections * * *
303 0x00000000 ***************** ****************
304
305This also means that we need to look at the PSW problem state bit and the
306addressing mode to decide whether we are looking at user or kernel space.
307
308User space runs in primary address mode (or access register mode within
309the vdso code).
310
311The kernel usually also runs in home space mode; however, when accessing
312user space the kernel switches to primary or secondary address mode if
313the mvcos instruction is not available or if a compare-and-swap (futex)
314instruction on a user space address is performed.
315
316When also looking at the ASCE control registers, this means:
317
318User space:
319
320- runs in primary or access register mode
321- cr1 contains the user asce
322- cr7 contains the user asce
323- cr13 contains the kernel asce
324
325Kernel space:
326
327- runs in home space mode
328- cr1 contains the user or kernel asce
329
330 - the kernel asce is loaded when a uaccess requires primary or
331 secondary address mode
332
333- cr7 contains the user or kernel asce, (changed with set_fs())
334- cr13 contains the kernel asce
335
336In case of uaccess the kernel changes to:
337
338- primary space mode in case of a uaccess (copy_to_user) and uses
339 e.g. the mvcp instruction to access user space. However the kernel
340 will stay in home space mode if the mvcos instruction is available
341- secondary space mode in case of futex atomic operations, so that the
342 instructions come from primary address space and data from secondary
343 space
344
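As a minimal sketch of such a uaccess (generic kernel driver style code; the
function name, buffers and header choices are made up for illustration), copying
a kernel buffer out to user space looks like::

  #include <linux/errno.h>
  #include <linux/types.h>
  #include <linux/uaccess.h>

  /*
   * Sketch only: a plain uaccess as described above. Inside
   * copy_to_user() the kernel either switches to primary space mode
   * (mvcp) or stays in home space mode if mvcos is available.
   */
  static ssize_t copy_result_to_user(void __user *ubuf,
                                     const void *kbuf, size_t len)
  {
          if (copy_to_user(ubuf, kbuf, len))
                  return -EFAULT;
          return len;
  }
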
345In case of KVM, the kernel runs in home space mode, but cr1 gets switched
346to contain the gmap asce before the SIE instruction gets executed. When
347the SIE instruction is finished, cr1 will be switched back to contain the
348user asce.
349
350
351Virtual Addresses on s/390 & z/Architecture
352===========================================
353
A virtual address on s/390 is made up of 3 parts:

The SX (segment index, roughly corresponding to the PGD & PMD in Linux
terminology) being bits 1-11.

The PX (page index, corresponding to the page table entry (pte) in Linux
terminology) being bits 12-19.

The remaining bits, the BX ( byte index ), are the offset within the page,
i.e. bits 20 to 31.
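
As a quick sketch ( plain C just reading the description above back, nothing
s/390 specific is needed to try it ) the decomposition of a 31 bit address
looks like this::

  #include <stdio.h>

  /* Split a 31 bit s/390 virtual address into its indexes.
   * IBM bit numbering: bit 0 is the most significant bit of the word.
   */
  static void split_address(unsigned int addr)
  {
      unsigned int sx = (addr >> 20) & 0x7ff; /* segment index, bits 1-11  */
      unsigned int px = (addr >> 12) & 0xff;  /* page index,    bits 12-19 */
      unsigned int bx = addr & 0xfff;         /* byte index,    bits 20-31 */

      printf("SX=%03x PX=%02x BX=%03x\n", sx, px, bx);
  }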
363
364On z/Architecture in linux we currently make up an address from 4 parts.
365
366- The region index bits (RX) 0-32 we currently use bits 22-32
367- The segment index (SX) being bits 33-43
368- The page index (PX) being bits 44-51
369- The byte index (BX) being bits 52-63
370
371Notes:
372 1) s/390 has no PMD so the PMD is really the PGD also.
373 A lot of this stuff is defined in pgtable.h.
374
  2) Also, seeing as s/390's page tables are only 1k in size
     ( bits 12-19 x 4 bytes per pte ) we use a single 4k page
     to make the best use of memory, updating 4 segment index
     entries each time we mess with a PMD & using offsets
     0, 1024, 2048 & 3072 in this page for our segment indexes.
     On z/Architecture our page tables are now 2k in size
     ( bits 12-19 x 8 bytes per pte ) & we do a similar trick
     but only mess with 2 segment indices each time we mess with
     a PMD.
384
  3) As z/Architecture supports up to a massive 5-level page table lookup we
     can only use 3 currently on Linux ( as this is all the generic kernel
     currently supports ) however this may change in future.
     This allows us to access ( according to my sums )
     4TB of virtual storage per process i.e.
     4096*512(PTES)*1024(PMDS)*2048(PGD) = 4398046511104 bytes,
     enough for another 2 or 3 years I think :-).
     To do this we use a region-third-table designation type in
     our address space control registers.
394
395
396The Linux for s/390 & z/Architecture Kernel Task Structure
397==========================================================
Each process/thread under Linux for S390 has its own kernel task_struct
defined in linux/include/linux/sched.h.
On initialisation & on resuming a process on a cpu, S390 sets
the __LC_KERNEL_STACK variable in the spare prefix area for this cpu
( which we use for per-processor globals ).
403
404The kernel stack pointer is intimately tied with the task structure for
405each processor as follows::
406
407 s/390
408 ************************
409 * 1 page kernel stack *
410 * ( 4K ) *
411 ************************
412 * 1 page task_struct *
413 * ( 4K ) *
414 8K aligned ************************
415
416 z/Architecture
417 ************************
418 * 2 page kernel stack *
419 * ( 8K ) *
420 ************************
421 * 2 page task_struct *
422 * ( 8K ) *
423 16K aligned ************************
424
425What this means is that we don't need to dedicate any register or global
426variable to point to the current running process & can retrieve it with the
427following very simple construct for s/390 & one very similar for
428z/Architecture::
429
430 static inline struct task_struct * get_current(void)
431 {
432 struct task_struct *current;
433 __asm__("lhi %0,-8192\n\t"
434 "nr %0,15"
435 : "=r" (current) );
436 return current;
437 }
438
i.e. just anding the current kernel stack pointer with the mask -8192.
Thankfully, because Linux doesn't have support for nested IO interrupts
& our devices have large buffers & can survive interrupts being shut off for
short amounts of time, we don't need a separate stack for interrupts.
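
For reference, the same computation expressed in plain C ( a sketch only;
"sp" stands in for the kernel stack pointer held in r15 & the 8K value
matches the s/390 stack/task_struct union shown above )::

  struct task_struct;   /* opaque here */

  static inline struct task_struct *task_from_sp(unsigned long sp)
  {
      /* -8192 is ~(8192 - 1): round sp down to the 8K aligned start of
       * the union, which is where the task_struct lives.
       */
      return (struct task_struct *)(sp & ~(8192UL - 1));
  }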
443
444
445
446
447Register Usage & Stackframes on Linux for s/390 & z/Architecture
448=================================================================
449Overview:
450---------
451This is the code that gcc produces at the top & the bottom of
452each function. It usually is fairly consistent & similar from
453function to function & if you know its layout you can probably
454make some headway in finding the ultimate cause of a problem
455after a crash without a source level debugger.
456
457Note: To follow stackframes requires a knowledge of C or Pascal &
458limited knowledge of one assembly language.
459
460It should be noted that there are some differences between the
461s/390 and z/Architecture stack layouts as the z/Architecture stack layout
462didn't have to maintain compatibility with older linkage formats.
463
464Glossary:
465---------
alloca:
  This is a built in compiler function for runtime allocation
  of extra space on the caller's stack which is obviously freed
  up on function exit ( e.g. the caller may choose to allocate nothing
  or a buffer of 4k if required for temporary purposes ); it generates
  very efficient code ( a few cycles ) when compared to alternatives
  like malloc.
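
  For example ( a sketch of mine, not something from the kernel )::

      #include <alloca.h>
      #include <string.h>

      void greet(const char *name)
      {
          /* buf lives in greet()'s stack frame & vanishes on return */
          char *buf = alloca(strlen(name) + 7);

          strcpy(buf, "hello ");
          strcat(buf, name);
          /* ... use buf ... */
      }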
473
474automatics:
475 These are local variables on the stack, i.e they aren't in registers &
476 they aren't static.
477
back-chain:
  This is a pointer to the stack pointer before entering a
  framed function's ( see frameless function ) prologue, got by
  dereferencing the address of the current stack pointer,
  i.e. got by accessing the 32 bit value at the stack pointer's
  current location.
484
485base-pointer:
486 This is a pointer to the back of the literal pool which
487 is an area just behind each procedure used to store constants
488 in each function.
489
call-clobbered:
  The caller probably needs to save these registers if there
  is something of value in them, on the stack or elsewhere, before making a
  call to another procedure so that it can restore them later.
494
495epilogue:
496 The code generated by the compiler to return to the caller.
497
498frameless-function:
499 A frameless function in Linux for s390 & z/Architecture is one which doesn't
500 need more than the register save area (96 bytes on s/390, 160 on z/Architecture)
501 given to it by the caller.
502
503 A frameless function never:
504
505 1) Sets up a back chain.
506 2) Calls alloca.
507 3) Calls other normal functions
508 4) Has automatics.
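
  For instance, a small leaf routine like the following would typically end
  up frameless ( compare the test() function disassembled later on )::

      int add_five(int b)
      {
          /* no calls, no automatics needing the stack, so the register
           * save area provided by the caller is all it needs
           */
          return b + 5;
      }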
509
GOT-pointer:
  This is a pointer to the global-offset-table in ELF
  ( Executable & Linkable Format, Linux's most common executable format );
  all globals & shared library objects are found using this pointer.
514
515lazy-binding
516 ELF shared libraries are typically only loaded when routines in the shared
517 library are actually first called at runtime. This is lazy binding.
518
procedure-linkage-table
  This is a table found from the GOT which contains pointers to routines
  in other shared libraries which can't be called by easier means.
522
523prologue:
524 The code generated by the compiler to set up the stack frame.
525
outgoing-args:
  This is extra area allocated on the stack of the calling function if the
  parameters for the callee cannot all be put in registers; the same
  area can be reused by each function the caller calls.
530
531routine-descriptor:
532 A COFF executable format based concept of a procedure reference
533 actually being 8 bytes or more as opposed to a simple pointer to the routine.
534 This is typically defined as follows:
535
536 - Routine Descriptor offset 0=Pointer to Function
537 - Routine Descriptor offset 4=Pointer to Table of Contents
538
539 The table of contents/TOC is roughly equivalent to a GOT pointer.
540 & it means that shared libraries etc. can be shared between several
541 environments each with their own TOC.
542
static-chain:
  This is used in nested functions, a concept adopted from Pascal
  by gcc & not used in ANSI C or C++ ( although quite useful ); basically it
  is a pointer used to reference local variables of enclosing functions.
  You might come across this stuff once or twice in your lifetime.
548
549 e.g.
550
551 The function below should return 11 though gcc may get upset & toss warnings
552 about unused variables::
553
    int FunctionA(int a)
    {
        int b;
        void FunctionC(int c)   /* nested function, a gcc extension */
        {
            b = c + 1;
        }
        FunctionC(10);
        return(b);
    }
564
565
566s/390 & z/Architecture Register usage
567=====================================
568
569======== ========================================== ===============
570r0 used by syscalls/assembly call-clobbered
571r1 used by syscalls/assembly call-clobbered
572r2 argument 0 / return value 0 call-clobbered
573r3 argument 1 / return value 1 (if long long) call-clobbered
574r4 argument 2 call-clobbered
575r5 argument 3 call-clobbered
576r6 argument 4 saved
577r7 pointer-to arguments 5 to ... saved
578r8 this & that saved
579r9 this & that saved
580r10 static-chain ( if nested function ) saved
581r11 frame-pointer ( if function used alloca ) saved
582r12 got-pointer saved
583r13 base-pointer saved
584r14 return-address saved
585r15 stack-pointer saved
586
587f0 argument 0 / return value ( float/double ) call-clobbered
588f2 argument 1 call-clobbered
589f4 z/Architecture argument 2 saved
590f6 z/Architecture argument 3 saved
591======== ========================================== ===============
592
593The remaining floating points
594f1,f3,f5 f7-f15 are call-clobbered.
595
596Notes:
597------
1) The only requirement is that registers which are used
   by the callee are saved, e.g. the compiler is perfectly
   capable of using r11 for purposes other than a frame
   pointer if a frame pointer is not needed.
6022) In functions with variable arguments e.g. printf the calling procedure
603 is identical to one without variable arguments & the same number of
604 parameters. However, the prologue of this function is somewhat more
605 hairy owing to it having to move these parameters to the stack to
606 get va_start, va_arg & va_end to work.
6073) Access registers are currently unused by gcc but are used in
608 the kernel. Possibilities exist to use them at the moment for
609 temporary storage but it isn't recommended.
4) Only 4 of the floating point registers are used for
   parameter passing as older machines such as G3 only have 4
   & it keeps the stack frame compatible with other compilers.
   However with IEEE floating point emulation under linux on the
   older machines you are free to use the other 12.
5) A long long or double parameter cannot have the
   first 4 bytes in a register & the second four bytes in the
   outgoing args area. It must be purely in the outgoing args
   area if crossing this boundary.
6) Floating point parameters are mixed with outgoing args
   on the outgoing args area in the order they are passed in as parameters.
6217) Floating point arguments 2 & 3 are saved in the outgoing args area for
622 z/Architecture
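
Reading the table back as C ( a rough illustration of mine, not an
authoritative statement of the ABI )::

  /* On 31 bit s/390: integer arguments 0-4 travel in r2-r6, later ones via
   * the outgoing args area ( addressed through r7 ), & the first two
   * floating point arguments in f0 & f2.
   */
  long example(long a,   /* r2 */
               long b,   /* r3 */
               long c,   /* r4 */
               long d,   /* r5 */
               long e,   /* r6 */
               long f,   /* outgoing args area */
               double x, /* f0 */
               double y) /* f2 */
  {
      return a + b + c + d + e + f + (long)(x + y);
  }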
623
624
625Stack Frame Layout
626------------------
627
628========= ============== ======================================================
629s/390 z/Architecture
630========= ============== ======================================================
6310 0 back chain ( a 0 here signifies end of back chain )
6324 8 eos ( end of stack, not used on Linux for S390 used
633 in other linkage formats )
6348 16 glue used in other s/390 linkage formats for saved
635 routine descriptors etc.
63612 24 glue used in other s/390 linkage formats for saved
637 routine descriptors etc.
63816 32 scratch area
63920 40 scratch area
64024 48 saved r6 of caller function
64128 56 saved r7 of caller function
64232 64 saved r8 of caller function
64336 72 saved r9 of caller function
64440 80 saved r10 of caller function
64544 88 saved r11 of caller function
64648 96 saved r12 of caller function
64752 104 saved r13 of caller function
64856 112 saved r14 of caller function
64960 120 saved r15 of caller function
65064 128 saved f4 of caller function
72        136            saved f6 of caller function
80        144            undefined
65396 160 outgoing args passed from caller to callee
65496+x 160+x possible stack alignment ( 8 bytes desirable )
65596+x+y 160+x+y alloca space of caller ( if used )
65696+x+y+z 160+x+y+z automatics of caller ( if used )
6570 back-chain
658========= ============== ======================================================
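
Expressed as a rough C sketch ( 31 bit layout; the field names are mine, not
the kernel's, & on z/Architecture the word sized fields double to 8 bytes
giving the 160 byte area )::

  struct s390_stack_frame {
      unsigned int  back_chain;     /*  0: 0 marks the end of the chain */
      unsigned int  eos;            /*  4: unused on Linux              */
      unsigned int  glue[2];        /*  8: other linkage formats only   */
      unsigned int  scratch[2];     /* 16                               */
      unsigned int  gprs[10];       /* 24: saved r6 - r15               */
      double        fprs[2];        /* 64: saved f4 & f6                */
      unsigned char undefined[16];  /* 80                               */
      /* 96: outgoing args of the caller follow here */
  };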
659
660A sample program with comments.
661===============================
662
663Comments on the function test
664-----------------------------
6651) It didn't need to set up a pointer to the constant pool gpr13 as it is not
666 used ( :-( ).
2) This is a frameless function & no new stack frame is bought ( allocated ).
6683) The compiler was clever enough to recognise that it could return the
669 value in r2 as well as use it for the passed in parameter ( :-) ).
4) The basr ( branch and save register ) trick works as follows: the
   instruction has a special case in that r0, used as the second operand of
   some instructions, is understood as the literal value 0 ( some RISC
   architectures also do this ). So we branch to the next address & the new
   program counter is in r13; we then subtract the size of the function
   prologue we have executed & the size of the literal pool to get to the
   top of the literal pool::
676
677
678 0040037c int test(int b)
679 { # Function prologue below
680 40037c: 90 de f0 34 stm %r13,%r14,52(%r15) # Save registers r13 & r14
681 400380: 0d d0 basr %r13,%r0 # Set up pointer to constant pool using
682 400382: a7 da ff fa ahi %r13,-6 # basr trick
683 return(5+b);
684 # Huge main program
685 400386: a7 2a 00 05 ahi %r2,5 # add 5 to r2
686
687 # Function epilogue below
688 40038a: 98 de f0 34 lm %r13,%r14,52(%r15) # restore registers r13 & 14
689 40038e: 07 fe br %r14 # return
690 }
691
692Comments on the function main
693-----------------------------
6941) The compiler did this function optimally ( 8-) )::
695
696 Literal pool for main.
697 400390: ff ff ff ec .long 0xffffffec
698 main(int argc,char *argv[])
699 { # Function prologue below
700 400394: 90 bf f0 2c stm %r11,%r15,44(%r15) # Save necessary registers
701 400398: 18 0f lr %r0,%r15 # copy stack pointer to r0
702 40039a: a7 fa ff a0 ahi %r15,-96 # Make area for callee saving
703 40039e: 0d d0 basr %r13,%r0 # Set up r13 to point to
704 4003a0: a7 da ff f0 ahi %r13,-16 # literal pool
705 4003a4: 50 00 f0 00 st %r0,0(%r15) # Save backchain
706
707 return(test(5)); # Main Program Below
708 4003a8: 58 e0 d0 00 l %r14,0(%r13) # load relative address of test from
709 # literal pool
710 4003ac: a7 28 00 05 lhi %r2,5 # Set first parameter to 5
711 4003b0: 4d ee d0 00 bas %r14,0(%r14,%r13) # jump to test setting r14 as return
712 # address using branch & save instruction.
713
714 # Function Epilogue below
715 4003b4: 98 bf f0 8c lm %r11,%r15,140(%r15)# Restore necessary registers.
716 4003b8: 07 fe br %r14 # return to do program exit
717 }
718
719
720Compiler updates
721----------------
722
723::
724
725 main(int argc,char *argv[])
726 {
727 4004fc: 90 7f f0 1c stm %r7,%r15,28(%r15)
728 400500: a7 d5 00 04 bras %r13,400508 <main+0xc>
729 400504: 00 40 04 f4 .long 0x004004f4
    # compiler now puts the constant pool in the code so it saves an instruction
731 400508: 18 0f lr %r0,%r15
732 40050a: a7 fa ff a0 ahi %r15,-96
733 40050e: 50 00 f0 00 st %r0,0(%r15)
734 return(test(5));
735 400512: 58 10 d0 00 l %r1,0(%r13)
736 400516: a7 28 00 05 lhi %r2,5
737 40051a: 0d e1 basr %r14,%r1
    # compiler adds 1 extra instruction to the epilogue, this is done to
    # avoid processor pipeline stalls owing to data dependencies on g5 &
    # above, as register 14 in the old code was needed directly after being
    # loaded by the lm %r11,%r15,140(%r15) for the br %r14.
742 40051c: 58 40 f0 98 l %r4,152(%r15)
743 400520: 98 7f f0 7c lm %r7,%r15,124(%r15)
744 400524: 07 f4 br %r4
745 }
746
747
748Hartmut ( our compiler developer ) also has been threatening to take out the
749stack backchain in optimised code as this also causes pipeline stalls, you
750have been warned.
751
75264 bit z/Architecture code disassembly
753--------------------------------------
754
If you understand the stuff above you'll understand the stuff
below too so I'll avoid repeating myself & just say that
some of the instructions have g's on the end of them to indicate
they are 64 bit & the stack offsets are bigger.
The only other difference you'll find between 32 & 64 bit is that
we now use f4 & f6 for floating point arguments on 64 bit::
761
762 00000000800005b0 <test>:
763 int test(int b)
764 {
765 return(5+b);
766 800005b0: a7 2a 00 05 ahi %r2,5
767 800005b4: b9 14 00 22 lgfr %r2,%r2 # downcast to integer
768 800005b8: 07 fe br %r14
769 800005ba: 07 07 bcr 0,%r7
770
771
772 }
773
774 00000000800005bc <main>:
775 main(int argc,char *argv[])
776 {
777 800005bc: eb bf f0 58 00 24 stmg %r11,%r15,88(%r15)
778 800005c2: b9 04 00 1f lgr %r1,%r15
779 800005c6: a7 fb ff 60 aghi %r15,-160
780 800005ca: e3 10 f0 00 00 24 stg %r1,0(%r15)
781 return(test(5));
782 800005d0: a7 29 00 05 lghi %r2,5
    # brasl allows jumps > 64k & is overkill here, bras would do fine
784 800005d4: c0 e5 ff ff ff ee brasl %r14,800005b0 <test>
785 800005da: e3 40 f1 10 00 04 lg %r4,272(%r15)
786 800005e0: eb bf f0 f8 00 04 lmg %r11,%r15,248(%r15)
787 800005e6: 07 f4 br %r4
788 }
789
790
791
792Compiling programs for debugging on Linux for s/390 & z/Architecture
793====================================================================
-gdwarf-2 now works & should be considered the default debugging
format for s/390 & z/Architecture as it is more reliable for debugging
shared libraries; normal -g debugging also works much better now,
thanks to the IBM java compiler developers' bug reports.

This is typically done by adding/appending the flags -g or -gdwarf-2 to the
CFLAGS & LDFLAGS variables in the Makefile of the program concerned.
801
If you are using gdb & would like accurate displays of registers &
stack traces, compile without optimisation, i.e. make sure
that there is no -O2 or similar on the CFLAGS line of the Makefile &
in the emitted gcc commands. Obviously this will produce worse code
( not advisable for shipment ) but it is an aid to the debugging process.
807
808This aids debugging because the compiler will copy parameters passed in
809in registers onto the stack so backtracing & looking at passed in
810parameters will work, however some larger programs which use inline functions
811will not compile without optimisation.
812
813Debugging with optimisation has since much improved after fixing
814some bugs, please make sure you are using gdb-5.0 or later developed
815after Nov'2000.
816
817
818
819Debugging under VM
820==================
821
822Notes
823-----
824Addresses & values in the VM debugger are always hex never decimal
825Address ranges are of the format <HexValue1>-<HexValue2> or
826<HexValue1>.<HexValue2>
827For example, the address range 0x2000 to 0x3000 can be described as 2000-3000
828or 2000.1000
829
830The VM Debugger is case insensitive.
831
832VM's strengths are usually other debuggers weaknesses you can get at any
833resource no matter how sensitive e.g. memory management resources, change
834address translation in the PSW. For kernel hacking you will reap dividends if
835you get good at it.
836
The VM Debugger displays operators but not operands, and packs the other
useful information onto the same line, as the author of the code probably
felt that it was a good idea not to go over the 80 columns of the screen.
This isn't as unintuitive as it may seem as the s/390 instructions are easy to
decode mentally and you can make a good guess at a lot of them as all the
operands are nibble ( half byte ) aligned.
843So if you have an objdump listing by hand, it is quite easy to follow, and if
844you don't have an objdump listing keep a copy of the s/390 Reference Summary
845or alternatively the s/390 principles of operation next to you.
846e.g. even I can guess that
8470001AFF8' LR 180F CC 0
848is a ( load register ) lr r0,r15
849
850Also it is very easy to tell the length of a 390 instruction from the 2 most
851significant bits in the instruction (not that this info is really useful except
852if you are trying to make sense of a hexdump of code).
853Here is a table
854
855======================= ==================
856Bits Instruction Length
857======================= ==================
85800 2 Bytes
85901 4 Bytes
86010 4 Bytes
86111 6 Bytes
862======================= ==================
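
In C this boils down to a lookup on the two top bits of the first byte ( a
throwaway helper of mine, not something from the kernel )::

  /* Length in bytes of a 390 instruction, from the table above. */
  static int insn_length(unsigned char first_byte)
  {
      static const int len[4] = { 2, 4, 4, 6 };

      return len[first_byte >> 6];
  }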
863
864The debugger also displays other useful info on the same line such as the
865addresses being operated on destination addresses of branches & condition codes.
866e.g.::
867
868 00019736' AHI A7DAFF0E CC 1
869 000198BA' BRC A7840004 -> 000198C2' CC 0
870 000198CE' STM 900EF068 >> 0FA95E78 CC 2
871
872
873
874Useful VM debugger commands
875---------------------------
876
877I suppose I'd better mention this before I start
878to list the current active traces do::
879
880 Q TR
881
882there can be a maximum of 255 of these per set
883( more about trace sets later ).
884
885To stop traces issue a::
886
887 TR END.
888
889To delete a particular breakpoint issue::
890
891 TR DEL <breakpoint number>
892
The PA1 key drops to CP mode so you can issue debugger commands.
Doing alt c ( on my 3270 console at least ) clears the screen.

Hitting b <enter> comes back to the running operating system
from cp mode ( in our case linux ).

It is typically useful to add shortcuts to your profile.exec file
if you have one ( this is roughly equivalent to autoexec.bat in DOS ),
here are a few from mine::
902
903 /* this gives me command history on issuing f12 */
904 set pf12 retrieve
905 /* this continues */
906 set pf8 imm b
907 /* goes to trace set a */
908 set pf1 imm tr goto a
909 /* goes to trace set b */
910 set pf2 imm tr goto b
911 /* goes to trace set c */
912 set pf3 imm tr goto c
913
914
915
916Instruction Tracing
917-------------------
918Setting a simple breakpoint::
919
920 TR I PSWA <address>
921
922To debug a particular function try::
923
924 TR I R <function address range>
925 TR I on its own will single step.
926 TR I DATA <MNEMONIC> <OPTIONAL RANGE> will trace for particular mnemonics
927
928e.g.::
929
930 TR I DATA 4D R 0197BC.4000
931
932will trace for BAS'es ( opcode 4D ) in the range 0197BC.4000
933
934if you were inclined you could add traces for all branch instructions &
935suffix them with the run prefix so you would have a backtrace on screen
936when a program crashes::
937
938 TR BR <INTO OR FROM> will trace branches into or out of an address.
939
940e.g.::
941
942 TR BR INTO 0
943
is often quite useful if a program is getting awkward & deciding
to branch to 0 & crash, as this will stop at the address before it jumps to 0.
946
947::
948
949 TR I R <address range> RUN cmd d g
950
951single steps a range of addresses but stays running &
952displays the gprs on each step.
953
954
955
956Displaying & modifying Registers
957--------------------------------
958D G
959 will display all the gprs
960
Adding an extra G to all the commands is necessary to access the full 64 bit
content in VM on z/Architecture. Obviously this isn't required for access
registers as these are still 32 bit.
964
965e.g.
966
967DGG
968 instead of DG
969
970D X
971 will display all the control registers
972D AR
973 will display all the access registers
974D AR4-7
975 will display access registers 4 to 7
976CPU ALL D G
977 will display the GRPS of all CPUS in the configuration
978D PSW
979 will display the current PSW
st PSW 2000
 will put the value 2000 into the PSW & crash your machine.
982D PREFIX
983 displays the prefix offset
984
985
986Displaying Memory
987-----------------
988To display memory mapped using the current PSW's mapping try::
989
990 D <range>
991
992To make VM display a message each time it hits a particular address and
993continue try:
994
995D I<range>
996 will disassemble/display a range of instructions.
997
ST <addr> <32 bit word>
 will store a 32 bit word at a 32 bit aligned address.
1000D T<range>
1001 will display the EBCDIC in an address (if you are that way inclined)
1002D R<range>
1003 will display real addresses ( without DAT ) but with prefixing.
1004
There are other complex options to display; if you need to get at say home space
but are in primary space, the easiest thing to do is to temporarily
modify the PSW to the other addressing mode, display the stuff & then
restore it.
1009
1010
1011
1012Hints
1013-----
1014If you want to issue a debugger command without halting your virtual machine
1015with the PA1 key try prefixing the command with #CP e.g.::
1016
1017 #cp tr i pswa 2000
1018
1019also suffixing most debugger commands with RUN will cause them not
1020to stop just display the mnemonic at the current instruction on the console.
1021
1022If you have several breakpoints you want to put into your program &
1023you get fed up of cross referencing with System.map
1024you can do the following trick for several symbols.
1025
1026::
1027
1028 grep do_signal System.map
1029
1030which emits the following among other things::
1031
1032 0001f4e0 T do_signal
1033
1034now you can do::
1035
1036 TR I PSWA 0001f4e0 cmd msg * do_signal
1037
1038This sends a message to your own console each time do_signal is entered.
1039( As an aside I wrote a perl script once which automatically generated a REXX
1040script with breakpoints on every kernel procedure, this isn't a good idea
1041because there are thousands of these routines & VM can only set 255 breakpoints
1042at a time so you nearly had to spend as long pruning the file down as you would
1043entering the msgs by hand), however, the trick might be useful for a single
1044object file. In the 3270 terminal emulator x3270 there is a very useful option
1045in the file menu called "Save Screen In File" - this is very good for keeping a
1046copy of traces.
1047
1048From CMS help <command name> will give you online help on a particular command.
1049e.g.::
1050
1051 HELP DISPLAY
1052
1053Also CP has a file called profile.exec which automatically gets called
1054on startup of CMS ( like autoexec.bat ), keeping on a DOS analogy session
1055CP has a feature similar to doskey, it may be useful for you to
1056use profile.exec to define some keystrokes.
1057
SET PF9 IMM B
 This does a single step in VM on pressing F9.
1060
1061SET PF10 ^
1062 This sets up the ^ key.
1063 which can be used for ^c (ctrl-c),^z (ctrl-z) which can't be typed
1064 directly into some 3270 consoles.
1065
1066SET PF11 ^-
1067 This types the starting keystrokes for a sysrq see SysRq below.
1068SET PF12 RETRIEVE
1069 This retrieves command history on pressing F12.
1070
1071
Sometimes in VM the display is set up to scroll automatically.
This can be very annoying if there are messages you wish to look at;
to stop this do
1075
1076TERM MORE 255 255
1077 This will nearly stop automatic screen updates, however it will
1078 cause a denial of service if lots of messages go to the 3270 console,
1079 so it would be foolish to use this as the default on a production machine.
1080
1081
1082Tracing particular processes
1083----------------------------
1084The kernel's text segment is intentionally at an address in memory that it will
1085very seldom collide with text segments of user programs ( thanks Martin ),
1086this simplifies debugging the kernel.
1087However it is quite common for user processes to have addresses which collide
1088this can make debugging a particular process under VM painful under normal
1089circumstances as the process may change when doing a::
1090
1091 TR I R <address range>.
1092
Thankfully after reading VM's online help I figured out how to debug
a particular process.
1095
1096Your first problem is to find the STD ( segment table designation )
1097of the program you wish to debug.
There are several ways you can do this, here are a few.
1099
1100Run::
1101
1102 objdump --syms <program to be debugged> | grep main
1103
1104To get the address of main in the program. Then::
1105
1106 tr i pswa <address of main>
1107
1108Start the program, if VM drops to CP on what looks like the entry
1109point of the main function this is most likely the process you wish to debug.
1110Now do a D X13 or D XG13 on z/Architecture.
1111
1112On 31 bit the STD is bits 1-19 ( the STO segment table origin )
1113& 25-31 ( the STL segment table length ) of CR13.
1114
1115now type::
1116
1117 TR I R STD <CR13's value> 0.7fffffff
1118
1119e.g.::
1120
1121 TR I R STD 8F32E1FF 0.7fffffff
1122
1123Another very useful variation is::
1124
1125 TR STORE INTO STD <CR13's value> <address range>
1126
1127for finding out when a particular variable changes.
1128
1129An alternative way of finding the STD of a currently running process
1130is to do the following, ( this method is more complex but
1131could be quite convenient if you aren't updating the kernel much &
1132so your kernel structures will stay constant for a reasonable period of
1133time ).
1134
1135::
1136
1137 grep task /proc/<pid>/status
1138
1139from this you should see something like::
1140
1141 task: 0f160000 ksp: 0f161de8 pt_regs: 0f161f68
1142
1143This now gives you a pointer to the task structure.
1144
Now do a::

 make CC:="s390-gcc -g" kernel/sched.s
1148
1149To get the task_struct stabinfo.
1150
1151( task_struct is defined in include/linux/sched.h ).
1152
1153Now we want to look at
1154task->active_mm->pgd
1155
1156on my machine the active_mm in the task structure stab is
1157active_mm:(4,12),672,32
1158
1159its offset is 672/8=84=0x54
1160
1161the pgd member in the mm_struct stab is
1162pgd:(4,6)=*(29,5),96,32
1163so its offset is 96/8=12=0xc
1164
1165so we'll::
1166
1167 hexdump -s 0xf160054 /dev/mem | more
1168
1169i.e. task_struct+active_mm offset
1170to look at the active_mm member::
1171
1172 f160054 0fee cc60 0019 e334 0000 0000 0000 0011
1173
1174::
1175
1176 hexdump -s 0x0feecc6c /dev/mem | more
1177
1178i.e. active_mm+pgd offset::
1179
1180 feecc6c 0f2c 0000 0000 0001 0000 0001 0000 0010
1181
which gives us the pgd, 0f2c0000 ( the first word above ); now do::
1184
1185 TR I R STD <pgd|0x7f> 0.7fffffff
1186
1187i.e. the 0x7f is added because the pgd only
1188gives the page table origin & we need to set the low bits
1189to the maximum possible segment table length.
1190
1191::
1192
1193 TR I R STD 0f2c007f 0.7fffffff
1194
1195on z/Architecture you'll probably need to do::
1196
1197 TR I R STD <pgd|0x7> 0.ffffffffffffffff
1198
1199to set the TableType to 0x1 & the Table length to 3.
1200
1201
1202
1203Tracing Program Exceptions
1204--------------------------
1205If you get a crash which says something like
1206illegal operation or specification exception followed by a register dump
1207You can restart linux & trace these using the tr prog <range or value> trace
1208option.
1209
1210
1211The most common ones you will normally be tracing for is:
1212
1213- 1=operation exception
1214- 2=privileged operation exception
1215- 4=protection exception
1216- 5=addressing exception
1217- 6=specification exception
1218- 10=segment translation exception
1219- 11=page translation exception
1220
1221The full list of these is on page 22 of the current s/390 Reference Summary.
1222e.g.
1223
1224tr prog 10 will trace segment translation exceptions.
1225
1226tr prog on its own will trace all program interruption codes.
1227
1228Trace Sets
1229----------
1230On starting VM you are initially in the INITIAL trace set.
1231You can do a Q TR to verify this.
1232If you have a complex tracing situation where you wish to wait for instance
1233till a driver is open before you start tracing IO, but know in your
1234heart that you are going to have to make several runs through the code till you
1235have a clue whats going on.
1236
1237What you can do is::
1238
1239 TR I PSWA <Driver open address>
1240
1241hit b to continue till breakpoint
1242
1243reach the breakpoint
1244
1245now do your::
1246
1247 TR GOTO B
1248 TR IO 7c08-7c09 inst int run
1249
1250or whatever the IO channels you wish to trace are & hit b
1251
1252To got back to the initial trace set do::
1253
1254 TR GOTO INITIAL
1255
1256& the TR I PSWA <Driver open address> will be the only active breakpoint again.
1257
1258
1259Tracing linux syscalls under VM
1260-------------------------------
Syscalls are implemented on Linux for S390 by the Supervisor call instruction
(SVC). There are 256 possibilities of these as the instruction is made up of a
0xA opcode and the second byte being the syscall number. They are traced using
the simple command::
1265
1266 TR SVC <Optional value or range>
1267
1268the syscalls are defined in linux/arch/s390/include/asm/unistd.h
1269e.g. to trace all file opens just do::
1270
1271 TR SVC 5 ( as this is the syscall number of open )
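
For a feel of what the trace stops on, here is a sketch ( 31 bit, my own
illustration, not how the C library really wraps syscalls ) of an open done
directly with the SVC instruction - pathname in gpr2, flags in gpr3, then
"svc 5"::

  static long raw_open(const char *name, int flags)
  {
      register unsigned long r2 asm("2") = (unsigned long) name;
      register unsigned long r3 asm("3") = flags;

      asm volatile("svc 5"
                   : "+d" (r2)
                   : "d" (r3)
                   : "cc", "memory");
      return r2;   /* return value or -errno comes back in gpr2 */
  }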
1272
1273
1274SMP Specific commands
1275---------------------
To find out how many cpus you have, Q CPUS displays all the CPUs available
to your virtual machine.
To find the cpu that the current VM debugger commands are being directed at,
do Q CPU; to change the cpu that VM debugger commands are being directed at,
do::

 CPU <desired cpu no>

To issue a command to all CPUs on an SMP guest try prefixing the command with
cpu all. To issue a command to a particular cpu try cpu <cpu number> e.g.::
1286
1287 CPU 01 TR I R 2000.3000
1288
If you are running on a guest with several cpus & you have an IO related problem
& cannot follow the flow of code, but you know it isn't SMP related, do the following:
1291
1292from the bash prompt issue::
1293
1294 shutdown -h now or halt.
1295
1296do a::
1297
1298 Q CPUS
1299
to find out how many cpus you have. Detach each one of them from cp except
cpu 0 by issuing a::
1302
1303 DETACH CPU 01-(number of cpus in configuration)
1304
1305& boot linux again.
1306
1307TR SIGP
1308 will trace inter processor signal processor instructions.
1309
1310DEFINE CPU 01-(number in configuration)
1311 will get your guests cpus back.
1312
1313
1314Help for displaying ascii textstrings
1315-------------------------------------
1316On the very latest VM Nucleus'es VM can now display ascii
1317( thanks Neale for the hint ) by doing::
1318
1319 D TX<lowaddr>.<len>
1320
1321e.g.::
1322
1323 D TX0.100
1324
1325Alternatively
1326=============
Under older VM debuggers ( I love EBCDIC too ) you can use the following little
program which converts a command line of hex digits to ascii text. It can be
compiled under linux and you can copy the hex digits from your x3270 terminal
to your xterm if you are debugging from a linux box.
1331
1332This is quite useful when looking at a parameter passed in as a text string
1333under VM ( unless you are good at decoding ASCII in your head ).
1334
1335e.g. consider tracing an open syscall::
1336
1337 TR SVC 5
1338
1339We have stopped at a breakpoint::
1340
1341 000151B0' SVC 0A05 -> 0001909A' CC 0
1342
D 20.8 to check the SVC old psw in the prefix area and see whether it was from
userspace ( for the layout of the prefix area consult the "Fixed Storage
Locations" chapter of the s/390 Reference Summary if you have it available ).
1346
1347::
1348
1349 V00000020 070C2000 800151B2
1350
The problem state bit wasn't set & it's also too early in the boot sequence
for it to be a userspace SVC; if it was, we would have to temporarily switch
the psw to user space addressing so we could get at the first parameter of the
open in gpr2.
1355
1356Next do a::
1357
1358 D G2
1359 GPR 2 = 00014CB4
1360
1361Now display what gpr2 is pointing to::
1362
1363 D 00014CB4.20
1364 V00014CB4 2F646576 2F636F6E 736F6C65 00001BF5
1365 V00014CC4 FC00014C B4001001 E0001000 B8070707
1366
Now copy the text till the first 00 hex ( which is the end of the string )
to an xterm & do hex2ascii on it::
1369
1370 hex2ascii 2F646576 2F636F6E 736F6C65 00
1371
1372outputs::
1373
1374 Decoded Hex:=/ d e v / c o n s o l e 0x00
1375
We were opening the console device.

You can compile the code below yourself for practice :-)
1379
1380::
1381
1382 /*
1383 * hex2ascii.c
1384 * a useful little tool for converting a hexadecimal command line to ascii
1385 *
1386 * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com)
1387 * (C) 2000 IBM Deutschland Entwicklung GmbH, IBM Corporation.
1388 */
    #include <stdio.h>
    #include <string.h>  /* for strcmp() & strlen() */
1390
1391 int main(int argc,char *argv[])
1392 {
1393 int cnt1,cnt2,len,toggle=0;
1394 int startcnt=1;
1395 unsigned char c,hex;
1396
1397 if(argc>1&&(strcmp(argv[1],"-a")==0))
1398 startcnt=2;
1399 printf("Decoded Hex:=");
1400 for(cnt1=startcnt;cnt1<argc;cnt1++)
1401 {
1402 len=strlen(argv[cnt1]);
1403 for(cnt2=0;cnt2<len;cnt2++)
1404 {
1405 c=argv[cnt1][cnt2];
1406 if(c>='0'&&c<='9')
1407 c=c-'0';
1408 if(c>='A'&&c<='F')
1409 c=c-'A'+10;
1410 if(c>='a'&&c<='f')
1411 c=c-'a'+10;
1412 switch(toggle)
1413 {
1414 case 0:
1415 hex=c<<4;
1416 toggle=1;
1417 break;
1418 case 1:
1419 hex+=c;
1420 if(hex<32||hex>127)
1421 {
1422 if(startcnt==1)
1423 printf("0x%02X ",(int)hex);
1424 else
1425 printf(".");
1426 }
1427 else
1428 {
1429 printf("%c",hex);
1430 if(startcnt==1)
1431 printf(" ");
1432 }
1433 toggle=0;
1434 break;
1435 }
1436 }
1437 }
1438 printf("\n");
1439 }
1440
1441
1442
1443
1444Stack tracing under VM
1445----------------------
1446A basic backtrace
1447-----------------
1448
Here are the tricks I use; 9 out of 10 times it works pretty well.
1450
1451When your backchain reaches a dead end
1452--------------------------------------
1453This can happen when an exception happens in the kernel and the kernel is
1454entered twice. If you reach the NULL pointer at the end of the back chain you
1455should be able to sniff further back if you follow the following tricks.
1) A kernel address should be easy to recognise since it is in
   primary space, the problem state bit isn't set & also
   the hi bit of the address is set.
2) Another backchain should also be easy to recognise since it is an
   address pointing to another address approximately 100 bytes or 0x70 hex
   behind the current stackpointer.
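
Expressed as a sketch in C, the walk we are about to do by hand looks roughly
like this ( "mem" stands for however you get at the dumped guest storage,
which is an assumption of mine, & the loop trusts the back chain which, as
noted below, the kernel entry code does not always set up )::

  #include <stdio.h>

  static unsigned int read_word(const unsigned char *mem, unsigned int addr)
  {
      /* s/390 is big endian */
      return (mem[addr] << 24) | (mem[addr + 1] << 16) |
             (mem[addr + 2] << 8) | mem[addr + 3];
  }

  static void backchain_walk(const unsigned char *mem, unsigned int sp)
  {
      while (sp) {
          /* on 31 bit the caller's saved r14 sits at sp+56 */
          unsigned int r14 = read_word(mem, sp + 56) & 0x7fffffff;

          printf("return address %08x\n", r14);
          sp = read_word(mem, sp);   /* follow the back chain */
      }
  }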
1462
1463
1464Here is some practice.
1465
1466boot the kernel & hit PA1 at some random time
1467
1468d g to display the gprs, this should display something like::
1469
1470 GPR 0 = 00000001 00156018 0014359C 00000000
1471 GPR 4 = 00000001 001B8888 000003E0 00000000
1472 GPR 8 = 00100080 00100084 00000000 000FE000
1473 GPR 12 = 00010400 8001B2DC 8001B36A 000FFED8
1474
1475Note that GPR14 is a return address but as we are real men we are going to
1476trace the stack.
1477display 0x40 bytes after the stack pointer::
1478
1479 V000FFED8 000FFF38 8001B838 80014C8E 000FFF38
1480 V000FFEE8 00000000 00000000 000003E0 00000000
1481 V000FFEF8 00100080 00100084 00000000 000FE000
1482 V000FFF08 00010400 8001B2DC 8001B36A 000FFED8
1483
1484
Ah, now look at what's in sp+56 (sp+0x38): this is 8001B36A, our saved r14 if
you look above at our stackframe, & it also agrees with GPR14.
1487
1488now backchain::
1489
1490 d 000FFF38.40
1491
1492we now are taking the contents of SP to get our first backchain::
1493
1494 V000FFF38 000FFFA0 00000000 00014995 00147094
1495 V000FFF48 00147090 001470A0 000003E0 00000000
1496 V000FFF58 00100080 00100084 00000000 001BF1D0
1497 V000FFF68 00010400 800149BA 80014CA6 000FFF38
1498
1499This displays a 2nd return address of 80014CA6
1500
1501now do::
1502
1503 d 000FFFA0.40
1504
1505for our 3rd backchain::
1506
1507 V000FFFA0 04B52002 0001107F 00000000 00000000
1508 V000FFFB0 00000000 00000000 FF000000 0001107F
1509 V000FFFC0 00000000 00000000 00000000 00000000
1510 V000FFFD0 00010400 80010802 8001085A 000FFFA0
1511
1512
1513our 3rd return address is 8001085A
1514
1515as the 04B52002 looks suspiciously like rubbish it is fair to assume that the
1516kernel entry routines for the sake of optimisation don't set up a backchain.
1517
1518now look at System.map to see if the addresses make any sense::
1519
1520 grep -i 0001b3 System.map
1521
1522outputs among other things::
1523
1524 0001b304 T cpu_idle
1525
1526so 8001B36A
1527is cpu_idle+0x66 ( quiet the cpu is asleep, don't wake it )
1528
1529::
1530
1531 grep -i 00014 System.map
1532
1533produces among other things::
1534
1535 00014a78 T start_kernel
1536
1537so 0014CA6 is start_kernel+some hex number I can't add in my head.
1538
1539::
1540
1541 grep -i 00108 System.map
1542
1543this produces::
1544
1545 00010800 T _stext
1546
1547so 8001085A is _stext+0x5a
1548
1549Congrats you've done your first backchain.
1550
1551
1552
1553s/390 & z/Architecture IO Overview
1554==================================
1555
1556I am not going to give a course in 390 IO architecture as this would take me
1557quite a while and I'm no expert. Instead I'll give a 390 IO architecture
1558summary for Dummies. If you have the s/390 principles of operation available
1559read this instead. If nothing else you may find a few useful keywords in here
1560and be able to use them on a web search engine to find more useful information.
1561
1562Unlike other bus architectures modern 390 systems do their IO using mostly
1563fibre optics and devices such as tapes and disks can be shared between several
1564mainframes. Also S390 can support up to 65536 devices while a high end PC based
1565system might be choking with around 64.
1566
1567Here is some of the common IO terminology:
1568
1569Subchannel:
1570 This is the logical number most IO commands use to talk to an IO device. There
1571 can be up to 0x10000 (65536) of these in a configuration, typically there are a
1572 few hundred. Under VM for simplicity they are allocated contiguously, however
1573 on the native hardware they are not. They typically stay consistent between
1574 boots provided no new hardware is inserted or removed.
1575
1576 Under Linux for s390 we use these as IRQ's and also when issuing an IO command
1577 (CLEAR SUBCHANNEL, HALT SUBCHANNEL, MODIFY SUBCHANNEL, RESUME SUBCHANNEL,
1578 START SUBCHANNEL, STORE SUBCHANNEL and TEST SUBCHANNEL). We use this as the ID
1579 of the device we wish to talk to. The most important of these instructions are
1580 START SUBCHANNEL (to start IO), TEST SUBCHANNEL (to check whether the IO
1581 completed successfully) and HALT SUBCHANNEL (to kill IO). A subchannel can have
1582 up to 8 channel paths to a device, this offers redundancy if one is not
1583 available.
1584
1585Device Number:
1586 This number remains static and is closely tied to the hardware. There are 65536
1587 of these, made up of a CHPID (Channel Path ID, the most significant 8 bits) and
1588 another lsb 8 bits. These remain static even if more devices are inserted or
1589 removed from the hardware. There is a 1 to 1 mapping between subchannels and
1590 device numbers, provided devices aren't inserted or removed.
1591
1592Channel Control Words:
1593 CCWs are linked lists of instructions initially pointed to by an operation
1594 request block (ORB), which is initially given to Start Subchannel (SSCH)
1595 command along with the subchannel number for the IO subsystem to process
1596 while the CPU continues executing normal code.
1597 CCWs come in two flavours, Format 0 (24 bit for backward compatibility) and
1598 Format 1 (31 bit). These are typically used to issue read and write (and many
1599 other) instructions. They consist of a length field and an absolute address
1600 field.
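
As a rough sketch in C, a format 1 CCW looks like this ( the kernel's own
definition lives in the s390 cio headers )::

  struct ccw1 {
      unsigned char  cmd_code;  /* e.g. read, write, sense ...   */
      unsigned char  flags;     /* chaining, IDA, skip ...       */
      unsigned short count;     /* number of bytes to transfer   */
      unsigned int   cda;       /* absolute address of the data  */
  } __attribute__((packed, aligned(8)));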
1601
1602 Each IO typically gets 1 or 2 interrupts, one for channel end (primary status)
1603 when the channel is idle, and the second for device end (secondary status).
1604 Sometimes you get both concurrently. You check how the IO went on by issuing a
1605 TEST SUBCHANNEL at each interrupt, from which you receive an Interruption
1606 response block (IRB). If you get channel and device end status in the IRB
1607 without channel checks etc. your IO probably went okay. If you didn't you
1608 probably need to examine the IRB, extended status word etc.
1609 If an error occurs, more sophisticated control units have a facility known as
1610 concurrent sense. This means that if an error occurs Extended sense information
1611 will be presented in the Extended status word in the IRB. If not you have to
1612 issue a subsequent SENSE CCW command after the test subchannel.
1613
1614
1615TPI (Test pending interrupt) can also be used for polled IO, but in
1616multitasking multiprocessor systems it isn't recommended except for
1617checking special cases (i.e. non looping checks for pending IO etc.).
1618
1619Store Subchannel and Modify Subchannel can be used to examine and modify
1620operating characteristics of a subchannel (e.g. channel paths).
1621
1622Other IO related Terms:
1623
1624Sysplex:
1625 S390's Clustering Technology
1626QDIO:
1627 S390's new high speed IO architecture to support devices such as gigabit
1628 ethernet, this architecture is also designed to be forward compatible with
1629 upcoming 64 bit machines.
1630
1631
1632General Concepts
1633----------------
1634
1635Input Output Processors (IOP's) are responsible for communicating between
1636the mainframe CPU's & the channel & relieve the mainframe CPU's from the
1637burden of communicating with IO devices directly, this allows the CPU's to
1638concentrate on data processing.
1639
1640IOP's can use one or more links ( known as channel paths ) to talk to each
1641IO device. It first checks for path availability & chooses an available one,
1642then starts ( & sometimes terminates IO ).
1643There are two types of channel path: ESCON & the Parallel IO interface.
1644
1645IO devices are attached to control units, control units provide the
1646logic to interface the channel paths & channel path IO protocols to
1647the IO devices, they can be integrated with the devices or housed separately
1648& often talk to several similar devices ( typical examples would be raid
1649controllers or a control unit which connects to 1000 3270 terminals )::
1650
1651
1652 +---------------------------------------------------------------+
1653 | +-----+ +-----+ +-----+ +-----+ +----------+ +----------+ |
1654 | | CPU | | CPU | | CPU | | CPU | | Main | | Expanded | |
1655 | | | | | | | | | | Memory | | Storage | |
1656 | +-----+ +-----+ +-----+ +-----+ +----------+ +----------+ |
1657 |---------------------------------------------------------------+
1658 | IOP | IOP | IOP |
1659 |---------------------------------------------------------------
1660 | C | C | C | C | C | C | C | C | C | C | C | C | C | C | C | C |
1661 ----------------------------------------------------------------
1662 || ||
1663 || Bus & Tag Channel Path || ESCON
1664 || ====================== || Channel
1665 || || || || Path
1666 +----------+ +----------+ +----------+
1667 | | | | | |
1668 | CU | | CU | | CU |
1669 | | | | | |
1670 +----------+ +----------+ +----------+
1671 | | | | |
1672 +----------+ +----------+ +----------+ +----------+ +----------+
1673 |I/O Device| |I/O Device| |I/O Device| |I/O Device| |I/O Device|
1674 +----------+ +----------+ +----------+ +----------+ +----------+
1675 CPU = Central Processing Unit
1676 C = Channel
    IOP = Input Output Processor
1678 CU = Control Unit
1679
The 390 IO systems come in 2 flavours; the current 390 machines support both.

The older 360 & 370 interface, sometimes called the Parallel I/O interface,
sometimes called Bus-and-Tag & sometimes Original Equipment Manufacturers
Interface (OEMI).
1685
1686This byte wide Parallel channel path/bus has parity & data on the "Bus" cable
1687and control lines on the "Tag" cable. These can operate in byte multiplex mode
1688for sharing between several slow devices or burst mode and monopolize the
1689channel for the whole burst. Up to 256 devices can be addressed on one of these
1690cables. These cables are about one inch in diameter. The maximum unextended
1691length supported by these cables is 125 Meters but this can be extended up to
2km with a fibre optic channel extender such as a 3044. The maximum burst speed
1693supported is 4.5 megabytes per second. However, some really old processors
1694support only transfer rates of 3.0, 2.0 & 1.0 MB/sec.
1695One of these paths can be daisy chained to up to 8 control units.
1696
1697
ESCON ( if fibre optic it is also called FICON )
was introduced by IBM in 1990. It has 2 fibre optic cables and uses either leds or
1700lasers for communication at a signaling rate of up to 200 megabits/sec. As
170110bits are transferred for every 8 bits info this drops to 160 megabits/sec
1702and to 18.6 Megabytes/sec once control info and CRC are added. ESCON only
1703operates in burst mode.
1704
1705ESCONs typical max cable length is 3km for the led version and 20km for the
1706laser version known as XDF (extended distance facility). This can be further
1707extended by using an ESCON director which triples the above mentioned ranges.
1708Unlike Bus & Tag as ESCON is serial it uses a packet switching architecture,
1709the standard Bus & Tag control protocol is however present within the packets.
1710Up to 256 devices can be attached to each control unit that uses one of these
1711interfaces.
1712
1713Common 390 Devices include:
1714Network adapters typically OSA2,3172's,2116's & OSA-E gigabit ethernet adapters,
1715Consoles 3270 & 3215 (a teletype emulated under linux for a line mode console).
1716DASD's direct access storage devices ( otherwise known as hard disks ).
1717Tape Drives.
1718CTC ( Channel to Channel Adapters ),
1719ESCON or Parallel Cables used as a very high speed serial link
1720between 2 machines.
1721
1722
1723Debugging IO on s/390 & z/Architecture under VM
1724===============================================
1725
1726Now we are ready to go on with IO tracing commands under VM
1727
1728A few self explanatory queries::
1729
1730 Q OSA
1731 Q CTC
1732 Q DISK ( This command is CMS specific )
1733 Q DASD
1734
1735Q OSA on my machine returns::
1736
1737 OSA 7C08 ON OSA 7C08 SUBCHANNEL = 0000
1738 OSA 7C09 ON OSA 7C09 SUBCHANNEL = 0001
1739 OSA 7C14 ON OSA 7C14 SUBCHANNEL = 0002
1740 OSA 7C15 ON OSA 7C15 SUBCHANNEL = 0003
1741
1742If you have a guest with certain privileges you may be able to see devices
1743which don't belong to you. To avoid this, add the option V.
1744e.g.::
1745
1746 Q V OSA
1747
1748Now using the device numbers returned by this command we will
1749Trace the io starting up on the first device 7c08 & 7c09
1750In our simplest case we can trace the
1751start subchannels
1752like TR SSCH 7C08-7C09
1753or the halt subchannels
1754or TR HSCH 7C08-7C09
1755MSCH's ,STSCH's I think you can guess the rest
1756
1757A good trick is tracing all the IO's and CCWS and spooling them into the reader
1758of another VM guest so he can ftp the logfile back to his own machine. I'll do
1759a small bit of this and give you a look at the output.
1760
17611) Spool stdout to VM reader::
1762
1763 SP PRT TO (another vm guest ) or * for the local vm guest
1764
17652) Fill the reader with the trace::
1766
1767 TR IO 7c08-7c09 INST INT CCW PRT RUN
1768
17693) Start up linux::
1770
1771 i 00c
17724) Finish the trace::
1773
1774 TR END
1775
17765) close the reader::
1777
1778 C PRT
1779
17806) list reader contents::
1781
1782 RDRLIST
1783
17847) copy it to linux4's minidisk::
1785
1786 RECEIVE / LOG TXT A1 ( replace
1787
8) filel & press F11 to look at it
1790You should see something like::
1791
1792 00020942' SSCH B2334000 0048813C CC 0 SCH 0000 DEV 7C08
1793 CPA 000FFDF0 PARM 00E2C9C4 KEY 0 FPI C0 LPM 80
1794 CCW 000FFDF0 E4200100 00487FE8 0000 E4240100 ........
1795 IDAL 43D8AFE8
1796 IDAL 0FB76000
1797 00020B0A' I/O DEV 7C08 -> 000197BC' SCH 0000 PARM 00E2C9C4
1798 00021628' TSCH B2354000 >> 00488164 CC 0 SCH 0000 DEV 7C08
1799 CCWA 000FFDF8 DEV STS 0C SCH STS 00 CNT 00EC
1800 KEY 0 FPI C0 CC 0 CTLS 4007
1801 00022238' STSCH B2344000 >> 00488108 CC 0 SCH 0000 DEV 7C08
1802
If you don't like messing up your reader ( because you possibly booted from it )
you can alternatively spool it to another guest's reader.
1805
1806
1807Other common VM device related commands
1808---------------------------------------------
1809These commands are listed only because they have
1810been of use to me in the past & may be of use to
1811you too. For more complete info on each of the commands
type HELP <command> from CMS.
1813
1814detaching devices::
1815
1816 DET <devno range>
1817 ATT <devno range> <guest>
1818
1819attach a device to guest * for your own guest
1820
1821READY <devno>
1822 cause VM to issue a fake interrupt.
1823
1824The VARY command is normally only available to VM administrators::
1825
1826 VARY ON PATH <path> TO <devno range>
1827 VARY OFF PATH <PATH> FROM <devno range>
1828
1829This is used to switch on or off channel paths to devices.
1830
1831Q CHPID <channel path ID>
1832 This displays state of devices using this channel path
1833
1834D SCHIB <subchannel>
1835 This displays the subchannel information SCHIB block for the device.
1836 this I believe is also only available to administrators.
1837
1838DEFINE CTC <devno>
1839 defines a virtual CTC channel to channel connection
1840 2 need to be defined on each guest for the CTC driver to use.
1841
1842COUPLE devno userid remote devno
1843 Joins a local virtual device to a remote virtual device
1844 ( commonly used for the CTC driver ).
1845
1846Building a VM ramdisk under CMS which linux can use::
1847
1848 def vfb-<blocksize> <subchannel> <number blocks>
1849
1850blocksize is commonly 4096 for linux.
1851
1852Formatting it::
1853
1854 format <subchannel> <driver letter e.g. x> (blksize <blocksize>
1855
1856Sharing a disk between multiple guests::
1857
1858 LINK userid devno1 devno2 mode password
1859
1860
1861
1862GDB on S390
1863===========
1864N.B. if compiling for debugging gdb works better without optimisation
1865( see Compiling programs for debugging )
1866
1867invocation
1868----------
1869gdb <victim program> <optional corefile>
1870
1871Online help
1872-----------
1873help: gives help on commands
1874
1875e.g.::
1876
1877 help
1878 help display
1879
1880Note gdb's online help is very good use it.
1881
1882
1883Assembly
1884--------
1885info registers:
1886 displays registers other than floating point.
1887
1888info all-registers:
1889 displays floating points as well.
1890
1891disassemble:
1892 disassembles
1893
1894e.g.::
1895
1896 disassemble without parameters will disassemble the current function
1897 disassemble $pc $pc+10
1898
1899Viewing & modifying variables
1900-----------------------------
1901print or p:
1902 displays variable or register
1903
1904e.g. p/x $sp will display the stack pointer
1905
1906display:
1907 prints variable or register each time program stops
1908
1909e.g.::
1910
1911 display/x $pc will display the program counter
1912 display argc
1913
undisplay:
 undoes displays
1916
1917info breakpoints:
1918 shows all current breakpoints
1919
1920info stack:
1921 shows stack back trace (if this doesn't work too well, I'll show
1922 you the stacktrace by hand below).
1923
1924info locals:
1925 displays local variables.
1926
1927info args:
1928 display current procedure arguments.
1929
1930set args:
1931 will set argc & argv each time the victim program is invoked
1932
1933e.g.::
1934
1935 set <variable>=value
1936 set argc=100
1937 set $pc=0
1938
1939
1940
1941Modifying execution
1942-------------------
1943step:
1944 steps n lines of sourcecode
1945
1946step
1947 steps 1 line.
1948
1949step 100
1950 steps 100 lines of code.
1951
1952next:
1953 like step except this will not step into subroutines
1954
1955stepi:
1956 steps a single machine code instruction.
1957
1958e.g.::
1959
1960 stepi 100
1961
1962nexti:
1963 steps a single machine code instruction but will not step into
1964 subroutines.
1965
1966finish:
1967 will run until exit of the current routine
1968
1969run:
1970 (re)starts a program
1971
1972cont:
1973 continues a program
1974
1975quit:
1976 exits gdb.
1977
1978
1979breakpoints
1980------------
1981
1982break
1983 sets a breakpoint
1984
1985e.g.::
1986
1987 break main
1988 break *$pc
1989 break *0x400618
1990
1991Here's a really useful one for large programs
1992
1993rbr
1994 Set a breakpoint for all functions matching REGEXP
1995
1996e.g.::
1997
1998 rbr 390
1999
2000will set a breakpoint with all functions with 390 in their name.
2001
2002info breakpoints
2003 lists all breakpoints
2004
2005delete:
2006 delete breakpoint by number or delete them all
2007
2008e.g.
2009
2010delete 1
2011 will delete the first breakpoint
2012
2013
2014delete
2015 will delete them all
2016
2017watch:
2018 This will set a watchpoint ( usually hardware assisted ),
2019
2020This will watch a variable till it changes
2021
2022e.g.
2023
2024watch cnt
2025 will watch the variable cnt till it changes.
2026
As an aside, unfortunately gdb's architecture independent watchpoint code
is inconsistent & not very good; watchpoints usually work but not always.
2029
2030info watchpoints:
2031 Display currently active watchpoints
2032
2033condition: ( another useful one )
2034 Specify breakpoint number N to break only if COND is true.
2035
2036Usage is `condition N COND`, where N is an integer and COND is an
2037expression to be evaluated whenever breakpoint N is reached.
2038
2039
2040
2041User defined functions/macros
2042-----------------------------
define: ( Note this is very very useful, simple & powerful )

usage: define <name> <list of commands> end
2046
2047examples which you should consider putting into .gdbinit in your home
2048directory::
2049
2050 define d
2051 stepi
2052 disassemble $pc $pc+10
2053 end
2054 define e
2055 nexti
2056 disassemble $pc $pc+10
2057 end
2058
2059
2060Other hard to classify stuff
2061----------------------------
2062signal n:
2063 sends the victim program a signal.
2064
2065e.g. `signal 3` will send a SIGQUIT.
2066
2067info signals:
2068 shows what gdb does when the victim receives certain signals.
2069
2070list:
2071
2072e.g.:
2073
2074list
2075 lists current function source
2076list 1,10
2077 lists the first 10 lines of the current file.
2078
2079list test.c:1,10
2080 lists lines 1 to 10 of test.c.
2081
2082directory:
2083 Adds directories to be searched for source if gdb cannot find the source.
2084 (note it is a bit sensitive about slashes)
2085
2086e.g. To add the root of the filesystem to the searchpath do::
2087
2088 directory //
2089
2090
2091call <function>
2092This calls a function in the victim program; this is pretty powerful.
2093e.g.
2094(gdb) call printf("hello world")
2095outputs:
2096$1 = 11
2097
2098You might now be thinking that the line above didn't work; something extra had
2099to be done:
2100(gdb) call fflush(stdout)
2101hello world$2 = 0
2102As an aside the debugger also calls malloc & free under the hood
2103to make space for the "hello world" string.
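
As a hedged aside, if stdout of the victim is line buffered ( e.g. a terminal ),
adding a newline to the string usually avoids the explicit flush::

 (gdb) call printf("hello world\n")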
2104
2105
2106
2107hints
2108-----
21091) command completion works just like bash
2110 ( if you are a bad typist like me this really helps )
2111
2112e.g. hit br <TAB> & cursor up & down :-).
2113
21142) if you have a debugging problem that takes a few steps to recreate,
2115put the steps into a file called .gdbinit in your current working directory.
2116If you have defined a few extra useful user defined commands, put these in
2117your home directory & they will be read each time gdb is launched.
2118
2119A typical .gdbinit file might be::
2120
2121 break main
2122 run
2123 break runtime_exception
2124 cont
2125
2126
2127stack chaining in gdb by hand
2128-----------------------------
2129This is done using the same trick described for VM::
2130
2131 p/x (*($sp+56))&0x7fffffff
2132
2133to get the first backchain.
2134
2135For z/Architecture,
2136replace 56 with 112, ignore the &0x7fffffff
2137in the macros below, & do nasty casts to longs like the following,
2138as gdb unfortunately deals with printed arguments as ints, which
2139messes up everything.
2140
2141i.e. here is a 3rd backchain dereference::
2142
2143 p/x *(long *)(***(long ***)$sp+112)
2144
2145
2146this outputs::
2147
2148 $5 = 0x528f18
2149
2150on my machine.
2151
2152Now you can use::
2153
2154 info symbol (*($sp+56))&0x7fffffff
2155
2156you might see something like::
2157
2158 rl_getc + 36 in section .text
2159
2160telling you what is located at address 0x528f18.
2161Now do::
2162
2163 p/x (*(*$sp+56))&0x7fffffff
2164
2165This outputs::
2166
2167 $6 = 0x528ed0
2168
2169Now do::
2170
2171 info symbol (*(*$sp+56))&0x7fffffff
2172 rl_read_key + 180 in section .text
2173
2174now do::
2175
2176 p/x (*(**$sp+56))&0x7fffffff
2177
2178& so on.
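
The whole walk can be wrapped in a user defined command ( see the define
section above ); a minimal sketch for the 31 bit case, using the same
offset of 56 & mask as above::

 define backchain
 p/x (*($sp+56))&0x7fffffff
 info symbol (*($sp+56))&0x7fffffff
 p/x (*(*$sp+56))&0x7fffffff
 info symbol (*(*$sp+56))&0x7fffffff
 end

This prints & resolves the first two backchain entries in one go.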
2179
2180Disassembling instructions without debug info
2181---------------------------------------------
2182If there is a lack of debugging symbols, gdb's disassemble command
2183typically complains with
2184"No function contains specified address." To get around
2185this do::
2186
2187 x/<number lines to disassemble>xi <address>
2188
2189e.g.::
2190
2191 x/20xi 0x400730
2192
2193
2194
2195Note:
2196 Remember gdb has history just like bash; you don't need to retype the
2197 whole line, just use the up & down arrows.
2198
2199
2200
2201For more info
2202-------------
2203From your Linux box do::
2204
2205 man gdb
2206
2207or::
2208
2209 info gdb.
2210
2211core dumps
2212----------
2213
2214What is a core dump?
2215^^^^^^^^^^^^^^^^^^^^
2216
2217A core dump is a file generated by the kernel (if allowed) which contains the
2218registers and all active pages of the program which has crashed.
2219
2220From this file gdb will allow you to look at the registers, stack trace and
2221memory of the program as if it just crashed on your system. It is usually
2222called core and created in the current working directory.
2223
2224This is very useful in that a customer can mail a core dump to a technical
2225support department and the technical support department can reconstruct what
2226happened, provided they have an identical copy of this program with debugging
2227symbols compiled in and the source base of this build is available.
2228
2229In short it is far more useful than something like a crash log could ever hope
2230to be.
2231
2232Why have I never seen one?
2233^^^^^^^^^^^^^^^^^^^^^^^^^^^
2234
2235Probably because you haven't used the bash command::
2236
2237 ulimit -c unlimited
2238
2239to allow core dumps, now do::
2240
2241 ulimit -a
2242
2243to verify that the limit was accepted.
2244
2245A sample core dump
2246 To create this I'm going to do::
2247
2248 ulimit -c unlimited
2249 gdb
2250
2251to launch gdb (my victim app.); now be bad & do the following from another
2252telnet/xterm session to the same machine::
2253
2254 ps -aux | grep gdb
2255 kill -SIGSEGV <gdb's pid>
2256
2257or alternatively use `killall -SIGSEGV gdb` if you have the killall command.
2258
2259Now look at the core dump::
2260
2261 ./gdb core
2262
2263Displays the following::
2264
2265 GNU gdb 4.18
2266 Copyright 1998 Free Software Foundation, Inc.
2267 GDB is free software, covered by the GNU General Public License, and you are
2268 welcome to change it and/or distribute copies of it under certain conditions.
2269 Type "show copying" to see the conditions.
2270 There is absolutely no warranty for GDB. Type "show warranty" for details.
2271 This GDB was configured as "s390-ibm-linux"...
2272 Core was generated by `./gdb'.
2273 Program terminated with signal 11, Segmentation fault.
2274 Reading symbols from /usr/lib/libncurses.so.4...done.
2275 Reading symbols from /lib/libm.so.6...done.
2276 Reading symbols from /lib/libc.so.6...done.
2277 Reading symbols from /lib/ld-linux.so.2...done.
2278 #0 0x40126d1a in read () from /lib/libc.so.6
2279 Setting up the environment for debugging gdb.
2280 Breakpoint 1 at 0x4dc6f8: file utils.c, line 471.
2281 Breakpoint 2 at 0x4d87a4: file top.c, line 2609.
2282 (top-gdb) info stack
2283 #0 0x40126d1a in read () from /lib/libc.so.6
2284 #1 0x528f26 in rl_getc (stream=0x7ffffde8) at input.c:402
2285 #2 0x528ed0 in rl_read_key () at input.c:381
2286 #3 0x5167e6 in readline_internal_char () at readline.c:454
2287 #4 0x5168ee in readline_internal_charloop () at readline.c:507
2288 #5 0x51692c in readline_internal () at readline.c:521
2289 #6 0x5164fe in readline (prompt=0x7ffff810)
2290 at readline.c:349
2291 #7 0x4d7a8a in command_line_input (prompt=0x564420 "(gdb) ", repeat=1,
2292 annotation_suffix=0x4d6b44 "prompt") at top.c:2091
2293 #8 0x4d6cf0 in command_loop () at top.c:1345
2294 #9 0x4e25bc in main (argc=1, argv=0x7ffffdf4) at main.c:635
2295
2296
2297LDD
2298===
2299This is a program which lists the shared libraries which a program (or
2300library) needs. Note you also get the relocations of the shared library text
2301segments which help when using objdump --source.
2302
2303e.g.::
2304
2305 ldd ./gdb
2306
2307outputs::
2308
2309 libncurses.so.4 => /usr/lib/libncurses.so.4 (0x40018000)
2310 libm.so.6 => /lib/libm.so.6 (0x4005e000)
2311 libc.so.6 => /lib/libc.so.6 (0x40084000)
2312 /lib/ld-linux.so.2 => /lib/ld-linux.so.2 (0x40000000)
2313
2314
2315Debugging shared libraries
2316==========================
2317Most programs use shared libraries; however, it can be very painful
2318when you single step instruction by instruction into a function like printf for the
2319first time & end up in functions like _dl_runtime_resolve. This is
2320the ld.so doing lazy binding; lazy binding is a concept in ELF where
2321shared library functions are not resolved until they are
2322actually used, great for saving memory but a pain to debug.
2323
2324To get around this either relink the program with -static, or exit gdb, type
2325export LD_BIND_NOW=true (this will stop lazy binding) & restart gdb'ing
2326the program in question.
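
e.g. a minimal sketch ( the program name is illustrative )::

 export LD_BIND_NOW=true
 gdb ./victim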
2327
2328
2329
2330Debugging modules
2331=================
2332As modules are dynamically loaded into the kernel their address can be
2333anywhere. To get around this use the -m option with insmod to emit a load
2334map which can be piped into a file if required.
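
e.g. a hedged sketch, with a made-up module name::

 insmod -m mymodule.o > mymodule.map

The load map can then be used to relate addresses seen while debugging back
to the module's symbols.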
2335
2336The proc file system
2337====================
2338What is it?
2339It is a filesystem created by the kernel with files which are created on demand
2340by the kernel if read, or which can be used to modify kernel parameters;
2341it is a powerful concept.
2342
2343e.g.::
2344
2345 cat /proc/sys/net/ipv4/ip_forward
2346
2347On my machine outputs::
2348
2349 0
2350
2351telling me ip_forwarding is not on; to switch it on I can do::
2352
2353 echo 1 > /proc/sys/net/ipv4/ip_forward
2354
2355cat it again::
2356
2357 cat /proc/sys/net/ipv4/ip_forward
2358
2359On my machine now outputs::
2360
2361 1
2362
2363IP forwarding is on.
2364
2365There is a lot of useful info in here best found by going in and having a look
2366around, so I'll take you through some entries I consider important.
2367
2368All the processes running on the machine have their own entry defined by
2369/proc/<pid>
2370
2371So let's have a look at the init process::
2372
2373 cd /proc/1
2374 cat cmdline
2375
2376emits::
2377
2378 init [2]
2379
2380::
2381
2382 cd /proc/1/fd
2383
2384This contains numerical entries of all the open files;
2385some of these you can cat, e.g. stdout (fd 1). Another useful entry is maps, e.g.::
2386
2387 cat /proc/29/maps
2388
2389on my machine emits::
2390
2391 00400000-00478000 r-xp 00000000 5f:00 4103 /bin/bash
2392 00478000-0047e000 rw-p 00077000 5f:00 4103 /bin/bash
2393 0047e000-00492000 rwxp 00000000 00:00 0
2394 40000000-40015000 r-xp 00000000 5f:00 14382 /lib/ld-2.1.2.so
2395 40015000-40016000 rw-p 00014000 5f:00 14382 /lib/ld-2.1.2.so
2396 40016000-40017000 rwxp 00000000 00:00 0
2397 40017000-40018000 rw-p 00000000 00:00 0
2398 40018000-4001b000 r-xp 00000000 5f:00 14435 /lib/libtermcap.so.2.0.8
2399 4001b000-4001c000 rw-p 00002000 5f:00 14435 /lib/libtermcap.so.2.0.8
2400 4001c000-4010d000 r-xp 00000000 5f:00 14387 /lib/libc-2.1.2.so
2401 4010d000-40111000 rw-p 000f0000 5f:00 14387 /lib/libc-2.1.2.so
2402 40111000-40114000 rw-p 00000000 00:00 0
2403 40114000-4011e000 r-xp 00000000 5f:00 14408 /lib/libnss_files-2.1.2.so
2404 4011e000-4011f000 rw-p 00009000 5f:00 14408 /lib/libnss_files-2.1.2.so
2405 7fffd000-80000000 rwxp ffffe000 00:00 0
2406
2407
2408Showing us the shared libraries the process uses, where they are in memory
2409& the memory access permissions for each virtual memory area.
2410
2411/proc/1/cwd is a softlink to the current working directory.
2412
2413/proc/1/root is the root of the filesystem for this process.
2414
2415/proc/1/mem is the current running process's memory which you
2416can read & write to like a file.
2417
2418strace uses this sometimes as it is a bit faster than the
2419rather inefficient ptrace interface for peeking at DATA.
2420
2421::
2422
2423 cat status
2424
2425 Name: init
2426 State: S (sleeping)
2427 Pid: 1
2428 PPid: 0
2429 Uid: 0 0 0 0
2430 Gid: 0 0 0 0
2431 Groups:
2432 VmSize: 408 kB
2433 VmLck: 0 kB
2434 VmRSS: 208 kB
2435 VmData: 24 kB
2436 VmStk: 8 kB
2437 VmExe: 368 kB
2438 VmLib: 0 kB
2439 SigPnd: 0000000000000000
2440 SigBlk: 0000000000000000
2441 SigIgn: 7fffffffd7f0d8fc
2442 SigCgt: 00000000280b2603
2443 CapInh: 00000000fffffeff
2444 CapPrm: 00000000ffffffff
2445 CapEff: 00000000fffffeff
2446
2447 User PSW: 070de000 80414146
2448 task: 004b6000 tss: 004b62d8 ksp: 004b7ca8 pt_regs: 004b7f68
2449 User GPRS:
2450 00000400 00000000 0000000b 7ffffa90
2451 00000000 00000000 00000000 0045d9f4
2452 0045cafc 7ffffa90 7fffff18 0045cb08
2453 00010400 804039e8 80403af8 7ffff8b0
2454 User ACRS:
2455 00000000 00000000 00000000 00000000
2456 00000001 00000000 00000000 00000000
2457 00000000 00000000 00000000 00000000
2458 00000000 00000000 00000000 00000000
2459 Kernel BackChain CallChain BackChain CallChain
2460 004b7ca8 8002bd0c 004b7d18 8002b92c
2461 004b7db8 8005cd50 004b7e38 8005d12a
2462 004b7f08 80019114
2463
2464Showing among other things memory usage & status of some signals &
2465the process's registers from the kernel task structure
2466as well as a backchain which may be useful if a process crashes
2467in the kernel for some unknown reason.
2468
2469Some driver debugging techniques
2470================================
2471debug feature
2472-------------
2473Some of our drivers now support a "debug feature" in
2474/proc/s390dbf; see s390dbf.txt in the linux/Documentation directory
2475for more info.
2476
2477e.g.
2478to switch on the lcs "debug feature"::
2479
2480 echo 5 > /proc/s390dbf/lcs/level
2481
2482& then after the error occurred::
2483
2484 cat /proc/s390dbf/lcs/sprintf >/logfile
2485
2486the logfile now contains some information which may help
2487tech support resolve a problem in the field.
2488
2489
2490
2491high level debugging network drivers
2492------------------------------------
2493ifconfig is a quite useful command;
2494it gives the current state of network drivers.
2495
2496If you suspect your network device driver is dead
2497one way to check is to type::
2498
2499 ifconfig <network device>
2500
2501e.g. tr0
2502
2503You should see something like::
2504
2505 ifconfig tr0
2506 tr0 Link encap:16/4 Mbps Token Ring (New) HWaddr 00:04:AC:20:8E:48
2507 inet addr:9.164.185.132 Bcast:9.164.191.255 Mask:255.255.224.0
2508 UP BROADCAST RUNNING MULTICAST MTU:2000 Metric:1
2509 RX packets:246134 errors:0 dropped:0 overruns:0 frame:0
2510 TX packets:5 errors:0 dropped:0 overruns:0 carrier:0
2511 collisions:0 txqueuelen:100
2512
2513If the device doesn't say UP,
2514try::
2515
2516 /etc/rc.d/init.d/network start
2517
2518( this starts the network stack & hopefully calls ifconfig tr0 up ).
2519ifconfig looks at the output of /proc/net/dev and presents it in a more
2520readable form.
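
e.g. to look at the raw counters ifconfig reads::

 cat /proc/net/dev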
2521
2522Now ping the device from a machine in the same subnet.
2523
2524If the RX packets & TX packets counts don't increment, you probably
2525have problems.
2526
2527next::
2528
2529 cat /proc/net/arp
2530
2531Do you see any hardware addresses in the cache? If not, you may have problems.
2532Next try::
2533
2534 ping -c 5 <broadcast_addr>
2535
2536i.e. the Bcast field above in the output of
2537ifconfig. Do you see any replies from machines other than the local machine?
2538If not, you may have problems. Also, if the TX packets count in ifconfig
2539hasn't incremented either, you have serious problems in your driver
2540(e.g. the txbusy field of the network device being stuck on)
2541or you may have multiple network devices connected.
2542
2543
2544chandev
2545-------
2546There is a new device layer for channel devices; some
2547drivers, e.g. lcs, are registered with this layer.
2548
2549If the device uses the channel device layer you'll be
2550able to find what interrupts it uses & the current state
2551of the device.
2552
2553See the manpage chandev.8 & type cat /proc/chandev for more info.
2554
2555
2556SysRq
2557=====
2558This is now supported by linux for s/390 & z/Architecture.
2559
2560To enable it, compile the kernel with::
2561
2562 Kernel Hacking -> Magic SysRq Key Enabled
2563
2564Then::
2565
2566 echo "1" > /proc/sys/kernel/sysrq
2567
2568also type::
2569
2570 echo "8" >/proc/sys/kernel/printk
2571
2572To make printk output go to the console.
2573
2574On 390 all commands are prefixed with::
2575
2576 ^-
2577
2578e.g.::
2579
2580 ^-t will show tasks.
2581 ^-? or some unknown command will display help.
2582
2583The sysrq key reading is very picky ( I have to type the keys in an
2584xterm session & paste them into the x3270 console )
2585& it may be wise to predefine the keys as described in the VM hints above
2586
2587This is particularly useful for syncing disks unmounting & rebooting
2588if the machine gets partially hung.
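
e.g. assuming the standard SysRq key bindings ( s = sync, u = remount
read-only, b = reboot ), the emergency sequence on 390 would be::

 ^-s
 ^-u
 ^-b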
2589
2590Read Documentation/admin-guide/sysrq.rst for more info
2591
2592References:
2593===========
2594- Enterprise Systems Architecture Reference Summary
2595- Enterprise Systems Architecture Principles of Operation
2596- Hartmut Penners s390 stack frame sheet.
2597- IBM Mainframe Channel Attachment a technology brief from a CISCO webpage
2598- Various bits of man & info pages of Linux.
2599- Linux & GDB source.
2600- Various info & man pages.
2601- CMS Help on tracing commands.
2602- Linux for s/390 Elf Application Binary Interface
2603- Linux for z/Series Elf Application Binary Interface ( Both Highly Recommended )
2604- z/Architecture Principles of Operation SA22-7832-00
2605- Enterprise Systems Architecture/390 Reference Summary SA22-7209-01 & the
2606- Enterprise Systems Architecture/390 Principles of Operation SA22-7201-05
2607
2608Special Thanks
2609==============
2610Special thanks to Neale Ferguson who maintains a much
2611prettier HTML version of this page at
2612http://linuxvm.org/penguinvm/
2613and to Bob Grainger, Stefan Bader & others for reporting bugs.
diff --git a/Documentation/s390/driver-model.txt b/Documentation/s390/driver-model.rst
index ed265cf54cde..ad4bc2dbea43 100644
--- a/Documentation/s390/driver-model.txt
+++ b/Documentation/s390/driver-model.rst
@@ -1,5 +1,6 @@
1=============================
1S/390 driver model interfaces 2S/390 driver model interfaces
2----------------------------- 3=============================
3 4
41. CCW devices 51. CCW devices
5-------------- 6--------------
@@ -7,13 +8,13 @@ S/390 driver model interfaces
7All devices which can be addressed by means of ccws are called 'CCW devices' - 8All devices which can be addressed by means of ccws are called 'CCW devices' -
8even if they aren't actually driven by ccws. 9even if they aren't actually driven by ccws.
9 10
10All ccw devices are accessed via a subchannel, this is reflected in the 11All ccw devices are accessed via a subchannel, this is reflected in the
11structures under devices/: 12structures under devices/::
12 13
13devices/ 14 devices/
14 - system/ 15 - system/
15 - css0/ 16 - css0/
16 - 0.0.0000/0.0.0815/ 17 - 0.0.0000/0.0.0815/
17 - 0.0.0001/0.0.4711/ 18 - 0.0.0001/0.0.4711/
18 - 0.0.0002/ 19 - 0.0.0002/
19 - 0.1.0000/0.1.1234/ 20 - 0.1.0000/0.1.1234/
@@ -35,14 +36,18 @@ be found under bus/ccw/devices/.
35 36
36All ccw devices export some data via sysfs. 37All ccw devices export some data via sysfs.
37 38
38cutype: The control unit type / model. 39cutype:
40 The control unit type / model.
39 41
40devtype: The device type / model, if applicable. 42devtype:
43 The device type / model, if applicable.
41 44
42availability: Can be 'good' or 'boxed'; 'no path' or 'no device' for 45availability:
46 Can be 'good' or 'boxed'; 'no path' or 'no device' for
43 disconnected devices. 47 disconnected devices.
44 48
45online: An interface to set the device online and offline. 49online:
50 An interface to set the device online and offline.
46 In the special case of the device being disconnected (see the 51 In the special case of the device being disconnected (see the
47 notify function under 1.2), piping 0 to online will forcibly delete 52 notify function under 1.2), piping 0 to online will forcibly delete
48 the device. 53 the device.
@@ -52,9 +57,11 @@ The device drivers can add entries to export per-device data and interfaces.
52There is also some data exported on a per-subchannel basis (see under 57There is also some data exported on a per-subchannel basis (see under
53bus/css/devices/): 58bus/css/devices/):
54 59
55chpids: Via which chpids the device is connected. 60chpids:
61 Via which chpids the device is connected.
56 62
57pimpampom: The path installed, path available and path operational masks. 63pimpampom:
64 The path installed, path available and path operational masks.
58 65
59There also might be additional data, for example for block devices. 66There also might be additional data, for example for block devices.
60 67
@@ -74,77 +81,93 @@ b. After a. has been performed, if necessary, the device is finally brought up
74------------------------------------ 81------------------------------------
75 82
76The basic struct ccw_device and struct ccw_driver data structures can be found 83The basic struct ccw_device and struct ccw_driver data structures can be found
77under include/asm/ccwdev.h. 84under include/asm/ccwdev.h::
78 85
79struct ccw_device { 86 struct ccw_device {
80 spinlock_t *ccwlock; 87 spinlock_t *ccwlock;
81 struct ccw_device_private *private; 88 struct ccw_device_private *private;
82 struct ccw_device_id id; 89 struct ccw_device_id id;
83 90
84 struct ccw_driver *drv; 91 struct ccw_driver *drv;
85 struct device dev; 92 struct device dev;
86 int online; 93 int online;
87 94
88 void (*handler) (struct ccw_device *dev, unsigned long intparm, 95 void (*handler) (struct ccw_device *dev, unsigned long intparm,
89 struct irb *irb); 96 struct irb *irb);
90}; 97 };
91 98
92struct ccw_driver { 99 struct ccw_driver {
93 struct module *owner; 100 struct module *owner;
94 struct ccw_device_id *ids; 101 struct ccw_device_id *ids;
95 int (*probe) (struct ccw_device *); 102 int (*probe) (struct ccw_device *);
96 int (*remove) (struct ccw_device *); 103 int (*remove) (struct ccw_device *);
97 int (*set_online) (struct ccw_device *); 104 int (*set_online) (struct ccw_device *);
98 int (*set_offline) (struct ccw_device *); 105 int (*set_offline) (struct ccw_device *);
99 int (*notify) (struct ccw_device *, int); 106 int (*notify) (struct ccw_device *, int);
100 struct device_driver driver; 107 struct device_driver driver;
101 char *name; 108 char *name;
102}; 109 };
103 110
104The 'private' field contains data needed for internal i/o operation only, and 111The 'private' field contains data needed for internal i/o operation only, and
105is not available to the device driver. 112is not available to the device driver.
106 113
107Each driver should declare in a MODULE_DEVICE_TABLE into which CU types/models 114Each driver should declare in a MODULE_DEVICE_TABLE into which CU types/models
108and/or device types/models it is interested. This information can later be found 115and/or device types/models it is interested. This information can later be found
109in the struct ccw_device_id fields: 116in the struct ccw_device_id fields::
110 117
111struct ccw_device_id { 118 struct ccw_device_id {
112 __u16 match_flags; 119 __u16 match_flags;
113 120
114 __u16 cu_type; 121 __u16 cu_type;
115 __u16 dev_type; 122 __u16 dev_type;
116 __u8 cu_model; 123 __u8 cu_model;
117 __u8 dev_model; 124 __u8 dev_model;
118 125
119 unsigned long driver_info; 126 unsigned long driver_info;
120}; 127 };
121 128
122The functions in ccw_driver should be used in the following way: 129The functions in ccw_driver should be used in the following way:
123probe: This function is called by the device layer for each device the driver 130
131probe:
132 This function is called by the device layer for each device the driver
124 is interested in. The driver should only allocate private structures 133 is interested in. The driver should only allocate private structures
125 to put in dev->driver_data and create attributes (if needed). Also, 134 to put in dev->driver_data and create attributes (if needed). Also,
126 the interrupt handler (see below) should be set here. 135 the interrupt handler (see below) should be set here.
127 136
128int (*probe) (struct ccw_device *cdev); 137::
138
139 int (*probe) (struct ccw_device *cdev);
129 140
130Parameters: cdev - the device to be probed. 141Parameters:
142 cdev
143 - the device to be probed.
131 144
132 145
133remove: This function is called by the device layer upon removal of the driver, 146remove:
147 This function is called by the device layer upon removal of the driver,
134 the device or the module. The driver should perform cleanups here. 148 the device or the module. The driver should perform cleanups here.
135 149
136int (*remove) (struct ccw_device *cdev); 150::
137 151
138Parameters: cdev - the device to be removed. 152 int (*remove) (struct ccw_device *cdev);
139 153
154Parameters:
155 cdev
156 - the device to be removed.
140 157
141set_online: This function is called by the common I/O layer when the device is 158
159set_online:
160 This function is called by the common I/O layer when the device is
142 activated via the 'online' attribute. The driver should finally 161 activated via the 'online' attribute. The driver should finally
143 setup and activate the device here. 162 setup and activate the device here.
144 163
145int (*set_online) (struct ccw_device *); 164::
165
166 int (*set_online) (struct ccw_device *);
146 167
147Parameters: cdev - the device to be activated. The common layer has 168Parameters:
169 cdev
170 - the device to be activated. The common layer has
148 verified that the device is not already online. 171 verified that the device is not already online.
149 172
150 173
@@ -152,15 +175,22 @@ set_offline: This function is called by the common I/O layer when the device is
152 de-activated via the 'online' attribute. The driver should shut 175 de-activated via the 'online' attribute. The driver should shut
153 down the device, but not de-allocate its private data. 176 down the device, but not de-allocate its private data.
154 177
155int (*set_offline) (struct ccw_device *); 178::
156 179
157Parameters: cdev - the device to be deactivated. The common layer has 180 int (*set_offline) (struct ccw_device *);
181
182Parameters:
183 cdev
184 - the device to be deactivated. The common layer has
158 verified that the device is online. 185 verified that the device is online.
159 186
160 187
161notify: This function is called by the common I/O layer for some state changes 188notify:
189 This function is called by the common I/O layer for some state changes
162 of the device. 190 of the device.
191
163 Signalled to the driver are: 192 Signalled to the driver are:
193
164 * In online state, device detached (CIO_GONE) or last path gone 194 * In online state, device detached (CIO_GONE) or last path gone
165 (CIO_NO_PATH). The driver must return !0 to keep the device; for 195 (CIO_NO_PATH). The driver must return !0 to keep the device; for
166 return code 0, the device will be deleted as usual (also when no 196 return code 0, the device will be deleted as usual (also when no
@@ -173,32 +203,40 @@ notify: This function is called by the common I/O layer for some state changes
173 return code of the notify function the device driver signals if it 203 return code of the notify function the device driver signals if it
174 wants the device back: !0 for keeping, 0 to make the device being 204 wants the device back: !0 for keeping, 0 to make the device being
175 removed and re-registered. 205 removed and re-registered.
176
177int (*notify) (struct ccw_device *, int);
178 206
179Parameters: cdev - the device whose state changed. 207::
180 event - the event that happened. This can be one of CIO_GONE, 208
181 CIO_NO_PATH or CIO_OPER. 209 int (*notify) (struct ccw_device *, int);
210
211Parameters:
212 cdev
213 - the device whose state changed.
214
215 event
216 - the event that happened. This can be one of CIO_GONE,
217 CIO_NO_PATH or CIO_OPER.
182 218
183The handler field of the struct ccw_device is meant to be set to the interrupt 219The handler field of the struct ccw_device is meant to be set to the interrupt
184handler for the device. In order to accommodate drivers which use several 220handler for the device. In order to accommodate drivers which use several
185distinct handlers (e.g. multi subchannel devices), this is a member of ccw_device 221distinct handlers (e.g. multi subchannel devices), this is a member of ccw_device
186instead of ccw_driver. 222instead of ccw_driver.
187The handler is registered with the common layer during set_online() processing 223The handler is registered with the common layer during set_online() processing
188before the driver is called, and is deregistered during set_offline() after the 224before the driver is called, and is deregistered during set_offline() after the
189driver has been called. Also, after registering / before deregistering, path 225driver has been called. Also, after registering / before deregistering, path
190grouping resp. disbanding of the path group (if applicable) are performed. 226grouping resp. disbanding of the path group (if applicable) are performed.
191 227
192void (*handler) (struct ccw_device *dev, unsigned long intparm, struct irb *irb); 228::
193 229
194Parameters: dev - the device the handler is called for 230 void (*handler) (struct ccw_device *dev, unsigned long intparm, struct irb *irb);
231
232Parameters: dev - the device the handler is called for
195 intparm - the intparm which allows the device driver to identify 233 intparm - the intparm which allows the device driver to identify
196 the i/o the interrupt is associated with, or to recognize 234 the i/o the interrupt is associated with, or to recognize
197 the interrupt as unsolicited. 235 the interrupt as unsolicited.
198 irb - interruption response block which contains the accumulated 236 irb - interruption response block which contains the accumulated
199 status. 237 status.
200 238
201The device driver is called from the common ccw_device layer and can retrieve 239The device driver is called from the common ccw_device layer and can retrieve
202information about the interrupt from the irb parameter. 240information about the interrupt from the irb parameter.
203 241
204 242
@@ -237,23 +275,27 @@ only the logical state and not the physical state, since we cannot track the
237latter consistently due to lacking machine support (we don't need to be aware 275latter consistently due to lacking machine support (we don't need to be aware
238of it anyway). 276of it anyway).
239 277
240status - Can be 'online' or 'offline'. 278status
279 - Can be 'online' or 'offline'.
241 Piping 'on' or 'off' sets the chpid logically online/offline. 280 Piping 'on' or 'off' sets the chpid logically online/offline.
242 Piping 'on' to an online chpid triggers path reprobing for all devices 281 Piping 'on' to an online chpid triggers path reprobing for all devices
243 the chpid connects to. This can be used to force the kernel to re-use 282 the chpid connects to. This can be used to force the kernel to re-use
244 a channel path the user knows to be online, but the machine hasn't 283 a channel path the user knows to be online, but the machine hasn't
245 created a machine check for. 284 created a machine check for.
246 285
247type - The physical type of the channel path. 286type
287 - The physical type of the channel path.
248 288
249shared - Whether the channel path is shared. 289shared
290 - Whether the channel path is shared.
250 291
251cmg - The channel measurement group. 292cmg
293 - The channel measurement group.
252 294
2533. System devices 2953. System devices
254----------------- 296-----------------
255 297
2563.1 xpram 2983.1 xpram
257--------- 299---------
258 300
259xpram shows up under devices/system/ as 'xpram'. 301xpram shows up under devices/system/ as 'xpram'.
@@ -279,9 +321,8 @@ Netiucv connections show up under devices/iucv/ as "netiucv<ifnum>". The interfa
279number is assigned sequentially to the connections defined via the 'connection' 321number is assigned sequentially to the connections defined via the 'connection'
280attribute. 322attribute.
281 323
282user - shows the connection partner. 324user
283 325 - shows the connection partner.
284buffer - maximum buffer size.
285 Pipe to it to change buffer size.
286
287 326
327buffer
328 - maximum buffer size. Pipe to it to change buffer size.
diff --git a/Documentation/s390/index.rst b/Documentation/s390/index.rst
new file mode 100644
index 000000000000..1a914da2a07b
--- /dev/null
+++ b/Documentation/s390/index.rst
@@ -0,0 +1,30 @@
1:orphan:
2
3=================
4s390 Architecture
5=================
6
7.. toctree::
8 :maxdepth: 1
9
10 cds
11 3270
12 debugging390
13 driver-model
14 monreader
15 qeth
16 s390dbf
17 vfio-ap
18 vfio-ccw
19 zfcpdump
20 dasd
21 common_io
22
23 text_files
24
25.. only:: subproject and html
26
27 Indices
28 =======
29
30 * :ref:`genindex`
diff --git a/Documentation/s390/monreader.txt b/Documentation/s390/monreader.rst
index d3729585fdb0..1e857575c113 100644
--- a/Documentation/s390/monreader.txt
+++ b/Documentation/s390/monreader.rst
@@ -1,24 +1,26 @@
1=================================================
2Linux API for read access to z/VM Monitor Records
3=================================================
1 4
2Date : 2004-Nov-26 5Date : 2004-Nov-26
6
3Author: Gerald Schaefer (geraldsc@de.ibm.com) 7Author: Gerald Schaefer (geraldsc@de.ibm.com)
4 8
5 9
6 Linux API for read access to z/VM Monitor Records
7 =================================================
8 10
9 11
10Description 12Description
11=========== 13===========
12This item delivers a new Linux API in the form of a misc char device that is 14This item delivers a new Linux API in the form of a misc char device that is
13usable from user space and allows read access to the z/VM Monitor Records 15usable from user space and allows read access to the z/VM Monitor Records
14collected by the *MONITOR System Service of z/VM. 16collected by the `*MONITOR` System Service of z/VM.
15 17
16 18
17User Requirements 19User Requirements
18================= 20=================
19The z/VM guest on which you want to access this API needs to be configured in 21The z/VM guest on which you want to access this API needs to be configured in
20order to allow IUCV connections to the *MONITOR service, i.e. it needs the 22order to allow IUCV connections to the `*MONITOR` service, i.e. it needs the
21IUCV *MONITOR statement in its user entry. If the monitor DCSS to be used is 23IUCV `*MONITOR` statement in its user entry. If the monitor DCSS to be used is
22restricted (likely), you also need the NAMESAVE <DCSS NAME> statement. 24restricted (likely), you also need the NAMESAVE <DCSS NAME> statement.
23This item will use the IUCV device driver to access the z/VM services, so you 25This item will use the IUCV device driver to access the z/VM services, so you
24need a kernel with IUCV support. You also need z/VM version 4.4 or 5.1. 26need a kernel with IUCV support. You also need z/VM version 4.4 or 5.1.
@@ -50,7 +52,9 @@ Your guest virtual storage has to end below the starting address of the DCSS
50and you have to specify the "mem=" kernel parameter in your parmfile with a 52and you have to specify the "mem=" kernel parameter in your parmfile with a
51value greater than the ending address of the DCSS. 53value greater than the ending address of the DCSS.
52 54
53Example: DEF STOR 140M 55Example::
56
57 DEF STOR 140M
54 58
55This defines 140MB storage size for your guest, the parameter "mem=160M" is 59This defines 140MB storage size for your guest, the parameter "mem=160M" is
56added to the parmfile. 60added to the parmfile.
@@ -66,24 +70,27 @@ kernel, the kernel parameter "monreader.mondcss=<DCSS NAME>" can be specified
66in the parmfile. 70in the parmfile.
67 71
68The default name for the DCSS is "MONDCSS" if none is specified. In case that 72The default name for the DCSS is "MONDCSS" if none is specified. In case that
69there are other users already connected to the *MONITOR service (e.g. 73there are other users already connected to the `*MONITOR` service (e.g.
70Performance Toolkit), the monitor DCSS is already defined and you have to use 74Performance Toolkit), the monitor DCSS is already defined and you have to use
71the same DCSS. The CP command Q MONITOR (Class E privileged) shows the name 75the same DCSS. The CP command Q MONITOR (Class E privileged) shows the name
72of the monitor DCSS, if already defined, and the users connected to the 76of the monitor DCSS, if already defined, and the users connected to the
73*MONITOR service. 77`*MONITOR` service.
74Refer to the "z/VM Performance" book (SC24-6109-00) on how to create a monitor 78Refer to the "z/VM Performance" book (SC24-6109-00) on how to create a monitor
75DCSS if your z/VM doesn't have one already, you need Class E privileges to 79DCSS if your z/VM doesn't have one already, you need Class E privileges to
76define and save a DCSS. 80define and save a DCSS.
77 81
78Example: 82Example:
79-------- 83--------
80modprobe monreader mondcss=MYDCSS 84
85::
86
87 modprobe monreader mondcss=MYDCSS
81 88
82This loads the module and sets the DCSS name to "MYDCSS". 89This loads the module and sets the DCSS name to "MYDCSS".
83 90
84NOTE: 91NOTE:
85----- 92-----
86This API provides no interface to control the *MONITOR service, e.g. specify 93This API provides no interface to control the `*MONITOR` service, e.g. specify
87which data should be collected. This can be done by the CP command MONITOR 94which data should be collected. This can be done by the CP command MONITOR
88(Class E privileged), see "CP Command and Utility Reference". 95(Class E privileged), see "CP Command and Utility Reference".
89 96
@@ -98,6 +105,7 @@ If your distribution does not support udev, a device node will not be created
98automatically and you have to create it manually after loading the module. 105automatically and you have to create it manually after loading the module.
99Therefore you need to know the major and minor numbers of the device. These 106Therefore you need to know the major and minor numbers of the device. These
100numbers can be found in /sys/class/misc/monreader/dev. 107numbers can be found in /sys/class/misc/monreader/dev.
108
101Typing cat /sys/class/misc/monreader/dev will give an output of the form 109Typing cat /sys/class/misc/monreader/dev will give an output of the form
102<major>:<minor>. The device node can be created via the mknod command, enter 110<major>:<minor>. The device node can be created via the mknod command, enter
103mknod <name> c <major> <minor>, where <name> is the name of the device node 111mknod <name> c <major> <minor>, where <name> is the name of the device node
@@ -105,10 +113,13 @@ to be created.
105 113
106Example: 114Example:
107-------- 115--------
108# modprobe monreader 116
109# cat /sys/class/misc/monreader/dev 117::
11010:63 118
111# mknod /dev/monreader c 10 63 119 # modprobe monreader
120 # cat /sys/class/misc/monreader/dev
121 10:63
122 # mknod /dev/monreader c 10 63
112 123
113This loads the module with the default monitor DCSS (MONDCSS) and creates a 124This loads the module with the default monitor DCSS (MONDCSS) and creates a
114device node. 125device node.
@@ -133,20 +144,21 @@ last byte of data. The start address is needed to handle "end-of-frame" records
133correctly (domain 1, record 13), i.e. it can be used to determine the record 144correctly (domain 1, record 13), i.e. it can be used to determine the record
134start offset relative to a 4K page (frame) boundary. 145start offset relative to a 4K page (frame) boundary.
135 146
136See "Appendix A: *MONITOR" in the "z/VM Performance" document for a description 147See "Appendix A: `*MONITOR`" in the "z/VM Performance" document for a description
137of the monitor control element layout. The layout of the monitor records can 148of the monitor control element layout. The layout of the monitor records can
138be found here (z/VM 5.1): http://www.vm.ibm.com/pubs/mon510/index.html 149be found here (z/VM 5.1): http://www.vm.ibm.com/pubs/mon510/index.html
139 150
140The layout of the data stream provided by the monreader device is as follows: 151The layout of the data stream provided by the monreader device is as follows::
141... 152
142<0 byte read> 153 ...
143<first MCE> \ 154 <0 byte read>
144<first set of records> | 155 <first MCE> \
145... |- data set 156 <first set of records> |
146<last MCE> | 157 ... |- data set
147<last set of records> / 158 <last MCE> |
148<0 byte read> 159 <last set of records> /
149... 160 <0 byte read>
161 ...
150 162
151There may be more than one combination of MCE and corresponding record set 163There may be more than one combination of MCE and corresponding record set
152within one data set and the end of each data set is indicated by a successful 164within one data set and the end of each data set is indicated by a successful
@@ -165,15 +177,19 @@ As with most char devices, error conditions are indicated by returning a
165negative value for the number of bytes read. In this case, the errno variable 177negative value for the number of bytes read. In this case, the errno variable
166indicates the error condition: 178indicates the error condition:
167 179
168EIO: reply failed, read data is invalid and the application 180EIO:
181 reply failed, read data is invalid and the application
169 should discard the data read since the last successful read with 0 size. 182 should discard the data read since the last successful read with 0 size.
170EFAULT: copy_to_user failed, read data is invalid and the application should 183EFAULT:
171 discard the data read since the last successful read with 0 size. 184 copy_to_user failed, read data is invalid and the application should
172EAGAIN: occurs on a non-blocking read if there is no data available at the 185 discard the data read since the last successful read with 0 size.
173 moment. There is no data missing or corrupted, just try again or rather 186EAGAIN:
174 use polling for non-blocking reads. 187 occurs on a non-blocking read if there is no data available at the
175EOVERFLOW: message limit reached, the data read since the last successful 188 moment. There is no data missing or corrupted, just try again or rather
176 read with 0 size is valid but subsequent records may be missing. 189 use polling for non-blocking reads.
190EOVERFLOW:
191 message limit reached, the data read since the last successful
192 read with 0 size is valid but subsequent records may be missing.
177 193
178In the last case (EOVERFLOW) there may be missing data, in the first two cases 194In the last case (EOVERFLOW) there may be missing data, in the first two cases
179(EIO, EFAULT) there will be missing data. It's up to the application if it will 195(EIO, EFAULT) there will be missing data. It's up to the application if it will
@@ -183,7 +199,7 @@ Open:
183----- 199-----
184Only one user is allowed to open the char device. If it is already in use, the 200Only one user is allowed to open the char device. If it is already in use, the
185open function will fail (return a negative value) and set errno to EBUSY. 201open function will fail (return a negative value) and set errno to EBUSY.
186The open function may also fail if an IUCV connection to the *MONITOR service 202The open function may also fail if an IUCV connection to the `*MONITOR` service
187cannot be established. In this case errno will be set to EIO and an error 203cannot be established. In this case errno will be set to EIO and an error
188message with an IPUSER SEVER code will be printed into syslog. The IPUSER SEVER 204message with an IPUSER SEVER code will be printed into syslog. The IPUSER SEVER
189codes are described in the "z/VM Performance" book, Appendix A. 205codes are described in the "z/VM Performance" book, Appendix A.
@@ -194,4 +210,3 @@ As soon as the device is opened, incoming messages will be accepted and they
194will account for the message limit, i.e. opening the device without reading 210will account for the message limit, i.e. opening the device without reading
195from it will provoke the "message limit reached" error (EOVERFLOW error code) 211from it will provoke the "message limit reached" error (EOVERFLOW error code)
196eventually. 212eventually.
197
diff --git a/Documentation/s390/qeth.txt b/Documentation/s390/qeth.rst
index aa06fcf5f8c2..f02fdaa68de0 100644
--- a/Documentation/s390/qeth.txt
+++ b/Documentation/s390/qeth.rst
@@ -1,8 +1,12 @@
1=============================
1IBM s390 QDIO Ethernet Driver 2IBM s390 QDIO Ethernet Driver
3=============================
2 4
3OSA and HiperSockets Bridge Port Support 5OSA and HiperSockets Bridge Port Support
6========================================
4 7
5Uevents 8Uevents
9-------
6 10
7To generate the events the device must be assigned a role of either 11To generate the events the device must be assigned a role of either
8a primary or a secondary Bridge Port. For more information, see 12a primary or a secondary Bridge Port. For more information, see
@@ -13,12 +17,15 @@ of some configured Bridge Port device on the channel changes, a udev
13event with ACTION=CHANGE is emitted on behalf of the corresponding 17event with ACTION=CHANGE is emitted on behalf of the corresponding
14ccwgroup device. The event has the following attributes: 18ccwgroup device. The event has the following attributes:
15 19
16BRIDGEPORT=statechange - indicates that the Bridge Port device changed 20BRIDGEPORT=statechange
21 indicates that the Bridge Port device changed
17 its state. 22 its state.
18 23
19ROLE={primary|secondary|none} - the role assigned to the port. 24ROLE={primary|secondary|none}
25 the role assigned to the port.
20 26
21STATE={active|standby|inactive} - the newly assumed state of the port. 27STATE={active|standby|inactive}
28 the newly assumed state of the port.
22 29
23When run on HiperSockets Bridge Capable Port hardware with host address 30When run on HiperSockets Bridge Capable Port hardware with host address
24notifications enabled, a udev event with ACTION=CHANGE is emitted. 31notifications enabled, a udev event with ACTION=CHANGE is emitted.
@@ -26,25 +33,32 @@ It is emitted on behalf of the corresponding ccwgroup device when a host
26or a VLAN is registered or unregistered on the network served by the device. 33or a VLAN is registered or unregistered on the network served by the device.
27The event has the following attributes: 34The event has the following attributes:
28 35
29BRIDGEDHOST={reset|register|deregister|abort} - host address 36BRIDGEDHOST={reset|register|deregister|abort}
37 host address
30 notifications are started afresh, a new host or VLAN is registered or 38 notifications are started afresh, a new host or VLAN is registered or
31 deregistered on the Bridge Port HiperSockets channel, or address 39 deregistered on the Bridge Port HiperSockets channel, or address
32 notifications are aborted. 40 notifications are aborted.
33 41
34VLAN=numeric-vlan-id - VLAN ID on which the event occurred. Not included 42VLAN=numeric-vlan-id
43 VLAN ID on which the event occurred. Not included
35 if no VLAN is involved in the event. 44 if no VLAN is involved in the event.
36 45
37MAC=xx:xx:xx:xx:xx:xx - MAC address of the host that is being registered 46MAC=xx:xx:xx:xx:xx:xx
47 MAC address of the host that is being registered
38 or deregistered from the HiperSockets channel. Not reported if the 48 or deregistered from the HiperSockets channel. Not reported if the
39 event reports the creation or destruction of a VLAN. 49 event reports the creation or destruction of a VLAN.
40 50
41NTOK_BUSID=x.y.zzzz - device bus ID (CSSID, SSID and device number). 51NTOK_BUSID=x.y.zzzz
52 device bus ID (CSSID, SSID and device number).
42 53
43NTOK_IID=xx - device IID. 54NTOK_IID=xx
55 device IID.
44 56
45NTOK_CHPID=xx - device CHPID. 57NTOK_CHPID=xx
58 device CHPID.
46 59
47NTOK_CHID=xxxx - device channel ID. 60NTOK_CHID=xxxx
61 device channel ID.
48 62
49Note that the NTOK_* attributes refer to devices other than the one 63Note that the `NTOK_*` attributes refer to devices other than the one
50connected to the system on which the OS is running. 64connected to the system on which the OS is running.
diff --git a/Documentation/s390/s390dbf.rst b/Documentation/s390/s390dbf.rst
new file mode 100644
index 000000000000..cdb36842b898
--- /dev/null
+++ b/Documentation/s390/s390dbf.rst
@@ -0,0 +1,487 @@
1==================
2S390 Debug Feature
3==================
4
5files:
6 - arch/s390/kernel/debug.c
7 - arch/s390/include/asm/debug.h
8
9Description:
10------------
11The goal of this feature is to provide a kernel debug logging API
12where log records can be stored efficiently in memory, where each component
13(e.g. device drivers) can have one separate debug log.
14One purpose of this is to inspect the debug logs after a production system crash
15in order to analyze the reason for the crash.
16
17If the system still runs but only a subcomponent which uses dbf fails,
18it is possible to look at the debug logs on a live system via the Linux
19debugfs filesystem.
20
21The debug feature may also be very useful for kernel and driver development.
22
23Design:
24-------
25Kernel components (e.g. device drivers) can register themselves at the debug
26feature with the function call :c:func:`debug_register()`.
27This function initializes a
28debug log for the caller. For each debug log there exists a number of debug areas,
29of which exactly one is active at a time. Each debug area consists of contiguous
30pages in memory. In the debug areas, debug entries (log records) are stored;
31they are written by event- and exception-calls.
32
33An event-call writes the specified debug entry to the active debug
34area and updates the log pointer for the active area. If the end
35of the active debug area is reached, a wrap around is done (ring buffer)
36and the next debug entry will be written at the beginning of the active
37debug area.
38
39An exception-call writes the specified debug entry to the log and
40switches to the next debug area. This is done in order to be sure
41that the records which describe the origin of the exception are not
42overwritten when a wrap around for the current area occurs.
43
44The debug areas themselves are also ordered in form of a ring buffer.
45When an exception is thrown in the last debug area, the following debug
46entries are then written again in the very first area.
47
48There are four versions for the event- and exception-calls: One for
49logging raw data, one for text, one for numbers (unsigned int and long),
50and one for sprintf-like formatted strings.
51
52Each debug entry contains the following data:
53
54- Timestamp
55- Cpu-Number of calling task
56- Level of debug entry (0...6)
57- Return Address to caller
58- Flag, if entry is an exception or not
59
60The debug logs can be inspected in a live system through entries in
61the debugfs-filesystem. Under the toplevel directory "``s390dbf``" there is
62a directory for each registered component, which is named like the
63corresponding component. The debugfs normally should be mounted to
64``/sys/kernel/debug`` therefore the debug feature can be accessed under
65``/sys/kernel/debug/s390dbf``.
66
67The content of the directories are files which represent different views
68to the debug log. Each component can decide which views should be
69used through registering them with the function :c:func:`debug_register_view()`.
70Predefined views for hex/ascii, sprintf and raw binary data are provided.
71It is also possible to define other views. The content of
72a view can be inspected simply by reading the corresponding debugfs file.
73
74All debug logs have an actual debug level (range from 0 to 6).
75The default level is 3. Event and Exception functions have a :c:data:`level`
76parameter. Only debug entries with a level that is lower or equal
77than the actual level are written to the log. This means, when
78writing events, high priority log entries should have a low level
79value whereas low priority entries should have a high one.
80The actual debug level can be changed with the help of the debugfs-filesystem
81through writing a number string "x" to the ``level`` debugfs file which is
82provided for every debug log. Debugging can be switched off completely
83by using "-" on the ``level`` debugfs file.
84
85Example::
86
87 > echo "-" > /sys/kernel/debug/s390dbf/dasd/level
88
89It is also possible to deactivate the debug feature globally for every
90debug log. You can change the behavior using 2 sysctl parameters in
91``/proc/sys/s390dbf``:
92
93There are currently 2 possible triggers, which stop the debug feature
94globally. The first possibility is to use the ``debug_active`` sysctl. If
95set to 1 the debug feature is running. If ``debug_active`` is set to 0 the
96debug feature is turned off.
97
98The second trigger which stops the debug feature is a kernel oops.
99That prevents the debug feature from overwriting debug information that
100happened before the oops. After an oops you can reactivate the debug feature
101by piping 1 to ``/proc/sys/s390dbf/debug_active``. Nevertheless, it's not
102suggested to use an oopsed kernel in a production environment.
103
104If you want to disallow the deactivation of the debug feature, you can use
105the ``debug_stoppable`` sysctl. If you set ``debug_stoppable`` to 0 the debug
106feature cannot be stopped. If the debug feature is already stopped, it
107will stay deactivated.
108
109Kernel Interfaces:
110------------------
111
112.. kernel-doc:: arch/s390/kernel/debug.c
113.. kernel-doc:: arch/s390/include/asm/debug.h
114
115Predefined views:
116-----------------
117
118.. code-block:: c
119
120 extern struct debug_view debug_hex_ascii_view;
121
122 extern struct debug_view debug_raw_view;
123
124 extern struct debug_view debug_sprintf_view;
125
126Examples
127--------
128
129.. code-block:: c
130
131 /*
132 * hex_ascii- + raw-view Example
133 */
134
135 #include <linux/init.h>
136 #include <asm/debug.h>
137
138 static debug_info_t *debug_info;
139
140 static int init(void)
141 {
142 /* register 4 debug areas with one page each and 4 byte data field */
143
144 debug_info = debug_register("test", 1, 4, 4 );
145 debug_register_view(debug_info, &debug_hex_ascii_view);
146 debug_register_view(debug_info, &debug_raw_view);
147
148 debug_text_event(debug_info, 4 , "one ");
149 debug_int_exception(debug_info, 4, 4711);
150 debug_event(debug_info, 3, &debug_info, 4);
151
152 return 0;
153 }
154
155 static void cleanup(void)
156 {
157 debug_unregister(debug_info);
158 }
159
160 module_init(init);
161 module_exit(cleanup);
162
163.. code-block:: c
164
165 /*
166 * sprintf-view Example
167 */
168
169 #include <linux/init.h>
170 #include <asm/debug.h>
171
172 static debug_info_t *debug_info;
173
174 static int init(void)
175 {
176 /* register 4 debug areas with one page each and data field for */
177 /* format string pointer + 2 varargs (= 3 * sizeof(long)) */
178
179 debug_info = debug_register("test", 1, 4, sizeof(long) * 3);
180 debug_register_view(debug_info, &debug_sprintf_view);
181
182 debug_sprintf_event(debug_info, 2 , "first event in %s:%i\n",__FILE__,__LINE__);
183 debug_sprintf_exception(debug_info, 1, "pointer to debug info: %p\n",&debug_info);
184
185 return 0;
186 }
187
188 static void cleanup(void)
189 {
190 debug_unregister(debug_info);
191 }
192
193 module_init(init);
194 module_exit(cleanup);
195
196Debugfs Interface
197-----------------
198Views to the debug logs can be investigated through reading the corresponding
199debugfs-files:
200
201Example::
202
203 > ls /sys/kernel/debug/s390dbf/dasd
204 flush hex_ascii level pages raw
205 > cat /sys/kernel/debug/s390dbf/dasd/hex_ascii | sort -k2,2 -s
206 00 00974733272:680099 2 - 02 0006ad7e 07 ea 4a 90 | ....
207 00 00974733272:682210 2 - 02 0006ade6 46 52 45 45 | FREE
208 00 00974733272:682213 2 - 02 0006adf6 07 ea 4a 90 | ....
209 00 00974733272:682281 1 * 02 0006ab08 41 4c 4c 43 | EXCP
210 01 00974733272:682284 2 - 02 0006ab16 45 43 4b 44 | ECKD
211 01 00974733272:682287 2 - 02 0006ab28 00 00 00 04 | ....
212 01 00974733272:682289 2 - 02 0006ab3e 00 00 00 20 | ...
213 01 00974733272:682297 2 - 02 0006ad7e 07 ea 4a 90 | ....
214 01 00974733272:684384 2 - 00 0006ade6 46 52 45 45 | FREE
215 01 00974733272:684388 2 - 00 0006adf6 07 ea 4a 90 | ....
216
217See the section about predefined views for an explanation of the above output.
218
219Changing the debug level
220------------------------
221
222Example::
223
224
225 > cat /sys/kernel/debug/s390dbf/dasd/level
226 3
227 > echo "5" > /sys/kernel/debug/s390dbf/dasd/level
228 > cat /sys/kernel/debug/s390dbf/dasd/level
229 5
230
231Flushing debug areas
232--------------------
233Debug areas can be flushed by piping the number of the desired
234area (0...n) to the debugfs file "flush". When using "-" all debug areas
235are flushed.
236
237Examples:
238
2391. Flush debug area 0::
240
241 > echo "0" > /sys/kernel/debug/s390dbf/dasd/flush
242
2432. Flush all debug areas::
244
245 > echo "-" > /sys/kernel/debug/s390dbf/dasd/flush
246
247Changing the size of debug areas
248------------------------------------
249It is possible to change the size of debug areas by piping
250the number of pages to the debugfs file "pages". The resize request will
251also flush the debug areas.
252
253Example:
254
255Define 4 pages for the debug areas of debug feature "dasd"::
256
257 > echo "4" > /sys/kernel/debug/s390dbf/dasd/pages
258
259Stopping the debug feature
260--------------------------
261Example:
262
2631. Check if stopping is allowed::
264
265 > cat /proc/sys/s390dbf/debug_stoppable
266
2672. Stop debug feature::
268
269 > echo 0 > /proc/sys/s390dbf/debug_active
270
271crash Interface
272---------------
273Since version 5.1.0, the ``crash`` tool has a built-in command
274``s390dbf`` to display all the debug logs or export them to the file system.
275With this tool it is possible
276to investigate the debug logs on a live system and in a memory dump
277taken after a system crash.
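
A minimal session sketch (hypothetical; the exact sub-command syntax and
options are described by ``help s390dbf`` within the crash tool)::

	crash> s390dbf
	... list of the registered debug logs ...
	crash> s390dbf dasd
	... entries of the "dasd" debug log ...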
278
279Investigating raw memory
280------------------------
281One last possibility to investigate the debug logs on a live
282system and after a system crash is to look at the raw memory
283under VM or at the Service Element.
284The anchor of the debug logs can be found through
285the ``debug_area_first`` symbol in the System map. From there one has
286to follow the pointers of the data structures defined
287in debug.h and find the debug areas in memory.
288Normally, modules which use the debug feature also have
289a global variable with the pointer to their debug logs. By following
290this pointer it is also possible to find the debug logs in
291memory.
292
293For this method it is recommended to use a data field length of
294'16 * x + 4' bytes (x = 0..n) in :c:func:`debug_register()` in
295order to see the debug entries well formatted.
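
A minimal sketch following this rule (hypothetical driver name; here x = 1,
i.e. a 20 byte data field):

.. code-block:: c

	/* 16 * 1 + 4 = 20 byte data field keeps the raw dump well aligned */
	debug_info = debug_register("mydrv", 1, 4, 16 * 1 + 4);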
296
297
298Predefined Views
299----------------
300
301There are three predefined views: hex_ascii, raw and sprintf.
302The hex_ascii view shows the data field in hex and ascii representation
303(e.g. ``45 43 4b 44 | ECKD``).
304The raw view returns a bytestream as the debug areas are stored in memory.
305
306The sprintf view formats the debug entries in the same way as the sprintf
307function would do. The sprintf event/exception functions write to the
308debug entry a pointer to the format string (size = sizeof(long))
309and for each vararg a long value. So e.g. for a debug entry with a format
310string plus two varargs one would need to allocate a (3 * sizeof(long))
311byte data area in the debug_register() function.
312
313IMPORTANT:
314 Using "%s" in sprintf event functions is dangerous. You can only
315 use "%s" in the sprintf event functions, if the memory for the passed string
316 is available as long as the debug feature exists. The reason behind this is
317 that due to performance considerations only a pointer to the string is stored
318 in the debug feature. If you log a string that is freed afterwards, you will
319 get an OOPS when inspecting the debug feature, because then the debug feature
320 will access the already freed memory.
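
A minimal sketch of the difference (hypothetical driver code, assuming a
``debug_info`` that was registered with the sprintf view; kstrdup()/kfree()
come from <linux/slab.h>):

.. code-block:: c

	static const char banner[] = "driver loaded"; /* lives as long as the module */
	char *tmp = kstrdup("transient data", GFP_KERNEL);

	/* OK: the string stays available while the debug feature exists */
	debug_sprintf_event(debug_info, 3, "%s\n", banner);

	/* Dangerous: only the pointer is stored; reading the view after
	 * kfree() would access already freed memory
	 */
	debug_sprintf_event(debug_info, 3, "%s\n", tmp);
	kfree(tmp);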
321
322NOTE:
323 If using the sprintf view do NOT use other event/exception functions
324 than the sprintf-event and -exception functions.
325
326The format of the hex_ascii and sprintf view is as follows:
327
328- Number of area
329- Timestamp (formatted as seconds and microseconds since 00:00:00 Coordinated
330 Universal Time (UTC), January 1, 1970)
331- level of debug entry
332- Exception flag (* = Exception)
333- Cpu-Number of calling task
334- Return Address to caller
335- data field
336
337The format of the raw view is:
338
339- Header as described in debug.h
340- datafield
341
342A typical line of the hex_ascii view will look like the following (the first
343line is only for explanation and will not be displayed when reading the view)::
344
345 area time level exception cpu caller data (hex + ascii)
346 --------------------------------------------------------------------------
347 00 00964419409:440690 1 - 00 88023fe
348
349
350Defining views
351--------------
352
353Views are specified with the 'debug_view' structure. It holds pointers to
354callback functions which are used for reading and writing the debugfs files:
355
356.. code-block:: c
357
358 struct debug_view {
359 char name[DEBUG_MAX_PROCF_LEN];
360 debug_prolog_proc_t* prolog_proc;
361 debug_header_proc_t* header_proc;
362 debug_format_proc_t* format_proc;
363 debug_input_proc_t* input_proc;
364 void* private_data;
365 };
366
367where:
368
369.. code-block:: c
370
371 typedef int (debug_header_proc_t) (debug_info_t* id,
372 struct debug_view* view,
373 int area,
374 debug_entry_t* entry,
375 char* out_buf);
376
377 typedef int (debug_format_proc_t) (debug_info_t* id,
378 struct debug_view* view, char* out_buf,
379 const char* in_buf);
380 typedef int (debug_prolog_proc_t) (debug_info_t* id,
381 struct debug_view* view,
382 char* out_buf);
383 typedef int (debug_input_proc_t) (debug_info_t* id,
384 struct debug_view* view,
385 struct file* file, const char* user_buf,
386 size_t in_buf_size, loff_t* offset);
387
388
389The "private_data" member can be used as a pointer to view-specific data.
390It is not used by the debug feature itself.
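
A minimal sketch of how view-specific data might be attached (hypothetical
names; ``debug_dflt_header_fn`` is described below):

.. code-block:: c

	struct my_view_cfg {
		int verbose;
	};

	static struct my_view_cfg my_cfg = { .verbose = 1 };

	static int my_format_fn(debug_info_t *id, struct debug_view *view,
				char *out_buf, const char *in_buf)
	{
		struct my_view_cfg *cfg = view->private_data;

		/* format the entry's data field; in_buf points to it */
		return sprintf(out_buf, cfg->verbose ? "entry (verbose)\n" : "entry\n");
	}

	static struct debug_view my_view = {
		"my_view",              /* name of view */
		NULL,                   /* no prolog */
		&debug_dflt_header_fn,  /* default header for each entry */
		&my_format_fn,          /* own format function */
		NULL,                   /* no input function */
		&my_cfg                 /* private_data */
	};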
391
392The output when reading a debugfs file is structured like this::
393
394 "prolog_proc output"
395
396 "header_proc output 1" "format_proc output 1"
397 "header_proc output 2" "format_proc output 2"
398 "header_proc output 3" "format_proc output 3"
399 ...
400
401When a view is read from debugfs, the debug feature calls the
402'prolog_proc' once for writing the prolog.
403Then 'header_proc' and 'format_proc' are called for each
404existing debug entry.
405
406The input_proc can be used to implement functionality that is executed when
407something is written to the view (e.g. ``echo "0" > /sys/kernel/debug/s390dbf/dasd/level``).
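
A minimal sketch of an input_proc (hypothetical; it interprets text written
to the view as a new debug level, using the signature shown above;
copy_from_user() comes from <linux/uaccess.h>, strim() from <linux/string.h>):

.. code-block:: c

	static int my_input_fn(debug_info_t *id, struct debug_view *view,
			       struct file *file, const char *user_buf,
			       size_t in_buf_size, loff_t *offset)
	{
		char buf[16];
		size_t len = in_buf_size < sizeof(buf) - 1 ? in_buf_size : sizeof(buf) - 1;
		int new_level;

		if (copy_from_user(buf, user_buf, len))
			return -EFAULT;
		buf[len] = '\0';

		if (kstrtoint(strim(buf), 10, &new_level))
			return -EINVAL;

		debug_set_level(id, new_level);
		return in_buf_size;	/* all input consumed */
	}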
408
409For header_proc the default function
410:c:func:`debug_dflt_header_fn()`, which is defined in debug.h,
411can be used; it produces the same header output as the predefined views.
412E.g::
413
414 00 00964419409:440761 2 - 00 88023ec
415
416To see how to use the callback functions, check the implementation
417of the default views.
418
419Example:
420
421.. code-block:: c
422
423 #include <asm/debug.h>
424
425 #define UNKNOWNSTR "data: %08x"
426
427 const char* messages[] =
428 {"This error...........\n",
429 "That error...........\n",
430 "Problem..............\n",
431 "Something went wrong.\n",
432 "Everything ok........\n",
433 NULL
434 };
435
436 static int debug_test_format_fn(
437 debug_info_t *id, struct debug_view *view,
438 char *out_buf, const char *in_buf
439 )
440 {
441 int i, rc = 0;
442
443 if (id->buf_size >= 4) {
444 int msg_nr = *((int*)in_buf);
445 if (msg_nr < sizeof(messages) / sizeof(char*) - 1)
446 rc += sprintf(out_buf, "%s", messages[msg_nr]);
447 else
448 rc += sprintf(out_buf, UNKNOWNSTR, msg_nr);
449 }
450 return rc;
451 }
452
453 struct debug_view debug_test_view = {
454 "myview", /* name of view */
455 NULL, /* no prolog */
456 &debug_dflt_header_fn, /* default header for each entry */
457 &debug_test_format_fn, /* our own format function */
458 NULL, /* no input function */
459 NULL /* no private data */
460 };
461
462test:
463=====
464
465.. code-block:: c
466
467 debug_info_t *debug_info;
468 int i;
469 ...
470 debug_info = debug_register("test", 0, 4, 4);
471 debug_register_view(debug_info, &debug_test_view);
472 for (i = 0; i < 10; i ++)
473 debug_int_event(debug_info, 1, i);
474
475::
476
477 > cat /sys/kernel/debug/s390dbf/test/myview
478 00 00964419734:611402 1 - 00 88042ca This error...........
479 00 00964419734:611405 1 - 00 88042ca That error...........
480 00 00964419734:611408 1 - 00 88042ca Problem..............
481 00 00964419734:611411 1 - 00 88042ca Something went wrong.
482 00 00964419734:611414 1 - 00 88042ca Everything ok........
483 00 00964419734:611417 1 - 00 88042ca data: 00000005
484 00 00964419734:611419 1 - 00 88042ca data: 00000006
485 00 00964419734:611422 1 - 00 88042ca data: 00000007
486 00 00964419734:611425 1 - 00 88042ca data: 00000008
487 00 00964419734:611428 1 - 00 88042ca data: 00000009
diff --git a/Documentation/s390/s390dbf.txt b/Documentation/s390/s390dbf.txt
deleted file mode 100644
index 61329fd62e89..000000000000
--- a/Documentation/s390/s390dbf.txt
+++ /dev/null
@@ -1,667 +0,0 @@
1S390 Debug Feature
2==================
3
4files: arch/s390/kernel/debug.c
5 arch/s390/include/asm/debug.h
6
7Description:
8------------
9The goal of this feature is to provide a kernel debug logging API
10where log records can be stored efficiently in memory, where each component
11(e.g. device drivers) can have one separate debug log.
12One purpose of this is to inspect the debug logs after a production system crash
13in order to analyze the reason for the crash.
14If the system still runs but only a subcomponent which uses dbf fails,
15it is possible to look at the debug logs on a live system via the Linux
16debugfs filesystem.
17The debug feature may also very useful for kernel and driver development.
18
19Design:
20-------
21Kernel components (e.g. device drivers) can register themselves at the debug
22feature with the function call debug_register(). This function initializes a
23debug log for the caller. For each debug log exists a number of debug areas
24where exactly one is active at one time. Each debug area consists of contiguous
25pages in memory. In the debug areas there are stored debug entries (log records)
26which are written by event- and exception-calls.
27
28An event-call writes the specified debug entry to the active debug
29area and updates the log pointer for the active area. If the end
30of the active debug area is reached, a wrap around is done (ring buffer)
31and the next debug entry will be written at the beginning of the active
32debug area.
33
34An exception-call writes the specified debug entry to the log and
35switches to the next debug area. This is done in order to be sure
36that the records which describe the origin of the exception are not
37overwritten when a wrap around for the current area occurs.
38
39The debug areas themselves are also ordered in form of a ring buffer.
40When an exception is thrown in the last debug area, the following debug
41entries are then written again in the very first area.
42
43There are three versions for the event- and exception-calls: One for
44logging raw data, one for text and one for numbers.
45
46Each debug entry contains the following data:
47
48- Timestamp
49- Cpu-Number of calling task
50- Level of debug entry (0...6)
51- Return Address to caller
52- Flag, if entry is an exception or not
53
54The debug logs can be inspected in a live system through entries in
55the debugfs-filesystem. Under the toplevel directory "s390dbf" there is
56a directory for each registered component, which is named like the
57corresponding component. The debugfs normally should be mounted to
58/sys/kernel/debug therefore the debug feature can be accessed under
59/sys/kernel/debug/s390dbf.
60
61The content of the directories are files which represent different views
62to the debug log. Each component can decide which views should be
63used through registering them with the function debug_register_view().
64Predefined views for hex/ascii, sprintf and raw binary data are provided.
65It is also possible to define other views. The content of
66a view can be inspected simply by reading the corresponding debugfs file.
67
68All debug logs have an actual debug level (range from 0 to 6).
69The default level is 3. Event and Exception functions have a 'level'
70parameter. Only debug entries with a level that is lower or equal
71than the actual level are written to the log. This means, when
72writing events, high priority log entries should have a low level
73value whereas low priority entries should have a high one.
74The actual debug level can be changed with the help of the debugfs-filesystem
75through writing a number string "x" to the 'level' debugfs file which is
76provided for every debug log. Debugging can be switched off completely
77by using "-" on the 'level' debugfs file.
78
79Example:
80
81> echo "-" > /sys/kernel/debug/s390dbf/dasd/level
82
83It is also possible to deactivate the debug feature globally for every
84debug log. You can change the behavior using 2 sysctl parameters in
85/proc/sys/s390dbf:
86There are currently 2 possible triggers, which stop the debug feature
87globally. The first possibility is to use the "debug_active" sysctl. If
88set to 1 the debug feature is running. If "debug_active" is set to 0 the
89debug feature is turned off.
90The second trigger which stops the debug feature is a kernel oops.
91That prevents the debug feature from overwriting debug information that
92happened before the oops. After an oops you can reactivate the debug feature
93by piping 1 to /proc/sys/s390dbf/debug_active. Nevertheless, its not
94suggested to use an oopsed kernel in a production environment.
95If you want to disallow the deactivation of the debug feature, you can use
96the "debug_stoppable" sysctl. If you set "debug_stoppable" to 0 the debug
97feature cannot be stopped. If the debug feature is already stopped, it
98will stay deactivated.
99
100Kernel Interfaces:
101------------------
102
103----------------------------------------------------------------------------
104debug_info_t *debug_register(char *name, int pages, int nr_areas,
105 int buf_size);
106
107Parameter: name: Name of debug log (e.g. used for debugfs entry)
108 pages: number of pages, which will be allocated per area
109 nr_areas: number of debug areas
110 buf_size: size of data area in each debug entry
111
112Return Value: Handle for generated debug area
113 NULL if register failed
114
115Description: Allocates memory for a debug log
116 Must not be called within an interrupt handler
117
118----------------------------------------------------------------------------
119debug_info_t *debug_register_mode(char *name, int pages, int nr_areas,
120 int buf_size, mode_t mode, uid_t uid,
121 gid_t gid);
122
123Parameter: name: Name of debug log (e.g. used for debugfs entry)
124 pages: Number of pages, which will be allocated per area
125 nr_areas: Number of debug areas
126 buf_size: Size of data area in each debug entry
127 mode: File mode for debugfs files. E.g. S_IRWXUGO
128 uid: User ID for debugfs files. Currently only 0 is
129 supported.
130 gid: Group ID for debugfs files. Currently only 0 is
131 supported.
132
133Return Value: Handle for generated debug area
134 NULL if register failed
135
136Description: Allocates memory for a debug log
137 Must not be called within an interrupt handler
138
139---------------------------------------------------------------------------
140void debug_unregister (debug_info_t * id);
141
142Parameter: id: handle for debug log
143
144Return Value: none
145
146Description: frees memory for a debug log and removes all registered debug
147 views.
148 Must not be called within an interrupt handler
149
150---------------------------------------------------------------------------
151void debug_set_level (debug_info_t * id, int new_level);
152
153Parameter: id: handle for debug log
154 new_level: new debug level
155
156Return Value: none
157
158Description: Sets new actual debug level if new_level is valid.
159
160---------------------------------------------------------------------------
161bool debug_level_enabled (debug_info_t * id, int level);
162
163Parameter: id: handle for debug log
164 level: debug level
165
166Return Value: True if level is less or equal to the current debug level.
167
168Description: Returns true if debug events for the specified level would be
169 logged. Otherwise returns false.
170---------------------------------------------------------------------------
171void debug_stop_all(void);
172
173Parameter: none
174
175Return Value: none
176
177Description: stops the debug feature if stopping is allowed. Currently
178 used in case of a kernel oops.
179
180---------------------------------------------------------------------------
181debug_entry_t* debug_event (debug_info_t* id, int level, void* data,
182 int length);
183
184Parameter: id: handle for debug log
185 level: debug level
186 data: pointer to data for debug entry
187 length: length of data in bytes
188
189Return Value: Address of written debug entry
190
191Description: writes debug entry to active debug area (if level <= actual
192 debug level)
193
194---------------------------------------------------------------------------
195debug_entry_t* debug_int_event (debug_info_t * id, int level,
196 unsigned int data);
197debug_entry_t* debug_long_event(debug_info_t * id, int level,
198 unsigned long data);
199
200Parameter: id: handle for debug log
201 level: debug level
202 data: integer value for debug entry
203
204Return Value: Address of written debug entry
205
206Description: writes debug entry to active debug area (if level <= actual
207 debug level)
208
209---------------------------------------------------------------------------
210debug_entry_t* debug_text_event (debug_info_t * id, int level,
211 const char* data);
212
213Parameter: id: handle for debug log
214 level: debug level
215 data: string for debug entry
216
217Return Value: Address of written debug entry
218
219Description: writes debug entry in ascii format to active debug area
220 (if level <= actual debug level)
221
222---------------------------------------------------------------------------
223debug_entry_t* debug_sprintf_event (debug_info_t * id, int level,
224 char* string,...);
225
226Parameter: id: handle for debug log
227 level: debug level
228 string: format string for debug entry
229 ...: varargs used as in sprintf()
230
231Return Value: Address of written debug entry
232
233Description: writes debug entry with format string and varargs (longs) to
234 active debug area (if level $<=$ actual debug level).
235 floats and long long datatypes cannot be used as varargs.
236
237---------------------------------------------------------------------------
238
239debug_entry_t* debug_exception (debug_info_t* id, int level, void* data,
240 int length);
241
242Parameter: id: handle for debug log
243 level: debug level
244 data: pointer to data for debug entry
245 length: length of data in bytes
246
247Return Value: Address of written debug entry
248
249Description: writes debug entry to active debug area (if level <= actual
250 debug level) and switches to next debug area
251
252---------------------------------------------------------------------------
253debug_entry_t* debug_int_exception (debug_info_t * id, int level,
254 unsigned int data);
255debug_entry_t* debug_long_exception(debug_info_t * id, int level,
256 unsigned long data);
257
258Parameter: id: handle for debug log
259 level: debug level
260 data: integer value for debug entry
261
262Return Value: Address of written debug entry
263
264Description: writes debug entry to active debug area (if level <= actual
265 debug level) and switches to next debug area
266
267---------------------------------------------------------------------------
268debug_entry_t* debug_text_exception (debug_info_t * id, int level,
269 const char* data);
270
271Parameter: id: handle for debug log
272 level: debug level
273 data: string for debug entry
274
275Return Value: Address of written debug entry
276
277Description: writes debug entry in ascii format to active debug area
278 (if level <= actual debug level) and switches to next debug
279 area
280
281---------------------------------------------------------------------------
282debug_entry_t* debug_sprintf_exception (debug_info_t * id, int level,
283 char* string,...);
284
285Parameter: id: handle for debug log
286 level: debug level
287 string: format string for debug entry
288 ...: varargs used as in sprintf()
289
290Return Value: Address of written debug entry
291
292Description: writes debug entry with format string and varargs (longs) to
293 active debug area (if level $<=$ actual debug level) and
294 switches to next debug area.
295 floats and long long datatypes cannot be used as varargs.
296
297---------------------------------------------------------------------------
298
299int debug_register_view (debug_info_t * id, struct debug_view *view);
300
301Parameter: id: handle for debug log
302 view: pointer to debug view struct
303
304Return Value: 0 : ok
305 < 0: Error
306
307Description: registers new debug view and creates debugfs dir entry
308
309---------------------------------------------------------------------------
310int debug_unregister_view (debug_info_t * id, struct debug_view *view);
311
312Parameter: id: handle for debug log
313 view: pointer to debug view struct
314
315Return Value: 0 : ok
316 < 0: Error
317
318Description: unregisters debug view and removes debugfs dir entry
319
320
321
322Predefined views:
323-----------------
324
325extern struct debug_view debug_hex_ascii_view;
326extern struct debug_view debug_raw_view;
327extern struct debug_view debug_sprintf_view;
328
329Examples
330--------
331
332/*
333 * hex_ascii- + raw-view Example
334 */
335
336#include <linux/init.h>
337#include <asm/debug.h>
338
339static debug_info_t* debug_info;
340
341static int init(void)
342{
343 /* register 4 debug areas with one page each and 4 byte data field */
344
345 debug_info = debug_register ("test", 1, 4, 4 );
346 debug_register_view(debug_info,&debug_hex_ascii_view);
347 debug_register_view(debug_info,&debug_raw_view);
348
349 debug_text_event(debug_info, 4 , "one ");
350 debug_int_exception(debug_info, 4, 4711);
351 debug_event(debug_info, 3, &debug_info, 4);
352
353 return 0;
354}
355
356static void cleanup(void)
357{
358 debug_unregister (debug_info);
359}
360
361module_init(init);
362module_exit(cleanup);
363
364---------------------------------------------------------------------------
365
366/*
367 * sprintf-view Example
368 */
369
370#include <linux/init.h>
371#include <asm/debug.h>
372
373static debug_info_t* debug_info;
374
375static int init(void)
376{
377 /* register 4 debug areas with one page each and data field for */
378 /* format string pointer + 2 varargs (= 3 * sizeof(long)) */
379
380 debug_info = debug_register ("test", 1, 4, sizeof(long) * 3);
381 debug_register_view(debug_info,&debug_sprintf_view);
382
383 debug_sprintf_event(debug_info, 2 , "first event in %s:%i\n",__FILE__,__LINE__);
384 debug_sprintf_exception(debug_info, 1, "pointer to debug info: %p\n",&debug_info);
385
386 return 0;
387}
388
389static void cleanup(void)
390{
391 debug_unregister (debug_info);
392}
393
394module_init(init);
395module_exit(cleanup);
396
397
398
399Debugfs Interface
400----------------
401Views to the debug logs can be investigated through reading the corresponding
402debugfs-files:
403
404Example:
405
406> ls /sys/kernel/debug/s390dbf/dasd
407flush hex_ascii level pages raw
408> cat /sys/kernel/debug/s390dbf/dasd/hex_ascii | sort -k2,2 -s
40900 00974733272:680099 2 - 02 0006ad7e 07 ea 4a 90 | ....
41000 00974733272:682210 2 - 02 0006ade6 46 52 45 45 | FREE
41100 00974733272:682213 2 - 02 0006adf6 07 ea 4a 90 | ....
41200 00974733272:682281 1 * 02 0006ab08 41 4c 4c 43 | EXCP
41301 00974733272:682284 2 - 02 0006ab16 45 43 4b 44 | ECKD
41401 00974733272:682287 2 - 02 0006ab28 00 00 00 04 | ....
41501 00974733272:682289 2 - 02 0006ab3e 00 00 00 20 | ...
41601 00974733272:682297 2 - 02 0006ad7e 07 ea 4a 90 | ....
41701 00974733272:684384 2 - 00 0006ade6 46 52 45 45 | FREE
41801 00974733272:684388 2 - 00 0006adf6 07 ea 4a 90 | ....
419
420See section about predefined views for explanation of the above output!
421
422Changing the debug level
423------------------------
424
425Example:
426
427
428> cat /sys/kernel/debug/s390dbf/dasd/level
4293
430> echo "5" > /sys/kernel/debug/s390dbf/dasd/level
431> cat /sys/kernel/debug/s390dbf/dasd/level
4325
433
434Flushing debug areas
435--------------------
436Debug areas can be flushed with piping the number of the desired
437area (0...n) to the debugfs file "flush". When using "-" all debug areas
438are flushed.
439
440Examples:
441
4421. Flush debug area 0:
443> echo "0" > /sys/kernel/debug/s390dbf/dasd/flush
444
4452. Flush all debug areas:
446> echo "-" > /sys/kernel/debug/s390dbf/dasd/flush
447
448Changing the size of debug areas
449------------------------------------
450It is possible the change the size of debug areas through piping
451the number of pages to the debugfs file "pages". The resize request will
452also flush the debug areas.
453
454Example:
455
456Define 4 pages for the debug areas of debug feature "dasd":
457> echo "4" > /sys/kernel/debug/s390dbf/dasd/pages
458
459Stooping the debug feature
460--------------------------
461Example:
462
4631. Check if stopping is allowed
464> cat /proc/sys/s390dbf/debug_stoppable
4652. Stop debug feature
466> echo 0 > /proc/sys/s390dbf/debug_active
467
468lcrash Interface
469----------------
470It is planned that the dump analysis tool lcrash gets an additional command
471's390dbf' to display all the debug logs. With this tool it will be possible
472to investigate the debug logs on a live system and with a memory dump after
473a system crash.
474
475Investigating raw memory
476------------------------
477One last possibility to investigate the debug logs at a live
478system and after a system crash is to look at the raw memory
479under VM or at the Service Element.
480It is possible to find the anker of the debug-logs through
481the 'debug_area_first' symbol in the System map. Then one has
482to follow the correct pointers of the data-structures defined
483in debug.h and find the debug-areas in memory.
484Normally modules which use the debug feature will also have
485a global variable with the pointer to the debug-logs. Following
486this pointer it will also be possible to find the debug logs in
487memory.
488
489For this method it is recommended to use '16 * x + 4' byte (x = 0..n)
490for the length of the data field in debug_register() in
491order to see the debug entries well formatted.
492
493
494Predefined Views
495----------------
496
497There are three predefined views: hex_ascii, raw and sprintf.
498The hex_ascii view shows the data field in hex and ascii representation
499(e.g. '45 43 4b 44 | ECKD').
500The raw view returns a bytestream as the debug areas are stored in memory.
501
502The sprintf view formats the debug entries in the same way as the sprintf
503function would do. The sprintf event/exception functions write to the
504debug entry a pointer to the format string (size = sizeof(long))
505and for each vararg a long value. So e.g. for a debug entry with a format
506string plus two varargs one would need to allocate a (3 * sizeof(long))
507byte data area in the debug_register() function.
508
509IMPORTANT: Using "%s" in sprintf event functions is dangerous. You can only
510use "%s" in the sprintf event functions, if the memory for the passed string is
511available as long as the debug feature exists. The reason behind this is that
512due to performance considerations only a pointer to the string is stored in
513the debug feature. If you log a string that is freed afterwards, you will get
514an OOPS when inspecting the debug feature, because then the debug feature will
515access the already freed memory.
516
517NOTE: If using the sprintf view do NOT use other event/exception functions
518than the sprintf-event and -exception functions.
519
520The format of the hex_ascii and sprintf view is as follows:
521- Number of area
522- Timestamp (formatted as seconds and microseconds since 00:00:00 Coordinated
523 Universal Time (UTC), January 1, 1970)
524- level of debug entry
525- Exception flag (* = Exception)
526- Cpu-Number of calling task
527- Return Address to caller
528- data field
529
530The format of the raw view is:
531- Header as described in debug.h
532- datafield
533
534A typical line of the hex_ascii view will look like the following (first line
535is only for explanation and will not be displayed when 'cating' the view):
536
537area time level exception cpu caller data (hex + ascii)
538--------------------------------------------------------------------------
53900 00964419409:440690 1 - 00 88023fe
540
541
542Defining views
543--------------
544
545Views are specified with the 'debug_view' structure. There are defined
546callback functions which are used for reading and writing the debugfs files:
547
548struct debug_view {
549 char name[DEBUG_MAX_PROCF_LEN];
550 debug_prolog_proc_t* prolog_proc;
551 debug_header_proc_t* header_proc;
552 debug_format_proc_t* format_proc;
553 debug_input_proc_t* input_proc;
554 void* private_data;
555};
556
557where
558
559typedef int (debug_header_proc_t) (debug_info_t* id,
560 struct debug_view* view,
561 int area,
562 debug_entry_t* entry,
563 char* out_buf);
564
565typedef int (debug_format_proc_t) (debug_info_t* id,
566 struct debug_view* view, char* out_buf,
567 const char* in_buf);
568typedef int (debug_prolog_proc_t) (debug_info_t* id,
569 struct debug_view* view,
570 char* out_buf);
571typedef int (debug_input_proc_t) (debug_info_t* id,
572 struct debug_view* view,
573 struct file* file, const char* user_buf,
574 size_t in_buf_size, loff_t* offset);
575
576
577The "private_data" member can be used as pointer to view specific data.
578It is not used by the debug feature itself.
579
580The output when reading a debugfs file is structured like this:
581
582"prolog_proc output"
583
584"header_proc output 1" "format_proc output 1"
585"header_proc output 2" "format_proc output 2"
586"header_proc output 3" "format_proc output 3"
587...
588
589When a view is read from the debugfs, the Debug Feature calls the
590'prolog_proc' once for writing the prolog.
591Then 'header_proc' and 'format_proc' are called for each
592existing debug entry.
593
594The input_proc can be used to implement functionality when it is written to
595the view (e.g. like with 'echo "0" > /sys/kernel/debug/s390dbf/dasd/level).
596
597For header_proc there can be used the default function
598debug_dflt_header_fn() which is defined in debug.h.
599and which produces the same header output as the predefined views.
600E.g:
60100 00964419409:440761 2 - 00 88023ec
602
603In order to see how to use the callback functions check the implementation
604of the default views!
605
606Example
607
608#include <asm/debug.h>
609
610#define UNKNOWNSTR "data: %08x"
611
612const char* messages[] =
613{"This error...........\n",
614 "That error...........\n",
615 "Problem..............\n",
616 "Something went wrong.\n",
617 "Everything ok........\n",
618 NULL
619};
620
621static int debug_test_format_fn(
622 debug_info_t * id, struct debug_view *view,
623 char *out_buf, const char *in_buf
624)
625{
626 int i, rc = 0;
627
628 if(id->buf_size >= 4) {
629 int msg_nr = *((int*)in_buf);
630 if(msg_nr < sizeof(messages)/sizeof(char*) - 1)
631 rc += sprintf(out_buf, "%s", messages[msg_nr]);
632 else
633 rc += sprintf(out_buf, UNKNOWNSTR, msg_nr);
634 }
635 out:
636 return rc;
637}
638
639struct debug_view debug_test_view = {
640 "myview", /* name of view */
641 NULL, /* no prolog */
642 &debug_dflt_header_fn, /* default header for each entry */
643 &debug_test_format_fn, /* our own format function */
644 NULL, /* no input function */
645 NULL /* no private data */
646};
647
648=====
649test:
650=====
651debug_info_t *debug_info;
652...
653debug_info = debug_register ("test", 0, 4, 4 ));
654debug_register_view(debug_info, &debug_test_view);
655for(i = 0; i < 10; i ++) debug_int_event(debug_info, 1, i);
656
657> cat /sys/kernel/debug/s390dbf/test/myview
65800 00964419734:611402 1 - 00 88042ca This error...........
65900 00964419734:611405 1 - 00 88042ca That error...........
66000 00964419734:611408 1 - 00 88042ca Problem..............
66100 00964419734:611411 1 - 00 88042ca Something went wrong.
66200 00964419734:611414 1 - 00 88042ca Everything ok........
66300 00964419734:611417 1 - 00 88042ca data: 00000005
66400 00964419734:611419 1 - 00 88042ca data: 00000006
66500 00964419734:611422 1 - 00 88042ca data: 00000007
66600 00964419734:611425 1 - 00 88042ca data: 00000008
66700 00964419734:611428 1 - 00 88042ca data: 00000009
diff --git a/Documentation/s390/text_files.rst b/Documentation/s390/text_files.rst
new file mode 100644
index 000000000000..c94d05d4fa17
--- /dev/null
+++ b/Documentation/s390/text_files.rst
@@ -0,0 +1,11 @@
1ibm 3270 changelog
2------------------
3
4.. include:: 3270.ChangeLog
5 :literal:
6
7ibm 3270 config3270.sh
8----------------------
9
10.. literalinclude:: config3270.sh
11 :language: shell
diff --git a/Documentation/s390/vfio-ap.txt b/Documentation/s390/vfio-ap.rst
index 65167cfe4485..b5c51f7c748d 100644
--- a/Documentation/s390/vfio-ap.txt
+++ b/Documentation/s390/vfio-ap.rst
@@ -1,4 +1,9 @@
1Introduction: 1===============================
2Adjunct Processor (AP) facility
3===============================
4
5
6Introduction
2============ 7============
3The Adjunct Processor (AP) facility is an IBM Z cryptographic facility comprised 8The Adjunct Processor (AP) facility is an IBM Z cryptographic facility comprised
4of three AP instructions and from 1 up to 256 PCIe cryptographic adapter cards. 9of three AP instructions and from 1 up to 256 PCIe cryptographic adapter cards.
@@ -11,7 +16,7 @@ framework. This implementation relies considerably on the s390 virtualization
11facilities which do most of the hard work of providing direct access to AP 16facilities which do most of the hard work of providing direct access to AP
12devices. 17devices.
13 18
14AP Architectural Overview: 19AP Architectural Overview
15========================= 20=========================
16To facilitate the comprehension of the design, let's start with some 21To facilitate the comprehension of the design, let's start with some
17definitions: 22definitions:
@@ -31,13 +36,13 @@ definitions:
31 in the LPAR, the AP bus detects the AP adapter cards assigned to the LPAR and 36 in the LPAR, the AP bus detects the AP adapter cards assigned to the LPAR and
32 creates a sysfs device for each assigned adapter. For example, if AP adapters 37 creates a sysfs device for each assigned adapter. For example, if AP adapters
33 4 and 10 (0x0a) are assigned to the LPAR, the AP bus will create the following 38 4 and 10 (0x0a) are assigned to the LPAR, the AP bus will create the following
34 sysfs device entries: 39 sysfs device entries::
35 40
36 /sys/devices/ap/card04 41 /sys/devices/ap/card04
37 /sys/devices/ap/card0a 42 /sys/devices/ap/card0a
38 43
39 Symbolic links to these devices will also be created in the AP bus devices 44 Symbolic links to these devices will also be created in the AP bus devices
40 sub-directory: 45 sub-directory::
41 46
42 /sys/bus/ap/devices/[card04] 47 /sys/bus/ap/devices/[card04]
43 /sys/bus/ap/devices/[card04] 48 /sys/bus/ap/devices/[card04]
@@ -84,7 +89,7 @@ definitions:
84 the cross product of the AP adapter and usage domain numbers detected when the 89 the cross product of the AP adapter and usage domain numbers detected when the
85 AP bus module is loaded. For example, if adapters 4 and 10 (0x0a) and usage 90 AP bus module is loaded. For example, if adapters 4 and 10 (0x0a) and usage
86 domains 6 and 71 (0x47) are assigned to the LPAR, the AP bus will create the 91 domains 6 and 71 (0x47) are assigned to the LPAR, the AP bus will create the
87 following sysfs entries: 92 following sysfs entries::
88 93
89 /sys/devices/ap/card04/04.0006 94 /sys/devices/ap/card04/04.0006
90 /sys/devices/ap/card04/04.0047 95 /sys/devices/ap/card04/04.0047
@@ -92,7 +97,7 @@ definitions:
92 /sys/devices/ap/card0a/0a.0047 97 /sys/devices/ap/card0a/0a.0047
93 98
94 The following symbolic links to these devices will be created in the AP bus 99 The following symbolic links to these devices will be created in the AP bus
95 devices subdirectory: 100 devices subdirectory::
96 101
97 /sys/bus/ap/devices/[04.0006] 102 /sys/bus/ap/devices/[04.0006]
98 /sys/bus/ap/devices/[04.0047] 103 /sys/bus/ap/devices/[04.0047]
@@ -112,7 +117,7 @@ definitions:
112 domain that is not one of the usage domains, but the modified domain 117 domain that is not one of the usage domains, but the modified domain
113 must be one of the control domains. 118 must be one of the control domains.
114 119
115AP and SIE: 120AP and SIE
116========== 121==========
117Let's now take a look at how AP instructions executed on a guest are interpreted 122Let's now take a look at how AP instructions executed on a guest are interpreted
118by the hardware. 123by the hardware.
@@ -153,7 +158,7 @@ and 2 and usage domains 5 and 6 are assigned to a guest, the APQNs (1,5), (1,6),
153 158
154The APQNs can provide secure key functionality - i.e., a private key is stored 159The APQNs can provide secure key functionality - i.e., a private key is stored
155on the adapter card for each of its domains - so each APQN must be assigned to 160on the adapter card for each of its domains - so each APQN must be assigned to
156at most one guest or to the linux host. 161at most one guest or to the linux host::
157 162
158 Example 1: Valid configuration: 163 Example 1: Valid configuration:
159 ------------------------------ 164 ------------------------------
@@ -181,8 +186,8 @@ at most one guest or to the linux host.
181 This is an invalid configuration because both guests have access to 186 This is an invalid configuration because both guests have access to
182 APQN (1,6). 187 APQN (1,6).
183 188
184The Design: 189The Design
185=========== 190==========
186The design introduces three new objects: 191The design introduces three new objects:
187 192
1881. AP matrix device 1931. AP matrix device
@@ -205,43 +210,43 @@ The VFIO AP (vfio_ap) device driver serves the following purposes:
205Reserve APQNs for exclusive use of KVM guests 210Reserve APQNs for exclusive use of KVM guests
206--------------------------------------------- 211---------------------------------------------
207The following block diagram illustrates the mechanism by which APQNs are 212The following block diagram illustrates the mechanism by which APQNs are
208reserved: 213reserved::
209 214
210 +------------------+ 215 +------------------+
211 7 remove | | 216 7 remove | |
212 +--------------------> cex4queue driver | 217 +--------------------> cex4queue driver |
213 | | | 218 | | |
214 | +------------------+ 219 | +------------------+
215 | 220 |
216 | 221 |
217 | +------------------+ +-----------------+ 222 | +------------------+ +----------------+
218 | 5 register driver | | 3 create | | 223 | 5 register driver | | 3 create | |
219 | +----------------> Device core +----------> matrix device | 224 | +----------------> Device core +----------> matrix device |
220 | | | | | | 225 | | | | | |
221 | | +--------^---------+ +-----------------+ 226 | | +--------^---------+ +----------------+
222 | | | 227 | | |
223 | | +-------------------+ 228 | | +-------------------+
224 | | +-----------------------------------+ | 229 | | +-----------------------------------+ |
225 | | | 4 register AP driver | | 2 register device 230 | | | 4 register AP driver | | 2 register device
226 | | | | | 231 | | | | |
227+--------+---+-v---+ +--------+-------+-+ 232 +--------+---+-v---+ +--------+-------+-+
228| | | | 233 | | | |
229| ap_bus +--------------------- > vfio_ap driver | 234 | ap_bus +--------------------- > vfio_ap driver |
230| | 8 probe | | 235 | | 8 probe | |
231+--------^---------+ +--^--^------------+ 236 +--------^---------+ +--^--^------------+
2326 edit | | | 237 6 edit | | |
233 apmask | +-----------------------------+ | 9 mdev create 238 apmask | +-----------------------------+ | 9 mdev create
234 aqmask | | 1 modprobe | 239 aqmask | | 1 modprobe |
235+--------+-----+---+ +----------------+-+ +------------------+ 240 +--------+-----+---+ +----------------+-+ +----------------+
236| | | |8 create | mediated | 241 | | | |8 create | mediated |
237| admin | | VFIO device core |---------> matrix | 242 | admin | | VFIO device core |---------> matrix |
238| + | | | device | 243 | + | | | device |
239+------+-+---------+ +--------^---------+ +--------^---------+ 244 +------+-+---------+ +--------^---------+ +--------^-------+
240 | | | | 245 | | | |
241 | | 9 create vfio_ap-passthrough | | 246 | | 9 create vfio_ap-passthrough | |
242 | +------------------------------+ | 247 | +------------------------------+ |
243 +-------------------------------------------------------------+ 248 +-------------------------------------------------------------+
244 10 assign adapter/domain/control domain 249 10 assign adapter/domain/control domain
245 250
246The process for reserving an AP queue for use by a KVM guest is: 251The process for reserving an AP queue for use by a KVM guest is:
247 252
@@ -250,7 +255,7 @@ The process for reserving an AP queue for use by a KVM guest is:
250 device with the device core. This will serve as the parent device for 255 device with the device core. This will serve as the parent device for
251 all mediated matrix devices used to configure an AP matrix for a guest. 256 all mediated matrix devices used to configure an AP matrix for a guest.
2523. The /sys/devices/vfio_ap/matrix device is created by the device core 2573. The /sys/devices/vfio_ap/matrix device is created by the device core
2534 The vfio_ap device driver will register with the AP bus for AP queue devices 2584. The vfio_ap device driver will register with the AP bus for AP queue devices
254 of type 10 and higher (CEX4 and newer). The driver will provide the vfio_ap 259 of type 10 and higher (CEX4 and newer). The driver will provide the vfio_ap
255 driver's probe and remove callback interfaces. Devices older than CEX4 queues 260 driver's probe and remove callback interfaces. Devices older than CEX4 queues
256 are not supported to simplify the implementation by not needlessly 261 are not supported to simplify the implementation by not needlessly
@@ -266,13 +271,14 @@ The process for reserving an AP queue for use by a KVM guest is:
266 it. 271 it.
2679. The administrator creates a passthrough type mediated matrix device to be 2729. The administrator creates a passthrough type mediated matrix device to be
268 used by a guest 273 used by a guest
26910 The administrator assigns the adapters, usage domains and control domains 27410. The administrator assigns the adapters, usage domains and control domains
270 to be exclusively used by a guest. 275 to be exclusively used by a guest.
271 276
272Set up the VFIO mediated device interfaces 277Set up the VFIO mediated device interfaces
273------------------------------------------ 278------------------------------------------
274The VFIO AP device driver utilizes the common interface of the VFIO mediated 279The VFIO AP device driver utilizes the common interface of the VFIO mediated
275device core driver to: 280device core driver to:
281
276* Register an AP mediated bus driver to add a mediated matrix device to and 282* Register an AP mediated bus driver to add a mediated matrix device to and
277 remove it from a VFIO group. 283 remove it from a VFIO group.
278* Create and destroy a mediated matrix device 284* Create and destroy a mediated matrix device
@@ -280,25 +286,25 @@ device core driver to:
280* Add a mediated matrix device to and remove it from an IOMMU group 286* Add a mediated matrix device to and remove it from an IOMMU group
281 287
282The following high-level block diagram shows the main components and interfaces 288The following high-level block diagram shows the main components and interfaces
283of the VFIO AP mediated matrix device driver: 289of the VFIO AP mediated matrix device driver::
284 290
285 +-------------+ 291 +-------------+
286 | | 292 | |
287 | +---------+ | mdev_register_driver() +--------------+ 293 | +---------+ | mdev_register_driver() +--------------+
288 | | Mdev | +<-----------------------+ | 294 | | Mdev | +<-----------------------+ |
289 | | bus | | | vfio_mdev.ko | 295 | | bus | | | vfio_mdev.ko |
290 | | driver | +----------------------->+ |<-> VFIO user 296 | | driver | +----------------------->+ |<-> VFIO user
291 | +---------+ | probe()/remove() +--------------+ APIs 297 | +---------+ | probe()/remove() +--------------+ APIs
292 | | 298 | |
293 | MDEV CORE | 299 | MDEV CORE |
294 | MODULE | 300 | MODULE |
295 | mdev.ko | 301 | mdev.ko |
296 | +---------+ | mdev_register_device() +--------------+ 302 | +---------+ | mdev_register_device() +--------------+
297 | |Physical | +<-----------------------+ | 303 | |Physical | +<-----------------------+ |
298 | | device | | | vfio_ap.ko |<-> matrix 304 | | device | | | vfio_ap.ko |<-> matrix
299 | |interface| +----------------------->+ | device 305 | |interface| +----------------------->+ | device
300 | +---------+ | callback +--------------+ 306 | +---------+ | callback +--------------+
301 +-------------+ 307 +-------------+
302 308
303During initialization of the vfio_ap module, the matrix device is registered 309During initialization of the vfio_ap module, the matrix device is registered
304with an 'mdev_parent_ops' structure that provides the sysfs attribute 310with an 'mdev_parent_ops' structure that provides the sysfs attribute
@@ -306,7 +312,8 @@ structures, mdev functions and callback interfaces for managing the mediated
306matrix device. 312matrix device.
307 313
308* sysfs attribute structures: 314* sysfs attribute structures:
309 * supported_type_groups 315
316 supported_type_groups
310 The VFIO mediated device framework supports creation of user-defined 317 The VFIO mediated device framework supports creation of user-defined
311 mediated device types. These mediated device types are specified 318 mediated device types. These mediated device types are specified
312 via the 'supported_type_groups' structure when a device is registered 319 via the 'supported_type_groups' structure when a device is registered
@@ -318,61 +325,72 @@ matrix device.
318 325
319 The VFIO AP device driver will register one mediated device type for 326 The VFIO AP device driver will register one mediated device type for
320 passthrough devices: 327 passthrough devices:
328
321 /sys/devices/vfio_ap/matrix/mdev_supported_types/vfio_ap-passthrough 329 /sys/devices/vfio_ap/matrix/mdev_supported_types/vfio_ap-passthrough
330
322 Only the read-only attributes required by the VFIO mdev framework will 331 Only the read-only attributes required by the VFIO mdev framework will
323 be provided: 332 be provided::
324 ... name 333
325 ... device_api 334 ... name
326 ... available_instances 335 ... device_api
327 ... device_api 336 ... available_instances
328 Where: 337 ... device_api
329 * name: specifies the name of the mediated device type 338
330 * device_api: the mediated device type's API 339 Where:
331 * available_instances: the number of mediated matrix passthrough devices 340
332 that can be created 341 * name:
333 * device_api: specifies the VFIO API 342 specifies the name of the mediated device type
334 * mdev_attr_groups 343 * device_api:
344 the mediated device type's API
345 * available_instances:
346 the number of mediated matrix passthrough devices
347 that can be created
348 * device_api:
349 specifies the VFIO API
350 mdev_attr_groups
335 This attribute group identifies the user-defined sysfs attributes of the 351 This attribute group identifies the user-defined sysfs attributes of the
336 mediated device. When a device is registered with the VFIO mediated device 352 mediated device. When a device is registered with the VFIO mediated device
337 framework, the sysfs attribute files identified in the 'mdev_attr_groups' 353 framework, the sysfs attribute files identified in the 'mdev_attr_groups'
338 structure will be created in the mediated matrix device's directory. The 354 structure will be created in the mediated matrix device's directory. The
339 sysfs attributes for a mediated matrix device are: 355 sysfs attributes for a mediated matrix device are:
340 * assign_adapter: 356
341 * unassign_adapter: 357 assign_adapter / unassign_adapter:
342 Write-only attributes for assigning/unassigning an AP adapter to/from the 358 Write-only attributes for assigning/unassigning an AP adapter to/from the
343 mediated matrix device. To assign/unassign an adapter, the APID of the 359 mediated matrix device. To assign/unassign an adapter, the APID of the
344 adapter is echoed to the respective attribute file. 360 adapter is echoed to the respective attribute file.
345 * assign_domain: 361 assign_domain / unassign_domain:
346 * unassign_domain:
347 Write-only attributes for assigning/unassigning an AP usage domain to/from 362 Write-only attributes for assigning/unassigning an AP usage domain to/from
348 the mediated matrix device. To assign/unassign a domain, the domain 363 the mediated matrix device. To assign/unassign a domain, the domain
349 number of the the usage domain is echoed to the respective attribute 364 number of the the usage domain is echoed to the respective attribute
350 file. 365 file.
351 * matrix: 366 matrix:
352 A read-only file for displaying the APQNs derived from the cross product 367 A read-only file for displaying the APQNs derived from the cross product
353 of the adapter and domain numbers assigned to the mediated matrix device. 368 of the adapter and domain numbers assigned to the mediated matrix device.
354 * assign_control_domain: 369 assign_control_domain / unassign_control_domain:
355 * unassign_control_domain:
356 Write-only attributes for assigning/unassigning an AP control domain 370 Write-only attributes for assigning/unassigning an AP control domain
357 to/from the mediated matrix device. To assign/unassign a control domain, 371 to/from the mediated matrix device. To assign/unassign a control domain,
358 the ID of the domain to be assigned/unassigned is echoed to the respective 372 the ID of the domain to be assigned/unassigned is echoed to the respective
359 attribute file. 373 attribute file.
360 * control_domains: 374 control_domains:
361 A read-only file for displaying the control domain numbers assigned to the 375 A read-only file for displaying the control domain numbers assigned to the
362 mediated matrix device. 376 mediated matrix device.
363 377
364* functions: 378* functions:
365 * create: 379
380 create:
366 allocates the ap_matrix_mdev structure used by the vfio_ap driver to: 381 allocates the ap_matrix_mdev structure used by the vfio_ap driver to:
382
367 * Store the reference to the KVM structure for the guest using the mdev 383 * Store the reference to the KVM structure for the guest using the mdev
368 * Store the AP matrix configuration for the adapters, domains, and control 384 * Store the AP matrix configuration for the adapters, domains, and control
369 domains assigned via the corresponding sysfs attributes files 385 domains assigned via the corresponding sysfs attributes files
370 * remove: 386
387 remove:
371 deallocates the mediated matrix device's ap_matrix_mdev structure. This will 388 deallocates the mediated matrix device's ap_matrix_mdev structure. This will
372 be allowed only if a running guest is not using the mdev. 389 be allowed only if a running guest is not using the mdev.
373 390
374* callback interfaces 391* callback interfaces
375 * open: 392
393 open:
376 The vfio_ap driver uses this callback to register a 394 The vfio_ap driver uses this callback to register a
377 VFIO_GROUP_NOTIFY_SET_KVM notifier callback function for the mdev matrix 395 VFIO_GROUP_NOTIFY_SET_KVM notifier callback function for the mdev matrix
378 device. The open is invoked when QEMU connects the VFIO iommu group 396 device. The open is invoked when QEMU connects the VFIO iommu group
@@ -380,16 +398,17 @@ matrix device.
380 to configure the KVM guest is provided via this callback. The KVM structure, 398 to configure the KVM guest is provided via this callback. The KVM structure,
381 is used to configure the guest's access to the AP matrix defined via the 399 is used to configure the guest's access to the AP matrix defined via the
382 mediated matrix device's sysfs attribute files. 400 mediated matrix device's sysfs attribute files.
383 * release: 401 release:
384 unregisters the VFIO_GROUP_NOTIFY_SET_KVM notifier callback function for the 402 unregisters the VFIO_GROUP_NOTIFY_SET_KVM notifier callback function for the
385 mdev matrix device and deconfigures the guest's AP matrix. 403 mdev matrix device and deconfigures the guest's AP matrix.
386 404
387Configure the APM, AQM and ADM in the CRYCB: 405Configure the APM, AQM and ADM in the CRYCB
388------------------------------------------- 406-------------------------------------------
389Configuring the AP matrix for a KVM guest will be performed when the 407Configuring the AP matrix for a KVM guest will be performed when the
390VFIO_GROUP_NOTIFY_SET_KVM notifier callback is invoked. The notifier 408VFIO_GROUP_NOTIFY_SET_KVM notifier callback is invoked. The notifier
391function is called when QEMU connects to KVM. The guest's AP matrix is 409function is called when QEMU connects to KVM. The guest's AP matrix is
392configured via it's CRYCB by: 410configured via it's CRYCB by:
411
393* Setting the bits in the APM corresponding to the APIDs assigned to the 412* Setting the bits in the APM corresponding to the APIDs assigned to the
394 mediated matrix device via its 'assign_adapter' interface. 413 mediated matrix device via its 'assign_adapter' interface.
395* Setting the bits in the AQM corresponding to the domains assigned to the 414* Setting the bits in the AQM corresponding to the domains assigned to the
@@ -418,12 +437,12 @@ available to a KVM guest via the following CPU model features:
418 437
419Note: If the user chooses to specify a CPU model different than the 'host' 438Note: If the user chooses to specify a CPU model different than the 'host'
420model to QEMU, the CPU model features and facilities need to be turned on 439model to QEMU, the CPU model features and facilities need to be turned on
421explicitly; for example: 440explicitly; for example::
422 441
423 /usr/bin/qemu-system-s390x ... -cpu z13,ap=on,apqci=on,apft=on 442 /usr/bin/qemu-system-s390x ... -cpu z13,ap=on,apqci=on,apft=on
424 443
425A guest can be precluded from using AP features/facilities by turning them off 444A guest can be precluded from using AP features/facilities by turning them off
426explicitly; for example: 445explicitly; for example::
427 446
428 /usr/bin/qemu-system-s390x ... -cpu host,ap=off,apqci=off,apft=off 447 /usr/bin/qemu-system-s390x ... -cpu host,ap=off,apqci=off,apft=off
429 448
@@ -435,7 +454,7 @@ the APFT facility is not installed on the guest, then the probe of device
435drivers will fail since only type 10 and newer devices can be configured for 454drivers will fail since only type 10 and newer devices can be configured for
436guest use. 455guest use.
437 456
438Example: 457Example
439======= 458=======
440Let's now provide an example to illustrate how KVM guests may be given 459Let's now provide an example to illustrate how KVM guests may be given
441access to AP facilities. For this example, we will show how to configure 460access to AP facilities. For this example, we will show how to configure
@@ -444,30 +463,36 @@ look like this:
444 463
445Guest1 464Guest1
446------ 465------
466=========== ===== ============
447CARD.DOMAIN TYPE MODE 467CARD.DOMAIN TYPE MODE
448------------------------------ 468=========== ===== ============
44905 CEX5C CCA-Coproc 46905 CEX5C CCA-Coproc
45005.0004 CEX5C CCA-Coproc 47005.0004 CEX5C CCA-Coproc
45105.00ab CEX5C CCA-Coproc 47105.00ab CEX5C CCA-Coproc
45206 CEX5A Accelerator 47206 CEX5A Accelerator
45306.0004 CEX5A Accelerator 47306.0004 CEX5A Accelerator
45406.00ab CEX5C CCA-Coproc 47406.00ab CEX5C CCA-Coproc
475=========== ===== ============
455 476
456Guest2 477Guest2
457------ 478------
479=========== ===== ============
458CARD.DOMAIN TYPE MODE 480CARD.DOMAIN TYPE MODE
459------------------------------ 481=========== ===== ============
46005 CEX5A Accelerator 48205 CEX5A Accelerator
46105.0047 CEX5A Accelerator 48305.0047 CEX5A Accelerator
46205.00ff CEX5A Accelerator 48405.00ff CEX5A Accelerator
485=========== ===== ============
463 486
464Guest2 487Guest2
465------ 488------
489=========== ===== ============
466CARD.DOMAIN TYPE MODE 490CARD.DOMAIN TYPE MODE
467------------------------------ 491=========== ===== ============
46806 CEX5A Accelerator 49206 CEX5A Accelerator
46906.0047 CEX5A Accelerator 49306.0047 CEX5A Accelerator
47006.00ff CEX5A Accelerator 49406.00ff CEX5A Accelerator
495=========== ===== ============
471 496
472These are the steps: 497These are the steps:
473 498
@@ -492,25 +517,26 @@ These are the steps:
492 * VFIO_MDEV_DEVICE 517 * VFIO_MDEV_DEVICE
493 * KVM 518 * KVM
494 519
495 If using make menuconfig select the following to build the vfio_ap module: 520 If using make menuconfig select the following to build the vfio_ap module::
496 -> Device Drivers 521
497 -> IOMMU Hardware Support 522 -> Device Drivers
498 select S390 AP IOMMU Support 523 -> IOMMU Hardware Support
499 -> VFIO Non-Privileged userspace driver framework 524 select S390 AP IOMMU Support
500 -> Mediated device driver frramework 525 -> VFIO Non-Privileged userspace driver framework
501 -> VFIO driver for Mediated devices 526 -> Mediated device driver frramework
502 -> I/O subsystem 527 -> VFIO driver for Mediated devices
503 -> VFIO support for AP devices 528 -> I/O subsystem
529 -> VFIO support for AP devices
504 530
5052. Secure the AP queues to be used by the three guests so that the host cannot 5312. Secure the AP queues to be used by the three guests so that the host cannot
506 access them. To secure them, there are two sysfs files that specify 532 access them. To secure them, there are two sysfs files that specify
507 bitmasks marking a subset of the APQN range as 'usable by the default AP 533 bitmasks marking a subset of the APQN range as 'usable by the default AP
508 queue device drivers' or 'not usable by the default device drivers' and thus 534 queue device drivers' or 'not usable by the default device drivers' and thus
509 available for use by the vfio_ap device driver. The locations of the sysfs 535 available for use by the vfio_ap device driver. The locations of the sysfs
510 files containing the masks are: 536 files containing the masks are::
511 537
512 /sys/bus/ap/apmask 538 /sys/bus/ap/apmask
513 /sys/bus/ap/aqmask 539 /sys/bus/ap/aqmask
514 540
515 The 'apmask' is a 256-bit mask that identifies a set of AP adapter IDs 541 The 'apmask' is a 256-bit mask that identifies a set of AP adapter IDs
516 (APID). Each bit in the mask, from left to right (i.e., from most significant 542 (APID). Each bit in the mask, from left to right (i.e., from most significant
@@ -526,7 +552,7 @@ These are the steps:
526 queue device drivers; otherwise, the APQI is usable by the vfio_ap device 552 queue device drivers; otherwise, the APQI is usable by the vfio_ap device
527 driver. 553 driver.
528 554
529 Take, for example, the following mask: 555 Take, for example, the following mask::
530 556
531 0x7dffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff 557 0x7dffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
532 558
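As an aside (not part of this patch), the left-to-right bit numbering can be made concrete with a small, hypothetical userspace sketch: it builds the 0x-prefixed hex string that /sys/bus/ap/apmask expects, treating bit 0 as the most significant bit of the mask; the adapter IDs used here are only examples::

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        unsigned char mask[32];          /* 256-bit mask, initially all zeroes */
        int apids[] = { 1, 7 };          /* example adapter IDs (APIDs) */
        char hex[2 * sizeof(mask) + 3];  /* "0x" + 64 hex digits + NUL */
        size_t i;

        memset(mask, 0, sizeof(mask));
        for (i = 0; i < sizeof(apids) / sizeof(apids[0]); i++)
            mask[apids[i] / 8] |= 0x80 >> (apids[i] % 8);  /* bit 0 == MSB */

        strcpy(hex, "0x");
        for (i = 0; i < sizeof(mask); i++)
            sprintf(hex + 2 + 2 * i, "%02x", mask[i]);
        printf("%s\n", hex);   /* prints 0x4100...00 for adapters 1 and 7 */
        return 0;
    }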
@@ -548,68 +574,70 @@ These are the steps:
548 respective sysfs mask file in one of two formats: 574 respective sysfs mask file in one of two formats:
549 575
550 * An absolute hex string starting with 0x - like "0x12345678" - sets 576 * An absolute hex string starting with 0x - like "0x12345678" - sets
551 the mask. If the given string is shorter than the mask, it is padded 577 the mask. If the given string is shorter than the mask, it is padded
552 with 0s on the right; for example, specifying a mask value of 0x41 is 578 with 0s on the right; for example, specifying a mask value of 0x41 is
553 the same as specifying: 579 the same as specifying::
554 580
555 0x4100000000000000000000000000000000000000000000000000000000000000 581 0x4100000000000000000000000000000000000000000000000000000000000000
556 582
557 Keep in mind that the mask reads from left to right (i.e., most 583 Keep in mind that the mask reads from left to right (i.e., most
558 significant to least significant bit in big endian order), so the mask 584 significant to least significant bit in big endian order), so the mask
559 above identifies device numbers 1 and 7 (01000001). 585 above identifies device numbers 1 and 7 (01000001).
560 586
561 If the string is longer than the mask, the operation is terminated with 587 If the string is longer than the mask, the operation is terminated with
562 an error (EINVAL). 588 an error (EINVAL).
563 589
564 * Individual bits in the mask can be switched on and off by specifying 590 * Individual bits in the mask can be switched on and off by specifying
565 each bit number to be switched in a comma separated list. Each bit 591 each bit number to be switched in a comma separated list. Each bit
566 number string must be prepended with a plus ('+') or minus ('-') to indicate 592 number string must be prepended with a plus ('+') or minus ('-') to indicate
567 the corresponding bit is to be switched on ('+') or off ('-'). Some 593 the corresponding bit is to be switched on ('+') or off ('-'). Some
568 valid values are: 594 valid values are:
569 595
570 "+0" switches bit 0 on 596 - "+0" switches bit 0 on
571 "-13" switches bit 13 off 597 - "-13" switches bit 13 off
572 "+0x41" switches bit 65 on 598 - "+0x41" switches bit 65 on
573 "-0xff" switches bit 255 off 599 - "-0xff" switches bit 255 off
574 600
575 The following example: 601 The following example::
576 +0,-6,+0x47,-0xf0
577 602
578 Switches bits 0 and 71 (0x47) on 603 +0,-6,+0x47,-0xf0
579 Switches bits 6 and 240 (0xf0) off
580 604
581 Note that the bits not specified in the list remain as they were before 605 Switches bits 0 and 71 (0x47) on
582 the operation. 606
607 Switches bits 6 and 240 (0xf0) off
608
609 Note that the bits not specified in the list remain as they were before
610 the operation.
583 611
584 2. The masks can also be changed at boot time via parameters on the kernel 612 2. The masks can also be changed at boot time via parameters on the kernel
585 command line like this: 613 command line like this::
586 614
587 ap.apmask=0xffff ap.aqmask=0x40 615 ap.apmask=0xffff ap.aqmask=0x40
588 616
589 This would create the following masks: 617 This would create the following masks::
590 618
591 apmask: 619 apmask:
592 0xffff000000000000000000000000000000000000000000000000000000000000 620 0xffff000000000000000000000000000000000000000000000000000000000000
593 621
594 aqmask: 622 aqmask:
595 0x4000000000000000000000000000000000000000000000000000000000000000 623 0x4000000000000000000000000000000000000000000000000000000000000000
596 624
597 Resulting in these two pools: 625 Resulting in these two pools::
598 626
599 default drivers pool: adapter 0-15, domain 1 627 default drivers pool: adapter 0-15, domain 1
600 alternate drivers pool: adapter 16-255, domains 0, 2-255 628 alternate drivers pool: adapter 16-255, domains 0, 2-255
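Put differently, the hex string 0xffff sets only the 16 most significant bits of apmask (adapters 0-15 stay with the default drivers), while 0x40 is padded on the right so that only bit 1 of aqmask is set (domain 1 stays with the default drivers); every other adapter and domain falls into the pool usable by the vfio_ap device driver.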
601 629
602 Securing the APQNs for our example: 630Securing the APQNs for our example
603 ---------------------------------- 631----------------------------------
604 To secure the AP queues 05.0004, 05.0047, 05.00ab, 05.00ff, 06.0004, 06.0047, 632 To secure the AP queues 05.0004, 05.0047, 05.00ab, 05.00ff, 06.0004, 06.0047,
605 06.00ab, and 06.00ff for use by the vfio_ap device driver, the corresponding 633 06.00ab, and 06.00ff for use by the vfio_ap device driver, the corresponding
606 APQNs can either be removed from the default masks: 634 APQNs can either be removed from the default masks::
607 635
608 echo -5,-6 > /sys/bus/ap/apmask 636 echo -5,-6 > /sys/bus/ap/apmask
609 637
610 echo -4,-0x47,-0xab,-0xff > /sys/bus/ap/aqmask 638 echo -4,-0x47,-0xab,-0xff > /sys/bus/ap/aqmask
611 639
612 Or the masks can be set as follows: 640 Or the masks can be set as follows::
613 641
614 echo 0xf9ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff \ 642 echo 0xf9ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff \
615 > apmask 643 > apmask
@@ -620,19 +648,19 @@ These are the steps:
620 This will result in AP queues 05.0004, 05.0047, 05.00ab, 05.00ff, 06.0004, 648 This will result in AP queues 05.0004, 05.0047, 05.00ab, 05.00ff, 06.0004,
621 06.0047, 06.00ab, and 06.00ff getting bound to the vfio_ap device driver. The 649 06.0047, 06.00ab, and 06.00ff getting bound to the vfio_ap device driver. The
622 sysfs directory for the vfio_ap device driver will now contain symbolic links 650 sysfs directory for the vfio_ap device driver will now contain symbolic links
623 to the AP queue devices bound to it: 651 to the AP queue devices bound to it::
624 652
625 /sys/bus/ap 653 /sys/bus/ap
626 ... [drivers] 654 ... [drivers]
627 ...... [vfio_ap] 655 ...... [vfio_ap]
628 ......... [05.0004] 656 ......... [05.0004]
629 ......... [05.0047] 657 ......... [05.0047]
630 ......... [05.00ab] 658 ......... [05.00ab]
631 ......... [05.00ff] 659 ......... [05.00ff]
632 ......... [06.0004] 660 ......... [06.0004]
633 ......... [06.0047] 661 ......... [06.0047]
634 ......... [06.00ab] 662 ......... [06.00ab]
635 ......... [06.00ff] 663 ......... [06.00ff]
636 664
637 Keep in mind that only type 10 and newer adapters (i.e., CEX4 and later) 665 Keep in mind that only type 10 and newer adapters (i.e., CEX4 and later)
638 can be bound to the vfio_ap device driver. The reason for this is to 666 can be bound to the vfio_ap device driver. The reason for this is to
@@ -645,96 +673,96 @@ These are the steps:
645 queue device can be read from the parent card's sysfs directory. For example, 673 queue device can be read from the parent card's sysfs directory. For example,
646 to see the hardware type of the queue 05.0004: 674 to see the hardware type of the queue 05.0004::
647 675
648 cat /sys/bus/ap/devices/card05/hwtype 676 cat /sys/bus/ap/devices/card05/hwtype
649 677
650 The hwtype must be 10 or higher (CEX4 or newer) in order to be bound to the 678 The hwtype must be 10 or higher (CEX4 or newer) in order to be bound to the
651 vfio_ap device driver. 679 vfio_ap device driver.
652 680
6533. Create the mediated devices needed to configure the AP matrixes for the 6813. Create the mediated devices needed to configure the AP matrixes for the
654 three guests and to provide an interface to the vfio_ap driver for 682 three guests and to provide an interface to the vfio_ap driver for
655 use by the guests: 683 use by the guests::
656 684
657 /sys/devices/vfio_ap/matrix/ 685 /sys/devices/vfio_ap/matrix/
658 --- [mdev_supported_types] 686 --- [mdev_supported_types]
659 ------ [vfio_ap-passthrough] (passthrough mediated matrix device type) 687 ------ [vfio_ap-passthrough] (passthrough mediated matrix device type)
660 --------- create 688 --------- create
661 --------- [devices] 689 --------- [devices]
662 690
663 To create the mediated devices for the three guests: 691 To create the mediated devices for the three guests::
664 692
665 uuidgen > create 693 uuidgen > create
666 uuidgen > create 694 uuidgen > create
667 uuidgen > create 695 uuidgen > create
668 696
669 or 697 or::
670 698
671 echo $uuid1 > create 699 echo $uuid1 > create
672 echo $uuid2 > create 700 echo $uuid2 > create
673 echo $uuid3 > create 701 echo $uuid3 > create
674 702
675 This will create three mediated devices in the [devices] subdirectory named 703 This will create three mediated devices in the [devices] subdirectory named
676 after the UUID written to the create attribute file. We call them $uuid1, 704 after the UUID written to the create attribute file. We call them $uuid1,
677 $uuid2 and $uuid3 and this is the sysfs directory structure after creation: 705 $uuid2 and $uuid3 and this is the sysfs directory structure after creation::
678 706
679 /sys/devices/vfio_ap/matrix/ 707 /sys/devices/vfio_ap/matrix/
680 --- [mdev_supported_types] 708 --- [mdev_supported_types]
681 ------ [vfio_ap-passthrough] 709 ------ [vfio_ap-passthrough]
682 --------- [devices] 710 --------- [devices]
683 ------------ [$uuid1] 711 ------------ [$uuid1]
684 --------------- assign_adapter 712 --------------- assign_adapter
685 --------------- assign_control_domain 713 --------------- assign_control_domain
686 --------------- assign_domain 714 --------------- assign_domain
687 --------------- matrix 715 --------------- matrix
688 --------------- unassign_adapter 716 --------------- unassign_adapter
689 --------------- unassign_control_domain 717 --------------- unassign_control_domain
690 --------------- unassign_domain 718 --------------- unassign_domain
691 719
692 ------------ [$uuid2] 720 ------------ [$uuid2]
693 --------------- assign_adapter 721 --------------- assign_adapter
694 --------------- assign_control_domain 722 --------------- assign_control_domain
695 --------------- assign_domain 723 --------------- assign_domain
696 --------------- matrix 724 --------------- matrix
697 --------------- unassign_adapter 725 --------------- unassign_adapter
698 --------------- unassign_control_domain 726 --------------- unassign_control_domain
699 --------------- unassign_domain 727 --------------- unassign_domain
700 728
701 ------------ [$uuid3] 729 ------------ [$uuid3]
702 --------------- assign_adapter 730 --------------- assign_adapter
703 --------------- assign_control_domain 731 --------------- assign_control_domain
704 --------------- assign_domain 732 --------------- assign_domain
705 --------------- matrix 733 --------------- matrix
706 --------------- unassign_adapter 734 --------------- unassign_adapter
707 --------------- unassign_control_domain 735 --------------- unassign_control_domain
708 --------------- unassign_domain 736 --------------- unassign_domain
709 737
7104. The administrator now needs to configure the matrixes for the mediated 7384. The administrator now needs to configure the matrixes for the mediated
711 devices $uuid1 (for Guest1), $uuid2 (for Guest2) and $uuid3 (for Guest3). 739 devices $uuid1 (for Guest1), $uuid2 (for Guest2) and $uuid3 (for Guest3).
712 740
713 This is how the matrix is configured for Guest1: 741 This is how the matrix is configured for Guest1::
714 742
715 echo 5 > assign_adapter 743 echo 5 > assign_adapter
716 echo 6 > assign_adapter 744 echo 6 > assign_adapter
717 echo 4 > assign_domain 745 echo 4 > assign_domain
718 echo 0xab > assign_domain 746 echo 0xab > assign_domain
719 747
720 Control domains can similarly be assigned using the assign_control_domain 748 Control domains can similarly be assigned using the assign_control_domain
721 sysfs file. 749 sysfs file.
722 750
723 If a mistake is made configuring an adapter, domain or control domain, 751 If a mistake is made configuring an adapter, domain or control domain,
724 you can use the unassign_xxx files to unassign the adapter, domain or 752 you can use the unassign_xxx files to unassign the adapter, domain or
725 control domain. 753 control domain.
726 754
727 To display the matrix configuration for Guest1: 755 To display the matrix configuration for Guest1::
728 756
729 cat matrix 757 cat matrix
730 758
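Given the adapter and domain assignments above, the matrix for Guest1 would be expected to list the four APQNs 05.0004, 05.00ab, 06.0004 and 06.00ab, matching the Guest1 table at the start of this example.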
731 This is how the matrix is configured for Guest2: 759 This is how the matrix is configured for Guest2::
732 760
733 echo 5 > assign_adapter 761 echo 5 > assign_adapter
734 echo 0x47 > assign_domain 762 echo 0x47 > assign_domain
735 echo 0xff > assign_domain 763 echo 0xff > assign_domain
736 764
737 This is how the matrix is configured for Guest3: 765 This is how the matrix is configured for Guest3::
738 766
739 echo 6 > assign_adapter 767 echo 6 > assign_adapter
740 echo 0x47 > assign_domain 768 echo 0x47 > assign_domain
@@ -783,24 +811,24 @@ These are the steps:
783 configured for the system. If a control domain number higher than the maximum 811 configured for the system. If a control domain number higher than the maximum
784 is specified, the operation will terminate with an error (ENODEV). 812 is specified, the operation will terminate with an error (ENODEV).
785 813
7865. Start Guest1: 8145. Start Guest1::
787 815
788 /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on \ 816 /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on \
789 -device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid1 ... 817 -device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid1 ...
790 818
7916. Start Guest2: 8196. Start Guest2::
792 820
793 /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on \ 821 /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on \
794 -device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid2 ... 822 -device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid2 ...
795 823
7967. Start Guest3: 8247. Start Guest3::
797 825
798 /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on \ 826 /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on \
799 -device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid3 ... 827 -device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid3 ...
800 828
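As a hypothetical sanity check (assuming the s390-tools package is installed in the guests), running lszcrypt inside each guest should show only the APQNs assigned to its mediated device.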
801When the guest is shut down, the mediated matrix devices may be removed. 829When the guest is shut down, the mediated matrix devices may be removed.
802 830
803Using our example again, to remove the mediated matrix device $uuid1: 831Using our example again, to remove the mediated matrix device $uuid1::
804 832
805 /sys/devices/vfio_ap/matrix/ 833 /sys/devices/vfio_ap/matrix/
806 --- [mdev_supported_types] 834 --- [mdev_supported_types]
@@ -809,18 +837,19 @@ Using our example again, to remove the mediated matrix device $uuid1:
809 ------------ [$uuid1] 837 ------------ [$uuid1]
810 --------------- remove 838 --------------- remove
811 839
840::
812 841
813 echo 1 > remove 842 echo 1 > remove
814 843
815 This will remove all of the mdev matrix device's sysfs structures including 844This will remove all of the mdev matrix device's sysfs structures including
816 the mdev device itself. To recreate and reconfigure the mdev matrix device, 845the mdev device itself. To recreate and reconfigure the mdev matrix device,
817 all of the steps starting with step 3 will have to be performed again. Note 846all of the steps starting with step 3 will have to be performed again. Note
818 that the remove will fail if a guest using the mdev is still running. 847that the remove will fail if a guest using the mdev is still running.
819 848
820 It is not necessary to remove an mdev matrix device, but one may want to 849It is not necessary to remove an mdev matrix device, but one may want to
821 remove it if no guest will use it during the remaining lifetime of the linux 850remove it if no guest will use it during the remaining lifetime of the linux
822 host. If the mdev matrix device is removed, one may want to also reconfigure 851host. If the mdev matrix device is removed, one may want to also reconfigure
823 the pool of adapters and queues reserved for use by the default drivers. 852the pool of adapters and queues reserved for use by the default drivers.
824 853
825Limitations 854Limitations
826=========== 855===========
diff --git a/Documentation/s390/vfio-ccw.txt b/Documentation/s390/vfio-ccw.rst
index 2be11ad864ff..1f6d0b56d53e 100644
--- a/Documentation/s390/vfio-ccw.txt
+++ b/Documentation/s390/vfio-ccw.rst
@@ -1,3 +1,4 @@
1==================================
1vfio-ccw: the basic infrastructure 2vfio-ccw: the basic infrastructure
2================================== 3==================================
3 4
@@ -11,9 +12,11 @@ virtual machine, while vfio is the means.
11Unlike other hardware architectures, s390 has defined a unified 12I/O access method, the so-called Channel I/O. It has its own access
12I/O access method, the so-called Channel I/O. It has its own access 13patterns:
13patterns: 14patterns:
15
14- Channel programs run asynchronously on a separate (co)processor. 16- Channel programs run asynchronously on a separate (co)processor.
15- The channel subsystem will access any memory designated by the caller 17- The channel subsystem will access any memory designated by the caller
16 in the channel program directly, i.e. there is no iommu involved. 18 in the channel program directly, i.e. there is no iommu involved.
19
17Thus when we introduce vfio support for these devices, we realize it 20Thus when we introduce vfio support for these devices, we realize it
18with a mediated device (mdev) implementation. The vfio mdev will be 21with a mediated device (mdev) implementation. The vfio mdev will be
19added to an iommu group, so as to make itself able to be managed by the 22added to an iommu group, so as to make itself able to be managed by the
@@ -24,6 +27,7 @@ to perform I/O instructions.
24 27
25This document does not intend to explain the s390 I/O architecture in 28every detail. More information and references can be found here:
26every detail. More information and references can be found here: 29
30
27- A good start to know Channel I/O in general: 31- A good start to know Channel I/O in general:
28 https://en.wikipedia.org/wiki/Channel_I/O 32 https://en.wikipedia.org/wiki/Channel_I/O
29- s390 architecture: 33- s390 architecture:
@@ -80,6 +84,7 @@ until interrupted. The I/O completion result is received by the
80interrupt handler in the form of interrupt response block (IRB). 84interrupt handler in the form of interrupt response block (IRB).
81 85
82Back to vfio-ccw, in short: 86Back to vfio-ccw, in short:
87
83- ORBs and channel programs are built in guest kernel (with guest 88- ORBs and channel programs are built in guest kernel (with guest
84 physical addresses). 89 physical addresses).
85- ORBs and channel programs are passed to the host kernel. 90- ORBs and channel programs are passed to the host kernel.
@@ -106,6 +111,7 @@ it gets sent to hardware.
106 111
107Within this implementation, we have two drivers for two types of 112Within this implementation, we have two drivers for two types of
108devices: 113devices:
114
109- The vfio_ccw driver for the physical subchannel device. 115- The vfio_ccw driver for the physical subchannel device.
110 This is an I/O subchannel driver for the real subchannel device. It 116 This is an I/O subchannel driver for the real subchannel device. It
111 realizes a group of callbacks and registers to the mdev framework as a 117 realizes a group of callbacks and registers to the mdev framework as a
@@ -137,7 +143,7 @@ devices:
137 vfio_pin_pages and a vfio_unpin_pages interfaces from the vfio iommu 143 vfio_pin_pages and a vfio_unpin_pages interfaces from the vfio iommu
138 backend for the physical devices to pin and unpin pages by demand. 144 backend for the physical devices to pin and unpin pages by demand.
139 145
140Below is a high-level block diagram. 146Below is a high-level block diagram::
141 147
142 +-------------+ 148 +-------------+
143 | | 149 | |
@@ -158,6 +164,7 @@ Below is a high Level block diagram.
158 +-------------+ 164 +-------------+
159 165
160How these work together: 166How these work together:
167
1611. vfio_ccw.ko drives the physical I/O subchannel, and registers the 1681. vfio_ccw.ko drives the physical I/O subchannel, and registers the
162 physical device (with callbacks) to mdev framework. 169 physical device (with callbacks) to mdev framework.
163 When vfio_ccw probes the subchannel device, it registers device 170 When vfio_ccw probes the subchannel device, it registers device
@@ -178,17 +185,17 @@ vfio-ccw I/O region
178 185
179An I/O region is used to accept channel program request from user 186An I/O region is used to accept channel program request from user
180space and store I/O interrupt result for user space to retrieve. The 187space and store I/O interrupt result for user space to retrieve. The
181definition of the region is: 188definition of the region is::
182 189
183struct ccw_io_region { 190 struct ccw_io_region {
184#define ORB_AREA_SIZE 12 191 #define ORB_AREA_SIZE 12
185 __u8 orb_area[ORB_AREA_SIZE]; 192 __u8 orb_area[ORB_AREA_SIZE];
186#define SCSW_AREA_SIZE 12 193 #define SCSW_AREA_SIZE 12
187 __u8 scsw_area[SCSW_AREA_SIZE]; 194 __u8 scsw_area[SCSW_AREA_SIZE];
188#define IRB_AREA_SIZE 96 195 #define IRB_AREA_SIZE 96
189 __u8 irb_area[IRB_AREA_SIZE]; 196 __u8 irb_area[IRB_AREA_SIZE];
190 __u32 ret_code; 197 __u32 ret_code;
191} __packed; 198 } __packed;
192 199
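To make the region layout above concrete, here is a minimal, hypothetical userspace sketch (not part of this patch); it assumes the region offset was previously discovered via the VFIO_DEVICE_GET_REGION_INFO ioctl listed further below, and it omits all error handling::

    #include <stdint.h>
    #include <string.h>
    #include <unistd.h>

    #define ORB_AREA_SIZE   12
    #define SCSW_AREA_SIZE  12
    #define IRB_AREA_SIZE   96

    struct ccw_io_region {
        uint8_t  orb_area[ORB_AREA_SIZE];
        uint8_t  scsw_area[SCSW_AREA_SIZE];
        uint8_t  irb_area[IRB_AREA_SIZE];
        uint32_t ret_code;
    } __attribute__((packed));

    /* Hand one guest ORB/SCSW pair to the vfio-ccw device. */
    static int submit_io(int device_fd, off_t region_offset,
                         const void *guest_orb, const void *guest_scsw)
    {
        struct ccw_io_region io;

        memset(&io, 0, sizeof(io));
        memcpy(io.orb_area, guest_orb, ORB_AREA_SIZE);
        memcpy(io.scsw_area, guest_scsw, SCSW_AREA_SIZE);

        /* Writing the region asks the host to translate and issue the program. */
        if (pwrite(device_fd, &io, sizeof(io), region_offset) != (ssize_t)sizeof(io))
            return -1;

        /*
         * The completion result (ret_code, irb_area) is read back from the
         * same region once completion is signalled (see Q6/K5 further below).
         */
        return 0;
    }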
193While starting an I/O request, orb_area should be filled with the 200While starting an I/O request, orb_area should be filled with the
194guest ORB, and scsw_area should be filled with the SCSW of the Virtual 201guest ORB, and scsw_area should be filled with the SCSW of the Virtual
@@ -205,7 +212,7 @@ vfio-ccw follows what vfio-pci did on the s390 platform and uses
205vfio-iommu-type1 as the vfio iommu backend. 212vfio-iommu-type1 as the vfio iommu backend.
206 213
207* CCW translation APIs 214* CCW translation APIs
208 A group of APIs (starting with 'cp_') to do CCW translation. The CCWs 215 A group of APIs (starting with `cp_`) to do CCW translation. The CCWs
209 passed in by a user space program are organized with their guest 216 passed in by a user space program are organized with their guest
210 physical memory addresses. These APIs will copy the CCWs into kernel 217 physical memory addresses. These APIs will copy the CCWs into kernel
211 space, and assemble a runnable kernel channel program by updating the 218 space, and assemble a runnable kernel channel program by updating the
@@ -217,12 +224,14 @@ vfio-iommu-type1 as the vfio iommu backend.
217 This driver utilizes the CCW translation APIs and introduces 224 This driver utilizes the CCW translation APIs and introduces
218 vfio_ccw, which is the driver for the I/O subchannel devices you want 225 vfio_ccw, which is the driver for the I/O subchannel devices you want
219 to pass through. 226 to pass through.
220 vfio_ccw implements the following vfio ioctls: 227 vfio_ccw implements the following vfio ioctls::
228
221 VFIO_DEVICE_GET_INFO 229 VFIO_DEVICE_GET_INFO
222 VFIO_DEVICE_GET_IRQ_INFO 230 VFIO_DEVICE_GET_IRQ_INFO
223 VFIO_DEVICE_GET_REGION_INFO 231 VFIO_DEVICE_GET_REGION_INFO
224 VFIO_DEVICE_RESET 232 VFIO_DEVICE_RESET
225 VFIO_DEVICE_SET_IRQS 233 VFIO_DEVICE_SET_IRQS
234
226 This provides an I/O region, so that the user space program can pass a 235 This provides an I/O region, so that the user space program can pass a
227 channel program to the kernel, to do further CCW translation before 236 channel program to the kernel, to do further CCW translation before
228 issuing them to a real device. 237 issuing them to a real device.
@@ -236,32 +245,49 @@ bit more detail how an I/O request triggered by the QEMU guest will be
236handled (without error handling). 245handled (without error handling).
237 246
238Explanation: 247Explanation:
239Q1-Q7: QEMU side process.
240K1-K6: Kernel side process.
241 248
242Q1. Get I/O region info during initialization. 249- Q1-Q7: QEMU side process.
243Q2. Setup event notifier and handler to handle I/O completion. 250- K1-K6: Kernel side process.
251
252Q1.
253 Get I/O region info during initialization.
254
255Q2.
256 Setup event notifier and handler to handle I/O completion.
244 257
245... ... 258... ...
246 259
247Q3. Intercept a ssch instruction. 260Q3.
248Q4. Write the guest channel program and ORB to the I/O region. 261 Intercept a ssch instruction.
249 K1. Copy from guest to kernel. 262Q4.
250 K2. Translate the guest channel program to a host kernel space 263 Write the guest channel program and ORB to the I/O region.
251 channel program, which becomes runnable for a real device. 264
252 K3. With the necessary information contained in the orb passed in 265 K1.
253 by QEMU, issue the ccwchain to the device. 266 Copy from guest to kernel.
254 K4. Return the ssch CC code. 267 K2.
255Q5. Return the CC code to the guest. 268 Translate the guest channel program to a host kernel space
269 channel program, which becomes runnable for a real device.
270 K3.
271 With the necessary information contained in the orb passed in
272 by QEMU, issue the ccwchain to the device.
273 K4.
274 Return the ssch CC code.
275Q5.
276 Return the CC code to the guest.
256 277
257... ... 278... ...
258 279
259 K5. Interrupt handler gets the I/O result and writes the result to 280 K5.
260 the I/O region. 281 Interrupt handler gets the I/O result and writes the result to
261 K6. Signal QEMU to retrieve the result. 282 the I/O region.
262Q6. Get the signal, and the event handler reads out the result from the I/O 283 K6.
284 Signal QEMU to retrieve the result.
285
286Q6.
287 Get the signal, and the event handler reads out the result from the I/O
263 region. 288 region.
264Q7. Update the irb for the guest. 289Q7.
290 Update the irb for the guest.
265 291
266Limitations 292Limitations
267----------- 293-----------
@@ -295,6 +321,6 @@ Reference
2951. ESA/s390 Principles of Operation manual (IBM Form. No. SA22-7832) 3211. ESA/s390 Principles of Operation manual (IBM Form. No. SA22-7832)
2962. ESA/390 Common I/O Device Commands manual (IBM Form. No. SA22-7204) 3222. ESA/390 Common I/O Device Commands manual (IBM Form. No. SA22-7204)
2973. https://en.wikipedia.org/wiki/Channel_I/O 3233. https://en.wikipedia.org/wiki/Channel_I/O
2984. Documentation/s390/cds.txt 3244. Documentation/s390/cds.rst
2995. Documentation/vfio.txt 3255. Documentation/vfio.txt
3006. Documentation/vfio-mediated-device.txt 3266. Documentation/vfio-mediated-device.txt
diff --git a/Documentation/s390/zfcpdump.txt b/Documentation/s390/zfcpdump.rst
index b064aa59714d..54e8e7caf7e7 100644
--- a/Documentation/s390/zfcpdump.txt
+++ b/Documentation/s390/zfcpdump.rst
@@ -1,4 +1,6 @@
1==================================
1The s390 SCSI dump tool (zfcpdump) 2The s390 SCSI dump tool (zfcpdump)
3==================================
2 4
3System z machines (z900 or higher) provide hardware support for creating system 5System z machines (z900 or higher) provide hardware support for creating system
4dumps on SCSI disks. The dump process is initiated by booting a dump tool, which 6dumps on SCSI disks. The dump process is initiated by booting a dump tool, which
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index f0c86fbb3b48..5af8b131ccbc 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -23,7 +23,6 @@ show up in /proc/sys/kernel:
23- auto_msgmni 23- auto_msgmni
24- bootloader_type [ X86 only ] 24- bootloader_type [ X86 only ]
25- bootloader_version [ X86 only ] 25- bootloader_version [ X86 only ]
26- callhome [ S390 only ]
27- cap_last_cap 26- cap_last_cap
28- core_pattern 27- core_pattern
29- core_pipe_limit 28- core_pipe_limit
@@ -171,21 +170,6 @@ Documentation/x86/boot.txt for additional information.
171 170
172============================================================== 171==============================================================
173 172
174callhome:
175
176Controls the kernel's callhome behavior in case of a kernel panic.
177
178The s390 hardware allows an operating system to send a notification
179to a service organization (callhome) in case of an operating system panic.
180
181When the value in this file is 0 (which is the default behavior)
182nothing happens in case of a kernel panic. If this value is set to "1"
183the complete kernel oops message is send to the IBM customer service
184organization in case the mainframe the Linux operating system is running
185on has a service contract with IBM.
186
187==============================================================
188
189cap_last_cap 173cap_last_cap
190 174
191Highest valid capability of the running kernel. Exports 175Highest valid capability of the running kernel. Exports
diff --git a/Documentation/translations/ko_KR/memory-barriers.txt b/Documentation/translations/ko_KR/memory-barriers.txt
index db0b9d8619f1..5f3c74dcad43 100644
--- a/Documentation/translations/ko_KR/memory-barriers.txt
+++ b/Documentation/translations/ko_KR/memory-barriers.txt
@@ -24,7 +24,7 @@ Documentation/memory-barriers.txt
24 ========================= 24 =========================
25 25
26저자: David Howells <dhowells@redhat.com> 26저자: David Howells <dhowells@redhat.com>
27 Paul E. McKenney <paulmck@linux.vnet.ibm.com> 27 Paul E. McKenney <paulmck@linux.ibm.com>
28 Will Deacon <will.deacon@arm.com> 28 Will Deacon <will.deacon@arm.com>
29 Peter Zijlstra <peterz@infradead.org> 29 Peter Zijlstra <peterz@infradead.org>
30 30
diff --git a/MAINTAINERS b/MAINTAINERS
index d0ed735994a5..a2b97d38a704 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1306,6 +1306,12 @@ S: Maintained
1306F: Documentation/devicetree/bindings/interrupt-controller/arm,vic.txt 1306F: Documentation/devicetree/bindings/interrupt-controller/arm,vic.txt
1307F: drivers/irqchip/irq-vic.c 1307F: drivers/irqchip/irq-vic.c
1308 1308
1309AMAZON ANNAPURNA LABS FIC DRIVER
1310M: Talel Shenhar <talel@amazon.com>
1311S: Maintained
1312F: Documentation/devicetree/bindings/interrupt-controller/amazon,al-fic.txt
1313F: drivers/irqchip/irq-al-fic.c
1314
1309ARM SMMU DRIVERS 1315ARM SMMU DRIVERS
1310M: Will Deacon <will@kernel.org> 1316M: Will Deacon <will@kernel.org>
1311R: Robin Murphy <robin.murphy@arm.com> 1317R: Robin Murphy <robin.murphy@arm.com>
@@ -3122,6 +3128,7 @@ F: arch/arm/mach-bcm/
3122BROADCOM BCM2835 ARM ARCHITECTURE 3128BROADCOM BCM2835 ARM ARCHITECTURE
3123M: Eric Anholt <eric@anholt.net> 3129M: Eric Anholt <eric@anholt.net>
3124M: Stefan Wahren <wahrenst@gmx.net> 3130M: Stefan Wahren <wahrenst@gmx.net>
3131L: bcm-kernel-feedback-list@broadcom.com
3125L: linux-rpi-kernel@lists.infradead.org (moderated for non-subscribers) 3132L: linux-rpi-kernel@lists.infradead.org (moderated for non-subscribers)
3126L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) 3133L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
3127T: git git://github.com/anholt/linux 3134T: git git://github.com/anholt/linux
@@ -3151,6 +3158,7 @@ F: arch/arm/boot/dts/bcm953012*
3151 3158
3152BROADCOM BCM53573 ARM ARCHITECTURE 3159BROADCOM BCM53573 ARM ARCHITECTURE
3153M: Rafał Miłecki <rafal@milecki.pl> 3160M: Rafał Miłecki <rafal@milecki.pl>
3161L: bcm-kernel-feedback-list@broadcom.com
3154L: linux-arm-kernel@lists.infradead.org 3162L: linux-arm-kernel@lists.infradead.org
3155S: Maintained 3163S: Maintained
3156F: arch/arm/boot/dts/bcm53573* 3164F: arch/arm/boot/dts/bcm53573*
@@ -3940,6 +3948,14 @@ M: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
3940S: Maintained 3948S: Maintained
3941F: .clang-format 3949F: .clang-format
3942 3950
3951CLANG/LLVM BUILD SUPPORT
3952L: clang-built-linux@googlegroups.com
3953W: https://clangbuiltlinux.github.io/
3954B: https://github.com/ClangBuiltLinux/linux/issues
3955C: irc://chat.freenode.net/clangbuiltlinux
3956S: Supported
3957K: \b(?i:clang|llvm)\b
3958
3943CLEANCACHE API 3959CLEANCACHE API
3944M: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> 3960M: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
3945L: linux-kernel@vger.kernel.org 3961L: linux-kernel@vger.kernel.org
@@ -6327,6 +6343,13 @@ L: linux-i2c@vger.kernel.org
6327S: Maintained 6343S: Maintained
6328F: drivers/i2c/busses/i2c-cpm.c 6344F: drivers/i2c/busses/i2c-cpm.c
6329 6345
6346FREESCALE IMX DDR PMU DRIVER
6347M: Frank Li <Frank.li@nxp.com>
6348L: linux-arm-kernel@lists.infradead.org
6349S: Maintained
6350F: drivers/perf/fsl_imx8_ddr_perf.c
6351F: Documentation/devicetree/bindings/perf/fsl-imx-ddr.txt
6352
6330FREESCALE IMX LPI2C DRIVER 6353FREESCALE IMX LPI2C DRIVER
6331M: Dong Aisheng <aisheng.dong@nxp.com> 6354M: Dong Aisheng <aisheng.dong@nxp.com>
6332L: linux-i2c@vger.kernel.org 6355L: linux-i2c@vger.kernel.org
@@ -6664,6 +6687,18 @@ L: kvm@vger.kernel.org
6664S: Supported 6687S: Supported
6665F: drivers/uio/uio_pci_generic.c 6688F: drivers/uio/uio_pci_generic.c
6666 6689
6690GENERIC VDSO LIBRARY:
6691M: Andy Lutomirski <luto@kernel.org>
6692M: Thomas Gleixner <tglx@linutronix.de>
6693M: Vincenzo Frascino <vincenzo.frascino@arm.com>
6694L: linux-kernel@vger.kernel.org
6695T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/vdso
6696S: Maintained
6697F: lib/vdso/
6698F: kernel/time/vsyscall.c
6699F: include/vdso/
6700F: include/asm-generic/vdso/vsyscall.h
6701
6667GENWQE (IBM Generic Workqueue Card) 6702GENWQE (IBM Generic Workqueue Card)
6668M: Frank Haverkamp <haver@linux.ibm.com> 6703M: Frank Haverkamp <haver@linux.ibm.com>
6669S: Supported 6704S: Supported
@@ -7301,6 +7336,7 @@ F: arch/x86/include/asm/trace/hyperv.h
7301F: arch/x86/include/asm/hyperv-tlfs.h 7336F: arch/x86/include/asm/hyperv-tlfs.h
7302F: arch/x86/kernel/cpu/mshyperv.c 7337F: arch/x86/kernel/cpu/mshyperv.c
7303F: arch/x86/hyperv 7338F: arch/x86/hyperv
7339F: drivers/clocksource/hyperv_timer.c
7304F: drivers/hid/hid-hyperv.c 7340F: drivers/hid/hid-hyperv.c
7305F: drivers/hv/ 7341F: drivers/hv/
7306F: drivers/input/serio/hyperv-keyboard.c 7342F: drivers/input/serio/hyperv-keyboard.c
@@ -7311,6 +7347,7 @@ F: drivers/uio/uio_hv_generic.c
7311F: drivers/video/fbdev/hyperv_fb.c 7347F: drivers/video/fbdev/hyperv_fb.c
7312F: drivers/iommu/hyperv_iommu.c 7348F: drivers/iommu/hyperv_iommu.c
7313F: net/vmw_vsock/hyperv_transport.c 7349F: net/vmw_vsock/hyperv_transport.c
7350F: include/clocksource/hyperv_timer.h
7314F: include/linux/hyperv.h 7351F: include/linux/hyperv.h
7315F: include/uapi/linux/hyperv.h 7352F: include/uapi/linux/hyperv.h
7316F: tools/hv/ 7353F: tools/hv/
@@ -7800,7 +7837,7 @@ INGENIC JZ4780 NAND DRIVER
7800M: Harvey Hunt <harveyhuntnexus@gmail.com> 7837M: Harvey Hunt <harveyhuntnexus@gmail.com>
7801L: linux-mtd@lists.infradead.org 7838L: linux-mtd@lists.infradead.org
7802S: Maintained 7839S: Maintained
7803F: drivers/mtd/nand/raw/jz4780_* 7840F: drivers/mtd/nand/raw/ingenic/
7804 7841
7805INOTIFY 7842INOTIFY
7806M: Jan Kara <jack@suse.cz> 7843M: Jan Kara <jack@suse.cz>
@@ -13701,7 +13738,7 @@ L: linux-s390@vger.kernel.org
13701L: kvm@vger.kernel.org 13738L: kvm@vger.kernel.org
13702S: Supported 13739S: Supported
13703F: drivers/s390/cio/vfio_ccw* 13740F: drivers/s390/cio/vfio_ccw*
13704F: Documentation/s390/vfio-ccw.txt 13741F: Documentation/s390/vfio-ccw.rst
13705F: include/uapi/linux/vfio_ccw.h 13742F: include/uapi/linux/vfio_ccw.h
13706 13743
13707S390 ZCRYPT DRIVER 13744S390 ZCRYPT DRIVER
@@ -13721,7 +13758,7 @@ S: Supported
13721F: drivers/s390/crypto/vfio_ap_drv.c 13758F: drivers/s390/crypto/vfio_ap_drv.c
13722F: drivers/s390/crypto/vfio_ap_private.h 13759F: drivers/s390/crypto/vfio_ap_private.h
13723F: drivers/s390/crypto/vfio_ap_ops.c 13760F: drivers/s390/crypto/vfio_ap_ops.c
13724F: Documentation/s390/vfio-ap.txt 13761F: Documentation/s390/vfio-ap.rst
13725 13762
13726S390 ZFCP DRIVER 13763S390 ZFCP DRIVER
13727M: Steffen Maier <maier@linux.ibm.com> 13764M: Steffen Maier <maier@linux.ibm.com>
@@ -15493,6 +15530,7 @@ F: drivers/dma/tegra*
15493 15530
15494TEGRA I2C DRIVER 15531TEGRA I2C DRIVER
15495M: Laxman Dewangan <ldewangan@nvidia.com> 15532M: Laxman Dewangan <ldewangan@nvidia.com>
15533R: Dmitry Osipenko <digetx@gmail.com>
15496S: Supported 15534S: Supported
15497F: drivers/i2c/busses/i2c-tegra.c 15535F: drivers/i2c/busses/i2c-tegra.c
15498 15536
@@ -17485,6 +17523,12 @@ Q: https://patchwork.linuxtv.org/project/linux-media/list/
17485S: Maintained 17523S: Maintained
17486F: drivers/media/dvb-frontends/zd1301_demod* 17524F: drivers/media/dvb-frontends/zd1301_demod*
17487 17525
17526ZHAOXIN PROCESSOR SUPPORT
17527M: Tony W Wang-oc <TonyWWang-oc@zhaoxin.com>
17528L: linux-kernel@vger.kernel.org
17529S: Maintained
17530F: arch/x86/kernel/cpu/zhaoxin.c
17531
17488ZPOOL COMPRESSED PAGE STORAGE API 17532ZPOOL COMPRESSED PAGE STORAGE API
17489M: Dan Streetman <ddstreet@ieee.org> 17533M: Dan Streetman <ddstreet@ieee.org>
17490L: linux-mm@kvack.org 17534L: linux-mm@kvack.org
diff --git a/Makefile b/Makefile
index 7a7c17eb0cbf..3e4868a6498b 100644
--- a/Makefile
+++ b/Makefile
@@ -2,8 +2,8 @@
2VERSION = 5 2VERSION = 5
3PATCHLEVEL = 2 3PATCHLEVEL = 2
4SUBLEVEL = 0 4SUBLEVEL = 0
5EXTRAVERSION = -rc6 5EXTRAVERSION =
6NAME = Golden Lions 6NAME = Bobtail Squid
7 7
8# *DOCUMENTATION* 8# *DOCUMENTATION*
9# To see a list of typical targets execute "make help" 9# To see a list of typical targets execute "make help"
diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h
index 150a1c5d6a2c..2144530d1428 100644
--- a/arch/alpha/include/asm/atomic.h
+++ b/arch/alpha/include/asm/atomic.h
@@ -93,9 +93,9 @@ static inline int atomic_fetch_##op##_relaxed(int i, atomic_t *v) \
93} 93}
94 94
95#define ATOMIC64_OP(op, asm_op) \ 95#define ATOMIC64_OP(op, asm_op) \
96static __inline__ void atomic64_##op(long i, atomic64_t * v) \ 96static __inline__ void atomic64_##op(s64 i, atomic64_t * v) \
97{ \ 97{ \
98 unsigned long temp; \ 98 s64 temp; \
99 __asm__ __volatile__( \ 99 __asm__ __volatile__( \
100 "1: ldq_l %0,%1\n" \ 100 "1: ldq_l %0,%1\n" \
101 " " #asm_op " %0,%2,%0\n" \ 101 " " #asm_op " %0,%2,%0\n" \
@@ -109,9 +109,9 @@ static __inline__ void atomic64_##op(long i, atomic64_t * v) \
109} \ 109} \
110 110
111#define ATOMIC64_OP_RETURN(op, asm_op) \ 111#define ATOMIC64_OP_RETURN(op, asm_op) \
112static __inline__ long atomic64_##op##_return_relaxed(long i, atomic64_t * v) \ 112static __inline__ s64 atomic64_##op##_return_relaxed(s64 i, atomic64_t * v) \
113{ \ 113{ \
114 long temp, result; \ 114 s64 temp, result; \
115 __asm__ __volatile__( \ 115 __asm__ __volatile__( \
116 "1: ldq_l %0,%1\n" \ 116 "1: ldq_l %0,%1\n" \
117 " " #asm_op " %0,%3,%2\n" \ 117 " " #asm_op " %0,%3,%2\n" \
@@ -128,9 +128,9 @@ static __inline__ long atomic64_##op##_return_relaxed(long i, atomic64_t * v) \
128} 128}
129 129
130#define ATOMIC64_FETCH_OP(op, asm_op) \ 130#define ATOMIC64_FETCH_OP(op, asm_op) \
131static __inline__ long atomic64_fetch_##op##_relaxed(long i, atomic64_t * v) \ 131static __inline__ s64 atomic64_fetch_##op##_relaxed(s64 i, atomic64_t * v) \
132{ \ 132{ \
133 long temp, result; \ 133 s64 temp, result; \
134 __asm__ __volatile__( \ 134 __asm__ __volatile__( \
135 "1: ldq_l %2,%1\n" \ 135 "1: ldq_l %2,%1\n" \
136 " " #asm_op " %2,%3,%0\n" \ 136 " " #asm_op " %2,%3,%0\n" \
@@ -246,9 +246,9 @@ static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u)
246 * Atomically adds @a to @v, so long as it was not @u. 246 * Atomically adds @a to @v, so long as it was not @u.
247 * Returns the old value of @v. 247 * Returns the old value of @v.
248 */ 248 */
249static __inline__ long atomic64_fetch_add_unless(atomic64_t *v, long a, long u) 249static __inline__ s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
250{ 250{
251 long c, new, old; 251 s64 c, new, old;
252 smp_mb(); 252 smp_mb();
253 __asm__ __volatile__( 253 __asm__ __volatile__(
254 "1: ldq_l %[old],%[mem]\n" 254 "1: ldq_l %[old],%[mem]\n"
@@ -276,9 +276,9 @@ static __inline__ long atomic64_fetch_add_unless(atomic64_t *v, long a, long u)
276 * The function returns the old value of *v minus 1, even if 276 * The function returns the old value of *v minus 1, even if
277 * the atomic variable, v, was not decremented. 277 * the atomic variable, v, was not decremented.
278 */ 278 */
279static inline long atomic64_dec_if_positive(atomic64_t *v) 279static inline s64 atomic64_dec_if_positive(atomic64_t *v)
280{ 280{
281 long old, tmp; 281 s64 old, tmp;
282 smp_mb(); 282 smp_mb();
283 __asm__ __volatile__( 283 __asm__ __volatile__(
284 "1: ldq_l %[old],%[mem]\n" 284 "1: ldq_l %[old],%[mem]\n"
diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c
index d0dccae53ba9..5f90df30be20 100644
--- a/arch/alpha/kernel/smp.c
+++ b/arch/alpha/kernel/smp.c
@@ -614,8 +614,7 @@ void
614smp_imb(void) 614smp_imb(void)
615{ 615{
616 /* Must wait other processors to flush their icache before continue. */ 616 /* Must wait other processors to flush their icache before continue. */
617 if (on_each_cpu(ipi_imb, NULL, 1)) 617 on_each_cpu(ipi_imb, NULL, 1);
618 printk(KERN_CRIT "smp_imb: timed out\n");
619} 618}
620EXPORT_SYMBOL(smp_imb); 619EXPORT_SYMBOL(smp_imb);
621 620
@@ -630,9 +629,7 @@ flush_tlb_all(void)
630{ 629{
631 /* Although we don't have any data to pass, we do want to 630 /* Although we don't have any data to pass, we do want to
632 synchronize with the other processors. */ 631 synchronize with the other processors. */
633 if (on_each_cpu(ipi_flush_tlb_all, NULL, 1)) { 632 on_each_cpu(ipi_flush_tlb_all, NULL, 1);
634 printk(KERN_CRIT "flush_tlb_all: timed out\n");
635 }
636} 633}
637 634
638#define asn_locked() (cpu_data[smp_processor_id()].asn_lock) 635#define asn_locked() (cpu_data[smp_processor_id()].asn_lock)
@@ -667,9 +664,7 @@ flush_tlb_mm(struct mm_struct *mm)
667 } 664 }
668 } 665 }
669 666
670 if (smp_call_function(ipi_flush_tlb_mm, mm, 1)) { 667 smp_call_function(ipi_flush_tlb_mm, mm, 1);
671 printk(KERN_CRIT "flush_tlb_mm: timed out\n");
672 }
673 668
674 preempt_enable(); 669 preempt_enable();
675} 670}
@@ -720,9 +715,7 @@ flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
720 data.mm = mm; 715 data.mm = mm;
721 data.addr = addr; 716 data.addr = addr;
722 717
723 if (smp_call_function(ipi_flush_tlb_page, &data, 1)) { 718 smp_call_function(ipi_flush_tlb_page, &data, 1);
724 printk(KERN_CRIT "flush_tlb_page: timed out\n");
725 }
726 719
727 preempt_enable(); 720 preempt_enable();
728} 721}
@@ -772,9 +765,7 @@ flush_icache_user_range(struct vm_area_struct *vma, struct page *page,
772 } 765 }
773 } 766 }
774 767
775 if (smp_call_function(ipi_flush_icache_page, mm, 1)) { 768 smp_call_function(ipi_flush_icache_page, mm, 1);
776 printk(KERN_CRIT "flush_icache_page: timed out\n");
777 }
778 769
779 preempt_enable(); 770 preempt_enable();
780} 771}
diff --git a/arch/alpha/oprofile/common.c b/arch/alpha/oprofile/common.c
index 310a4ce1dccc..1b1259c7d7d1 100644
--- a/arch/alpha/oprofile/common.c
+++ b/arch/alpha/oprofile/common.c
@@ -65,7 +65,7 @@ op_axp_setup(void)
65 model->reg_setup(&reg, ctr, &sys); 65 model->reg_setup(&reg, ctr, &sys);
66 66
67 /* Configure the registers on all cpus. */ 67 /* Configure the registers on all cpus. */
68 (void)smp_call_function(model->cpu_setup, &reg, 1); 68 smp_call_function(model->cpu_setup, &reg, 1);
69 model->cpu_setup(&reg); 69 model->cpu_setup(&reg);
70 return 0; 70 return 0;
71} 71}
@@ -86,7 +86,7 @@ op_axp_cpu_start(void *dummy)
86static int 86static int
87op_axp_start(void) 87op_axp_start(void)
88{ 88{
89 (void)smp_call_function(op_axp_cpu_start, NULL, 1); 89 smp_call_function(op_axp_cpu_start, NULL, 1);
90 op_axp_cpu_start(NULL); 90 op_axp_cpu_start(NULL);
91 return 0; 91 return 0;
92} 92}
@@ -101,7 +101,7 @@ op_axp_cpu_stop(void *dummy)
101static void 101static void
102op_axp_stop(void) 102op_axp_stop(void)
103{ 103{
104 (void)smp_call_function(op_axp_cpu_stop, NULL, 1); 104 smp_call_function(op_axp_cpu_stop, NULL, 1);
105 op_axp_cpu_stop(NULL); 105 op_axp_cpu_stop(NULL);
106} 106}
107 107
diff --git a/arch/arc/Makefile b/arch/arc/Makefile
index 480af1af9e63..03a0b19c92cd 100644
--- a/arch/arc/Makefile
+++ b/arch/arc/Makefile
@@ -5,6 +5,10 @@
5 5
6KBUILD_DEFCONFIG := nsim_hs_defconfig 6KBUILD_DEFCONFIG := nsim_hs_defconfig
7 7
8ifeq ($(CROSS_COMPILE),)
9CROSS_COMPILE := $(call cc-cross-prefix, arc-linux- arceb-linux-)
10endif
11
8cflags-y += -fno-common -pipe -fno-builtin -mmedium-calls -D__linux__ 12cflags-y += -fno-common -pipe -fno-builtin -mmedium-calls -D__linux__
9cflags-$(CONFIG_ISA_ARCOMPACT) += -mA7 13cflags-$(CONFIG_ISA_ARCOMPACT) += -mA7
10cflags-$(CONFIG_ISA_ARCV2) += -mcpu=hs38 14cflags-$(CONFIG_ISA_ARCV2) += -mcpu=hs38
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index 17cf1c657cb3..7298ce84762e 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -321,14 +321,14 @@ ATOMIC_OPS(xor, ^=, CTOP_INST_AXOR_DI_R2_R2_R3)
321 */ 321 */
322 322
323typedef struct { 323typedef struct {
324 aligned_u64 counter; 324 s64 __aligned(8) counter;
325} atomic64_t; 325} atomic64_t;
326 326
327#define ATOMIC64_INIT(a) { (a) } 327#define ATOMIC64_INIT(a) { (a) }
328 328
329static inline long long atomic64_read(const atomic64_t *v) 329static inline s64 atomic64_read(const atomic64_t *v)
330{ 330{
331 unsigned long long val; 331 s64 val;
332 332
333 __asm__ __volatile__( 333 __asm__ __volatile__(
334 " ldd %0, [%1] \n" 334 " ldd %0, [%1] \n"
@@ -338,7 +338,7 @@ static inline long long atomic64_read(const atomic64_t *v)
338 return val; 338 return val;
339} 339}
340 340
341static inline void atomic64_set(atomic64_t *v, long long a) 341static inline void atomic64_set(atomic64_t *v, s64 a)
342{ 342{
343 /* 343 /*
344 * This could have been a simple assignment in "C" but would need 344 * This could have been a simple assignment in "C" but would need
@@ -359,9 +359,9 @@ static inline void atomic64_set(atomic64_t *v, long long a)
359} 359}
360 360
361#define ATOMIC64_OP(op, op1, op2) \ 361#define ATOMIC64_OP(op, op1, op2) \
362static inline void atomic64_##op(long long a, atomic64_t *v) \ 362static inline void atomic64_##op(s64 a, atomic64_t *v) \
363{ \ 363{ \
364 unsigned long long val; \ 364 s64 val; \
365 \ 365 \
366 __asm__ __volatile__( \ 366 __asm__ __volatile__( \
367 "1: \n" \ 367 "1: \n" \
@@ -372,13 +372,13 @@ static inline void atomic64_##op(long long a, atomic64_t *v) \
372 " bnz 1b \n" \ 372 " bnz 1b \n" \
373 : "=&r"(val) \ 373 : "=&r"(val) \
374 : "r"(&v->counter), "ir"(a) \ 374 : "r"(&v->counter), "ir"(a) \
375 : "cc"); \ 375 : "cc"); \
376} \ 376} \
377 377
378#define ATOMIC64_OP_RETURN(op, op1, op2) \ 378#define ATOMIC64_OP_RETURN(op, op1, op2) \
379static inline long long atomic64_##op##_return(long long a, atomic64_t *v) \ 379static inline s64 atomic64_##op##_return(s64 a, atomic64_t *v) \
380{ \ 380{ \
381 unsigned long long val; \ 381 s64 val; \
382 \ 382 \
383 smp_mb(); \ 383 smp_mb(); \
384 \ 384 \
@@ -399,9 +399,9 @@ static inline long long atomic64_##op##_return(long long a, atomic64_t *v) \
399} 399}
400 400
401#define ATOMIC64_FETCH_OP(op, op1, op2) \ 401#define ATOMIC64_FETCH_OP(op, op1, op2) \
402static inline long long atomic64_fetch_##op(long long a, atomic64_t *v) \ 402static inline s64 atomic64_fetch_##op(s64 a, atomic64_t *v) \
403{ \ 403{ \
404 unsigned long long val, orig; \ 404 s64 val, orig; \
405 \ 405 \
406 smp_mb(); \ 406 smp_mb(); \
407 \ 407 \
@@ -441,10 +441,10 @@ ATOMIC64_OPS(xor, xor, xor)
441#undef ATOMIC64_OP_RETURN 441#undef ATOMIC64_OP_RETURN
442#undef ATOMIC64_OP 442#undef ATOMIC64_OP
443 443
444static inline long long 444static inline s64
445atomic64_cmpxchg(atomic64_t *ptr, long long expected, long long new) 445atomic64_cmpxchg(atomic64_t *ptr, s64 expected, s64 new)
446{ 446{
447 long long prev; 447 s64 prev;
448 448
449 smp_mb(); 449 smp_mb();
450 450
@@ -464,9 +464,9 @@ atomic64_cmpxchg(atomic64_t *ptr, long long expected, long long new)
464 return prev; 464 return prev;
465} 465}
466 466
467static inline long long atomic64_xchg(atomic64_t *ptr, long long new) 467static inline s64 atomic64_xchg(atomic64_t *ptr, s64 new)
468{ 468{
469 long long prev; 469 s64 prev;
470 470
471 smp_mb(); 471 smp_mb();
472 472
@@ -492,9 +492,9 @@ static inline long long atomic64_xchg(atomic64_t *ptr, long long new)
492 * the atomic variable, v, was not decremented. 492 * the atomic variable, v, was not decremented.
493 */ 493 */
494 494
495static inline long long atomic64_dec_if_positive(atomic64_t *v) 495static inline s64 atomic64_dec_if_positive(atomic64_t *v)
496{ 496{
497 long long val; 497 s64 val;
498 498
499 smp_mb(); 499 smp_mb();
500 500
@@ -525,10 +525,9 @@ static inline long long atomic64_dec_if_positive(atomic64_t *v)
525 * Atomically adds @a to @v, if it was not @u. 525 * Atomically adds @a to @v, if it was not @u.
526 * Returns the old value of @v 526 * Returns the old value of @v
527 */ 527 */
528static inline long long atomic64_fetch_add_unless(atomic64_t *v, long long a, 528static inline s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
529 long long u)
530{ 529{
531 long long old, temp; 530 s64 old, temp;
532 531
533 smp_mb(); 532 smp_mb();
534 533
diff --git a/arch/arc/plat-hsdk/platform.c b/arch/arc/plat-hsdk/platform.c
index 6a91a742ab3d..7dd2dd335cf6 100644
--- a/arch/arc/plat-hsdk/platform.c
+++ b/arch/arc/plat-hsdk/platform.c
@@ -32,8 +32,6 @@ static void __init hsdk_init_per_cpu(unsigned int cpu)
32 32
33#define ARC_PERIPHERAL_BASE 0xf0000000 33#define ARC_PERIPHERAL_BASE 0xf0000000
34#define CREG_BASE (ARC_PERIPHERAL_BASE + 0x1000) 34#define CREG_BASE (ARC_PERIPHERAL_BASE + 0x1000)
35#define CREG_PAE (CREG_BASE + 0x180)
36#define CREG_PAE_UPDATE (CREG_BASE + 0x194)
37 35
38#define SDIO_BASE (ARC_PERIPHERAL_BASE + 0xA000) 36#define SDIO_BASE (ARC_PERIPHERAL_BASE + 0xA000)
39#define SDIO_UHS_REG_EXT (SDIO_BASE + 0x108) 37#define SDIO_UHS_REG_EXT (SDIO_BASE + 0x108)
@@ -99,20 +97,167 @@ static void __init hsdk_enable_gpio_intc_wire(void)
99 iowrite32(GPIO_INT_CONNECTED_MASK, (void __iomem *) GPIO_INTEN); 97 iowrite32(GPIO_INT_CONNECTED_MASK, (void __iomem *) GPIO_INTEN);
100} 98}
101 99
102static void __init hsdk_init_early(void) 100enum hsdk_axi_masters {
101 M_HS_CORE = 0,
102 M_HS_RTT,
103 M_AXI_TUN,
104 M_HDMI_VIDEO,
105 M_HDMI_AUDIO,
106 M_USB_HOST,
107 M_ETHERNET,
108 M_SDIO,
109 M_GPU,
110 M_DMAC_0,
111 M_DMAC_1,
112 M_DVFS
113};
114
115#define UPDATE_VAL 1
116
117/*
118 * This is a modified configuration of the AXI bridge. Default settings
119 * are specified in "Table 111 CREG Address Decoder register reset values".
120 *
121 * AXI_M_m_SLV{0|1} - Slave Select register for master 'm'.
122 * Possible slaves are:
123 * - 0 => no slave selected
124 * - 1 => DDR controller port #1
125 * - 2 => SRAM controller
126 * - 3 => AXI tunnel
127 * - 4 => EBI controller
128 * - 5 => ROM controller
129 * - 6 => AXI2APB bridge
130 * - 7 => DDR controller port #2
131 * - 8 => DDR controller port #3
132 * - 9 => HS38x4 IOC
133 * - 10 => HS38x4 DMI
134 * AXI_M_m_OFFSET{0|1} - Addr Offset register for master 'm'
135 *
136 * Please read ARC HS Development IC Specification, section 17.2 for more
137 * information about apertures configuration.
138 *
139 * m master AXI_M_m_SLV0 AXI_M_m_SLV1 AXI_M_m_OFFSET0 AXI_M_m_OFFSET1
140 * 0 HS (CBU) 0x11111111 0x63111111 0xFEDCBA98 0x0E543210
141 * 1 HS (RTT) 0x77777777 0x77777777 0xFEDCBA98 0x76543210
142 * 2 AXI Tunnel 0x88888888 0x88888888 0xFEDCBA98 0x76543210
143 * 3 HDMI-VIDEO 0x77777777 0x77777777 0xFEDCBA98 0x76543210
144 * 4 HDMI-AUDIO 0x77777777 0x77777777 0xFEDCBA98 0x76543210
145 * 5 USB-HOST 0x77777777 0x77999999 0xFEDCBA98 0x76DCBA98
146 * 6 ETHERNET 0x77777777 0x77999999 0xFEDCBA98 0x76DCBA98
147 * 7 SDIO 0x77777777 0x77999999 0xFEDCBA98 0x76DCBA98
148 * 8 GPU 0x77777777 0x77777777 0xFEDCBA98 0x76543210
149 * 9 DMAC (port #1) 0x77777777 0x77777777 0xFEDCBA98 0x76543210
150 * 10 DMAC (port #2) 0x77777777 0x77777777 0xFEDCBA98 0x76543210
151 * 11 DVFS 0x00000000 0x60000000 0x00000000 0x00000000
152 */
153
154#define CREG_AXI_M_SLV0(m) ((void __iomem *)(CREG_BASE + 0x20 * (m)))
155#define CREG_AXI_M_SLV1(m) ((void __iomem *)(CREG_BASE + 0x20 * (m) + 0x04))
156#define CREG_AXI_M_OFT0(m) ((void __iomem *)(CREG_BASE + 0x20 * (m) + 0x08))
157#define CREG_AXI_M_OFT1(m) ((void __iomem *)(CREG_BASE + 0x20 * (m) + 0x0C))
158#define CREG_AXI_M_UPDT(m) ((void __iomem *)(CREG_BASE + 0x20 * (m) + 0x14))
159
160#define CREG_AXI_M_HS_CORE_BOOT ((void __iomem *)(CREG_BASE + 0x010))
161
162#define CREG_PAE ((void __iomem *)(CREG_BASE + 0x180))
163#define CREG_PAE_UPDT ((void __iomem *)(CREG_BASE + 0x194))
164
165static void __init hsdk_init_memory_bridge(void)
103{ 166{
167 u32 reg;
168
169 /*
170 * M_HS_CORE has one unique register - BOOT.
171 * We need to clean boot mirror (BOOT[1:0]) bits in them to avoid first
172 * aperture to be masked by 'boot mirror'.
173 */
174 reg = readl(CREG_AXI_M_HS_CORE_BOOT) & (~0x3);
175 writel(reg, CREG_AXI_M_HS_CORE_BOOT);
176 writel(0x11111111, CREG_AXI_M_SLV0(M_HS_CORE));
177 writel(0x63111111, CREG_AXI_M_SLV1(M_HS_CORE));
178 writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_HS_CORE));
179 writel(0x0E543210, CREG_AXI_M_OFT1(M_HS_CORE));
180 writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_HS_CORE));
181
182 writel(0x77777777, CREG_AXI_M_SLV0(M_HS_RTT));
183 writel(0x77777777, CREG_AXI_M_SLV1(M_HS_RTT));
184 writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_HS_RTT));
185 writel(0x76543210, CREG_AXI_M_OFT1(M_HS_RTT));
186 writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_HS_RTT));
187
188 writel(0x88888888, CREG_AXI_M_SLV0(M_AXI_TUN));
189 writel(0x88888888, CREG_AXI_M_SLV1(M_AXI_TUN));
190 writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_AXI_TUN));
191 writel(0x76543210, CREG_AXI_M_OFT1(M_AXI_TUN));
192 writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_AXI_TUN));
193
194 writel(0x77777777, CREG_AXI_M_SLV0(M_HDMI_VIDEO));
195 writel(0x77777777, CREG_AXI_M_SLV1(M_HDMI_VIDEO));
196 writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_HDMI_VIDEO));
197 writel(0x76543210, CREG_AXI_M_OFT1(M_HDMI_VIDEO));
198 writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_HDMI_VIDEO));
199
200 writel(0x77777777, CREG_AXI_M_SLV0(M_HDMI_AUDIO));
201 writel(0x77777777, CREG_AXI_M_SLV1(M_HDMI_AUDIO));
202 writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_HDMI_AUDIO));
203 writel(0x76543210, CREG_AXI_M_OFT1(M_HDMI_AUDIO));
204 writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_HDMI_AUDIO));
205
206 writel(0x77777777, CREG_AXI_M_SLV0(M_USB_HOST));
207 writel(0x77999999, CREG_AXI_M_SLV1(M_USB_HOST));
208 writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_USB_HOST));
209 writel(0x76DCBA98, CREG_AXI_M_OFT1(M_USB_HOST));
210 writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_USB_HOST));
211
212 writel(0x77777777, CREG_AXI_M_SLV0(M_ETHERNET));
213 writel(0x77999999, CREG_AXI_M_SLV1(M_ETHERNET));
214 writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_ETHERNET));
215 writel(0x76DCBA98, CREG_AXI_M_OFT1(M_ETHERNET));
216 writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_ETHERNET));
217
218 writel(0x77777777, CREG_AXI_M_SLV0(M_SDIO));
219 writel(0x77999999, CREG_AXI_M_SLV1(M_SDIO));
220 writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_SDIO));
221 writel(0x76DCBA98, CREG_AXI_M_OFT1(M_SDIO));
222 writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_SDIO));
223
224 writel(0x77777777, CREG_AXI_M_SLV0(M_GPU));
225 writel(0x77777777, CREG_AXI_M_SLV1(M_GPU));
226 writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_GPU));
227 writel(0x76543210, CREG_AXI_M_OFT1(M_GPU));
228 writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_GPU));
229
230 writel(0x77777777, CREG_AXI_M_SLV0(M_DMAC_0));
231 writel(0x77777777, CREG_AXI_M_SLV1(M_DMAC_0));
232 writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_DMAC_0));
233 writel(0x76543210, CREG_AXI_M_OFT1(M_DMAC_0));
234 writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_DMAC_0));
235
236 writel(0x77777777, CREG_AXI_M_SLV0(M_DMAC_1));
237 writel(0x77777777, CREG_AXI_M_SLV1(M_DMAC_1));
238 writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_DMAC_1));
239 writel(0x76543210, CREG_AXI_M_OFT1(M_DMAC_1));
240 writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_DMAC_1));
241
242 writel(0x00000000, CREG_AXI_M_SLV0(M_DVFS));
243 writel(0x60000000, CREG_AXI_M_SLV1(M_DVFS));
244 writel(0x00000000, CREG_AXI_M_OFT0(M_DVFS));
245 writel(0x00000000, CREG_AXI_M_OFT1(M_DVFS));
246 writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_DVFS));
247
104 /* 248 /*
105 * PAE remapping for DMA clients does not work due to an RTL bug, so 249 * PAE remapping for DMA clients does not work due to an RTL bug, so
106 * CREG_PAE register must be programmed to all zeroes, otherwise it 250 * CREG_PAE register must be programmed to all zeroes, otherwise it
107 * will cause problems with DMA to/from peripherals even if PAE40 is 251 * will cause problems with DMA to/from peripherals even if PAE40 is
108 * not used. 252 * not used.
109 */ 253 */
254 writel(0x00000000, CREG_PAE);
255 writel(UPDATE_VAL, CREG_PAE_UPDT);
256}
110 257
111 /* Default is 1, which means "PAE offset = 4GByte" */ 258static void __init hsdk_init_early(void)
112 writel_relaxed(0, (void __iomem *) CREG_PAE); 259{
113 260 hsdk_init_memory_bridge();
114 /* Really apply settings made above */
115 writel(1, (void __iomem *) CREG_PAE_UPDATE);
116 261
117 /* 262 /*
118 * Switch SDIO external ciu clock divider from default div-by-8 to 263 * Switch SDIO external ciu clock divider from default div-by-8 to
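
The hunk above programs the same four aperture registers (SLV0/SLV1/OFT0/OFT1 plus the update strobe) for every AXI master, with the per-master values taken from the table in the comment block. As a reading aid only, not part of the patch, the same sequence could be written table-driven. The struct and the hsdk_program_axi_masters() helper below are hypothetical names; the CREG_AXI_M_* macros, the M_* master IDs and UPDATE_VAL are taken from the code above.

struct hsdk_axi_master_cfg {
	unsigned int id;
	u32 slv0, slv1, oft0, oft1;
};

static const struct hsdk_axi_master_cfg hsdk_axi_masters[] __initconst = {
	/* Values mirror the per-master table in the comment block above. */
	{ M_HS_RTT,   0x77777777, 0x77777777, 0xFEDCBA98, 0x76543210 },
	{ M_AXI_TUN,  0x88888888, 0x88888888, 0xFEDCBA98, 0x76543210 },
	{ M_ETHERNET, 0x77777777, 0x77999999, 0xFEDCBA98, 0x76DCBA98 },
	/* ... remaining masters follow the same pattern ... */
};

static void __init hsdk_program_axi_masters(void)
{
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(hsdk_axi_masters); i++) {
		const struct hsdk_axi_master_cfg *m = &hsdk_axi_masters[i];

		writel(m->slv0, CREG_AXI_M_SLV0(m->id));
		writel(m->slv1, CREG_AXI_M_SLV1(m->id));
		writel(m->oft0, CREG_AXI_M_OFT0(m->id));
		writel(m->oft1, CREG_AXI_M_OFT1(m->id));
		writel(UPDATE_VAL, CREG_AXI_M_UPDT(m->id));
	}
}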
diff --git a/arch/arm/boot/dts/armada-xp-98dx3236.dtsi b/arch/arm/boot/dts/armada-xp-98dx3236.dtsi
index 59753470cd34..267d0c178e55 100644
--- a/arch/arm/boot/dts/armada-xp-98dx3236.dtsi
+++ b/arch/arm/boot/dts/armada-xp-98dx3236.dtsi
@@ -336,3 +336,11 @@
336 status = "disabled"; 336 status = "disabled";
337}; 337};
338 338
339&uart0 {
340 compatible = "marvell,armada-38x-uart";
341};
342
343&uart1 {
344 compatible = "marvell,armada-38x-uart";
345};
346
diff --git a/arch/arm/boot/dts/gemini-dlink-dir-685.dts b/arch/arm/boot/dts/gemini-dlink-dir-685.dts
index cfbfbc91a1e1..3613f05f8a80 100644
--- a/arch/arm/boot/dts/gemini-dlink-dir-685.dts
+++ b/arch/arm/boot/dts/gemini-dlink-dir-685.dts
@@ -20,7 +20,7 @@
20 }; 20 };
21 21
22 chosen { 22 chosen {
23 bootargs = "console=ttyS0,19200n8 root=/dev/sda1 rw rootwait"; 23 bootargs = "console=ttyS0,19200n8 root=/dev/sda1 rw rootwait consoleblank=300";
24 stdout-path = "uart0:19200n8"; 24 stdout-path = "uart0:19200n8";
25 }; 25 };
26 26
diff --git a/arch/arm/boot/dts/gemini-dlink-dns-313.dts b/arch/arm/boot/dts/gemini-dlink-dns-313.dts
index b12504e10f0b..360642a02a48 100644
--- a/arch/arm/boot/dts/gemini-dlink-dns-313.dts
+++ b/arch/arm/boot/dts/gemini-dlink-dns-313.dts
@@ -11,7 +11,7 @@
11 11
12/ { 12/ {
13 model = "D-Link DNS-313 1-Bay Network Storage Enclosure"; 13 model = "D-Link DNS-313 1-Bay Network Storage Enclosure";
14 compatible = "dlink,dir-313", "cortina,gemini"; 14 compatible = "dlink,dns-313", "cortina,gemini";
15 #address-cells = <1>; 15 #address-cells = <1>;
16 #size-cells = <1>; 16 #size-cells = <1>;
17 17
diff --git a/arch/arm/boot/dts/imx6ul.dtsi b/arch/arm/boot/dts/imx6ul.dtsi
index bbf010c73336..a7f6d1d58e20 100644
--- a/arch/arm/boot/dts/imx6ul.dtsi
+++ b/arch/arm/boot/dts/imx6ul.dtsi
@@ -358,7 +358,7 @@
358 pwm1: pwm@2080000 { 358 pwm1: pwm@2080000 {
359 compatible = "fsl,imx6ul-pwm", "fsl,imx27-pwm"; 359 compatible = "fsl,imx6ul-pwm", "fsl,imx27-pwm";
360 reg = <0x02080000 0x4000>; 360 reg = <0x02080000 0x4000>;
361 interrupts = <GIC_SPI 115 IRQ_TYPE_LEVEL_HIGH>; 361 interrupts = <GIC_SPI 83 IRQ_TYPE_LEVEL_HIGH>;
362 clocks = <&clks IMX6UL_CLK_PWM1>, 362 clocks = <&clks IMX6UL_CLK_PWM1>,
363 <&clks IMX6UL_CLK_PWM1>; 363 <&clks IMX6UL_CLK_PWM1>;
364 clock-names = "ipg", "per"; 364 clock-names = "ipg", "per";
@@ -369,7 +369,7 @@
369 pwm2: pwm@2084000 { 369 pwm2: pwm@2084000 {
370 compatible = "fsl,imx6ul-pwm", "fsl,imx27-pwm"; 370 compatible = "fsl,imx6ul-pwm", "fsl,imx27-pwm";
371 reg = <0x02084000 0x4000>; 371 reg = <0x02084000 0x4000>;
372 interrupts = <GIC_SPI 116 IRQ_TYPE_LEVEL_HIGH>; 372 interrupts = <GIC_SPI 84 IRQ_TYPE_LEVEL_HIGH>;
373 clocks = <&clks IMX6UL_CLK_PWM2>, 373 clocks = <&clks IMX6UL_CLK_PWM2>,
374 <&clks IMX6UL_CLK_PWM2>; 374 <&clks IMX6UL_CLK_PWM2>;
375 clock-names = "ipg", "per"; 375 clock-names = "ipg", "per";
@@ -380,7 +380,7 @@
380 pwm3: pwm@2088000 { 380 pwm3: pwm@2088000 {
381 compatible = "fsl,imx6ul-pwm", "fsl,imx27-pwm"; 381 compatible = "fsl,imx6ul-pwm", "fsl,imx27-pwm";
382 reg = <0x02088000 0x4000>; 382 reg = <0x02088000 0x4000>;
383 interrupts = <GIC_SPI 117 IRQ_TYPE_LEVEL_HIGH>; 383 interrupts = <GIC_SPI 85 IRQ_TYPE_LEVEL_HIGH>;
384 clocks = <&clks IMX6UL_CLK_PWM3>, 384 clocks = <&clks IMX6UL_CLK_PWM3>,
385 <&clks IMX6UL_CLK_PWM3>; 385 <&clks IMX6UL_CLK_PWM3>;
386 clock-names = "ipg", "per"; 386 clock-names = "ipg", "per";
@@ -391,7 +391,7 @@
391 pwm4: pwm@208c000 { 391 pwm4: pwm@208c000 {
392 compatible = "fsl,imx6ul-pwm", "fsl,imx27-pwm"; 392 compatible = "fsl,imx6ul-pwm", "fsl,imx27-pwm";
393 reg = <0x0208c000 0x4000>; 393 reg = <0x0208c000 0x4000>;
394 interrupts = <GIC_SPI 118 IRQ_TYPE_LEVEL_HIGH>; 394 interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>;
395 clocks = <&clks IMX6UL_CLK_PWM4>, 395 clocks = <&clks IMX6UL_CLK_PWM4>,
396 <&clks IMX6UL_CLK_PWM4>; 396 <&clks IMX6UL_CLK_PWM4>;
397 clock-names = "ipg", "per"; 397 clock-names = "ipg", "per";
diff --git a/arch/arm/boot/dts/meson8.dtsi b/arch/arm/boot/dts/meson8.dtsi
index 7ef442462ea4..40c11b6b217a 100644
--- a/arch/arm/boot/dts/meson8.dtsi
+++ b/arch/arm/boot/dts/meson8.dtsi
@@ -248,8 +248,8 @@
248 <GIC_SPI 167 IRQ_TYPE_LEVEL_HIGH>, 248 <GIC_SPI 167 IRQ_TYPE_LEVEL_HIGH>,
249 <GIC_SPI 168 IRQ_TYPE_LEVEL_HIGH>, 249 <GIC_SPI 168 IRQ_TYPE_LEVEL_HIGH>,
250 <GIC_SPI 169 IRQ_TYPE_LEVEL_HIGH>, 250 <GIC_SPI 169 IRQ_TYPE_LEVEL_HIGH>,
251 <GIC_SPI 172 IRQ_TYPE_LEVEL_HIGH>, 251 <GIC_SPI 170 IRQ_TYPE_LEVEL_HIGH>,
252 <GIC_SPI 173 IRQ_TYPE_LEVEL_HIGH>, 252 <GIC_SPI 171 IRQ_TYPE_LEVEL_HIGH>,
253 <GIC_SPI 172 IRQ_TYPE_LEVEL_HIGH>, 253 <GIC_SPI 172 IRQ_TYPE_LEVEL_HIGH>,
254 <GIC_SPI 173 IRQ_TYPE_LEVEL_HIGH>, 254 <GIC_SPI 173 IRQ_TYPE_LEVEL_HIGH>,
255 <GIC_SPI 174 IRQ_TYPE_LEVEL_HIGH>, 255 <GIC_SPI 174 IRQ_TYPE_LEVEL_HIGH>,
@@ -264,7 +264,6 @@
264 clocks = <&clkc CLKID_CLK81>, <&clkc CLKID_MALI>; 264 clocks = <&clkc CLKID_CLK81>, <&clkc CLKID_MALI>;
265 clock-names = "bus", "core"; 265 clock-names = "bus", "core";
266 operating-points-v2 = <&gpu_opp_table>; 266 operating-points-v2 = <&gpu_opp_table>;
267 switch-delay = <0xffff>;
268 }; 267 };
269 }; 268 };
270}; /* end of / */ 269}; /* end of / */
diff --git a/arch/arm/boot/dts/meson8b.dtsi b/arch/arm/boot/dts/meson8b.dtsi
index 800cd65fc50a..ec67f49116d9 100644
--- a/arch/arm/boot/dts/meson8b.dtsi
+++ b/arch/arm/boot/dts/meson8b.dtsi
@@ -163,23 +163,23 @@
163 163
164 opp-255000000 { 164 opp-255000000 {
165 opp-hz = /bits/ 64 <255000000>; 165 opp-hz = /bits/ 64 <255000000>;
166 opp-microvolt = <1150000>; 166 opp-microvolt = <1100000>;
167 }; 167 };
168 opp-364300000 { 168 opp-364300000 {
169 opp-hz = /bits/ 64 <364300000>; 169 opp-hz = /bits/ 64 <364300000>;
170 opp-microvolt = <1150000>; 170 opp-microvolt = <1100000>;
171 }; 171 };
172 opp-425000000 { 172 opp-425000000 {
173 opp-hz = /bits/ 64 <425000000>; 173 opp-hz = /bits/ 64 <425000000>;
174 opp-microvolt = <1150000>; 174 opp-microvolt = <1100000>;
175 }; 175 };
176 opp-510000000 { 176 opp-510000000 {
177 opp-hz = /bits/ 64 <510000000>; 177 opp-hz = /bits/ 64 <510000000>;
178 opp-microvolt = <1150000>; 178 opp-microvolt = <1100000>;
179 }; 179 };
180 opp-637500000 { 180 opp-637500000 {
181 opp-hz = /bits/ 64 <637500000>; 181 opp-hz = /bits/ 64 <637500000>;
182 opp-microvolt = <1150000>; 182 opp-microvolt = <1100000>;
183 turbo-mode; 183 turbo-mode;
184 }; 184 };
185 }; 185 };
@@ -229,7 +229,6 @@
229 clocks = <&clkc CLKID_CLK81>, <&clkc CLKID_MALI>; 229 clocks = <&clkc CLKID_CLK81>, <&clkc CLKID_MALI>;
230 clock-names = "bus", "core"; 230 clock-names = "bus", "core";
231 operating-points-v2 = <&gpu_opp_table>; 231 operating-points-v2 = <&gpu_opp_table>;
232 switch-delay = <0xffff>;
233 }; 232 };
234 }; 233 };
235}; /* end of / */ 234}; /* end of / */
diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c
index 13e561737ca8..746e1fce777e 100644
--- a/arch/arm/common/bL_switcher.c
+++ b/arch/arm/common/bL_switcher.c
@@ -539,16 +539,14 @@ static void bL_switcher_trace_trigger_cpu(void *__always_unused info)
539 539
540int bL_switcher_trace_trigger(void) 540int bL_switcher_trace_trigger(void)
541{ 541{
542 int ret;
543
544 preempt_disable(); 542 preempt_disable();
545 543
546 bL_switcher_trace_trigger_cpu(NULL); 544 bL_switcher_trace_trigger_cpu(NULL);
547 ret = smp_call_function(bL_switcher_trace_trigger_cpu, NULL, true); 545 smp_call_function(bL_switcher_trace_trigger_cpu, NULL, true);
548 546
549 preempt_enable(); 547 preempt_enable();
550 548
551 return ret; 549 return 0;
552} 550}
553EXPORT_SYMBOL_GPL(bL_switcher_trace_trigger); 551EXPORT_SYMBOL_GPL(bL_switcher_trace_trigger);
554 552
diff --git a/arch/arm/include/asm/arch_timer.h b/arch/arm/include/asm/arch_timer.h
index 4b66ecd6be99..99175812d903 100644
--- a/arch/arm/include/asm/arch_timer.h
+++ b/arch/arm/include/asm/arch_timer.h
@@ -4,6 +4,7 @@
4 4
5#include <asm/barrier.h> 5#include <asm/barrier.h>
6#include <asm/errno.h> 6#include <asm/errno.h>
7#include <asm/hwcap.h>
7#include <linux/clocksource.h> 8#include <linux/clocksource.h>
8#include <linux/init.h> 9#include <linux/init.h>
9#include <linux/types.h> 10#include <linux/types.h>
@@ -124,6 +125,15 @@ static inline void arch_timer_set_cntkctl(u32 cntkctl)
124 isb(); 125 isb();
125} 126}
126 127
128static inline void arch_timer_set_evtstrm_feature(void)
129{
130 elf_hwcap |= HWCAP_EVTSTRM;
131}
132
133static inline bool arch_timer_have_evtstrm_feature(void)
134{
135 return elf_hwcap & HWCAP_EVTSTRM;
136}
127#endif 137#endif
128 138
129#endif 139#endif
diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h
index 50c3ac5f0809..75bb2c543e59 100644
--- a/arch/arm/include/asm/atomic.h
+++ b/arch/arm/include/asm/atomic.h
@@ -246,15 +246,15 @@ ATOMIC_OPS(xor, ^=, eor)
246 246
247#ifndef CONFIG_GENERIC_ATOMIC64 247#ifndef CONFIG_GENERIC_ATOMIC64
248typedef struct { 248typedef struct {
249 long long counter; 249 s64 counter;
250} atomic64_t; 250} atomic64_t;
251 251
252#define ATOMIC64_INIT(i) { (i) } 252#define ATOMIC64_INIT(i) { (i) }
253 253
254#ifdef CONFIG_ARM_LPAE 254#ifdef CONFIG_ARM_LPAE
255static inline long long atomic64_read(const atomic64_t *v) 255static inline s64 atomic64_read(const atomic64_t *v)
256{ 256{
257 long long result; 257 s64 result;
258 258
259 __asm__ __volatile__("@ atomic64_read\n" 259 __asm__ __volatile__("@ atomic64_read\n"
260" ldrd %0, %H0, [%1]" 260" ldrd %0, %H0, [%1]"
@@ -265,7 +265,7 @@ static inline long long atomic64_read(const atomic64_t *v)
265 return result; 265 return result;
266} 266}
267 267
268static inline void atomic64_set(atomic64_t *v, long long i) 268static inline void atomic64_set(atomic64_t *v, s64 i)
269{ 269{
270 __asm__ __volatile__("@ atomic64_set\n" 270 __asm__ __volatile__("@ atomic64_set\n"
271" strd %2, %H2, [%1]" 271" strd %2, %H2, [%1]"
@@ -274,9 +274,9 @@ static inline void atomic64_set(atomic64_t *v, long long i)
274 ); 274 );
275} 275}
276#else 276#else
277static inline long long atomic64_read(const atomic64_t *v) 277static inline s64 atomic64_read(const atomic64_t *v)
278{ 278{
279 long long result; 279 s64 result;
280 280
281 __asm__ __volatile__("@ atomic64_read\n" 281 __asm__ __volatile__("@ atomic64_read\n"
282" ldrexd %0, %H0, [%1]" 282" ldrexd %0, %H0, [%1]"
@@ -287,9 +287,9 @@ static inline long long atomic64_read(const atomic64_t *v)
287 return result; 287 return result;
288} 288}
289 289
290static inline void atomic64_set(atomic64_t *v, long long i) 290static inline void atomic64_set(atomic64_t *v, s64 i)
291{ 291{
292 long long tmp; 292 s64 tmp;
293 293
294 prefetchw(&v->counter); 294 prefetchw(&v->counter);
295 __asm__ __volatile__("@ atomic64_set\n" 295 __asm__ __volatile__("@ atomic64_set\n"
@@ -304,9 +304,9 @@ static inline void atomic64_set(atomic64_t *v, long long i)
304#endif 304#endif
305 305
306#define ATOMIC64_OP(op, op1, op2) \ 306#define ATOMIC64_OP(op, op1, op2) \
307static inline void atomic64_##op(long long i, atomic64_t *v) \ 307static inline void atomic64_##op(s64 i, atomic64_t *v) \
308{ \ 308{ \
309 long long result; \ 309 s64 result; \
310 unsigned long tmp; \ 310 unsigned long tmp; \
311 \ 311 \
312 prefetchw(&v->counter); \ 312 prefetchw(&v->counter); \
@@ -323,10 +323,10 @@ static inline void atomic64_##op(long long i, atomic64_t *v) \
323} \ 323} \
324 324
325#define ATOMIC64_OP_RETURN(op, op1, op2) \ 325#define ATOMIC64_OP_RETURN(op, op1, op2) \
326static inline long long \ 326static inline s64 \
327atomic64_##op##_return_relaxed(long long i, atomic64_t *v) \ 327atomic64_##op##_return_relaxed(s64 i, atomic64_t *v) \
328{ \ 328{ \
329 long long result; \ 329 s64 result; \
330 unsigned long tmp; \ 330 unsigned long tmp; \
331 \ 331 \
332 prefetchw(&v->counter); \ 332 prefetchw(&v->counter); \
@@ -346,10 +346,10 @@ atomic64_##op##_return_relaxed(long long i, atomic64_t *v) \
346} 346}
347 347
348#define ATOMIC64_FETCH_OP(op, op1, op2) \ 348#define ATOMIC64_FETCH_OP(op, op1, op2) \
349static inline long long \ 349static inline s64 \
350atomic64_fetch_##op##_relaxed(long long i, atomic64_t *v) \ 350atomic64_fetch_##op##_relaxed(s64 i, atomic64_t *v) \
351{ \ 351{ \
352 long long result, val; \ 352 s64 result, val; \
353 unsigned long tmp; \ 353 unsigned long tmp; \
354 \ 354 \
355 prefetchw(&v->counter); \ 355 prefetchw(&v->counter); \
@@ -403,10 +403,9 @@ ATOMIC64_OPS(xor, eor, eor)
403#undef ATOMIC64_OP_RETURN 403#undef ATOMIC64_OP_RETURN
404#undef ATOMIC64_OP 404#undef ATOMIC64_OP
405 405
406static inline long long 406static inline s64 atomic64_cmpxchg_relaxed(atomic64_t *ptr, s64 old, s64 new)
407atomic64_cmpxchg_relaxed(atomic64_t *ptr, long long old, long long new)
408{ 407{
409 long long oldval; 408 s64 oldval;
410 unsigned long res; 409 unsigned long res;
411 410
412 prefetchw(&ptr->counter); 411 prefetchw(&ptr->counter);
@@ -427,9 +426,9 @@ atomic64_cmpxchg_relaxed(atomic64_t *ptr, long long old, long long new)
427} 426}
428#define atomic64_cmpxchg_relaxed atomic64_cmpxchg_relaxed 427#define atomic64_cmpxchg_relaxed atomic64_cmpxchg_relaxed
429 428
430static inline long long atomic64_xchg_relaxed(atomic64_t *ptr, long long new) 429static inline s64 atomic64_xchg_relaxed(atomic64_t *ptr, s64 new)
431{ 430{
432 long long result; 431 s64 result;
433 unsigned long tmp; 432 unsigned long tmp;
434 433
435 prefetchw(&ptr->counter); 434 prefetchw(&ptr->counter);
@@ -447,9 +446,9 @@ static inline long long atomic64_xchg_relaxed(atomic64_t *ptr, long long new)
447} 446}
448#define atomic64_xchg_relaxed atomic64_xchg_relaxed 447#define atomic64_xchg_relaxed atomic64_xchg_relaxed
449 448
450static inline long long atomic64_dec_if_positive(atomic64_t *v) 449static inline s64 atomic64_dec_if_positive(atomic64_t *v)
451{ 450{
452 long long result; 451 s64 result;
453 unsigned long tmp; 452 unsigned long tmp;
454 453
455 smp_mb(); 454 smp_mb();
@@ -475,10 +474,9 @@ static inline long long atomic64_dec_if_positive(atomic64_t *v)
475} 474}
476#define atomic64_dec_if_positive atomic64_dec_if_positive 475#define atomic64_dec_if_positive atomic64_dec_if_positive
477 476
478static inline long long atomic64_fetch_add_unless(atomic64_t *v, long long a, 477static inline s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
479 long long u)
480{ 478{
481 long long oldval, newval; 479 s64 oldval, newval;
482 unsigned long tmp; 480 unsigned long tmp;
483 481
484 smp_mb(); 482 smp_mb();
diff --git a/arch/arm/mach-davinci/board-da830-evm.c b/arch/arm/mach-davinci/board-da830-evm.c
index 51a892702e27..a273ab25c668 100644
--- a/arch/arm/mach-davinci/board-da830-evm.c
+++ b/arch/arm/mach-davinci/board-da830-evm.c
@@ -61,6 +61,9 @@ static struct regulator_consumer_supply da830_evm_usb_supplies[] = {
61static struct regulator_init_data da830_evm_usb_vbus_data = { 61static struct regulator_init_data da830_evm_usb_vbus_data = {
62 .consumer_supplies = da830_evm_usb_supplies, 62 .consumer_supplies = da830_evm_usb_supplies,
63 .num_consumer_supplies = ARRAY_SIZE(da830_evm_usb_supplies), 63 .num_consumer_supplies = ARRAY_SIZE(da830_evm_usb_supplies),
64 .constraints = {
65 .valid_ops_mask = REGULATOR_CHANGE_STATUS,
66 },
64}; 67};
65 68
66static struct fixed_voltage_config da830_evm_usb_vbus = { 69static struct fixed_voltage_config da830_evm_usb_vbus = {
@@ -88,7 +91,7 @@ static struct gpiod_lookup_table da830_evm_usb_oc_gpio_lookup = {
88static struct gpiod_lookup_table da830_evm_usb_vbus_gpio_lookup = { 91static struct gpiod_lookup_table da830_evm_usb_vbus_gpio_lookup = {
89 .dev_id = "reg-fixed-voltage.0", 92 .dev_id = "reg-fixed-voltage.0",
90 .table = { 93 .table = {
91 GPIO_LOOKUP("davinci_gpio", ON_BD_USB_DRV, "vbus", 0), 94 GPIO_LOOKUP("davinci_gpio", ON_BD_USB_DRV, NULL, 0),
92 { } 95 { }
93 }, 96 },
94}; 97};
diff --git a/arch/arm/mach-davinci/board-omapl138-hawk.c b/arch/arm/mach-davinci/board-omapl138-hawk.c
index db177a6a7e48..5390a8630cf0 100644
--- a/arch/arm/mach-davinci/board-omapl138-hawk.c
+++ b/arch/arm/mach-davinci/board-omapl138-hawk.c
@@ -306,6 +306,9 @@ static struct regulator_consumer_supply hawk_usb_supplies[] = {
306static struct regulator_init_data hawk_usb_vbus_data = { 306static struct regulator_init_data hawk_usb_vbus_data = {
307 .consumer_supplies = hawk_usb_supplies, 307 .consumer_supplies = hawk_usb_supplies,
308 .num_consumer_supplies = ARRAY_SIZE(hawk_usb_supplies), 308 .num_consumer_supplies = ARRAY_SIZE(hawk_usb_supplies),
309 .constraints = {
310 .valid_ops_mask = REGULATOR_CHANGE_STATUS,
311 },
309}; 312};
310 313
311static struct fixed_voltage_config hawk_usb_vbus = { 314static struct fixed_voltage_config hawk_usb_vbus = {
diff --git a/arch/arm/mach-omap2/prm3xxx.c b/arch/arm/mach-omap2/prm3xxx.c
index fd4a3bf27993..1b442b128569 100644
--- a/arch/arm/mach-omap2/prm3xxx.c
+++ b/arch/arm/mach-omap2/prm3xxx.c
@@ -430,7 +430,7 @@ static void omap3_prm_reconfigure_io_chain(void)
430 * registers, and omap3xxx_prm_reconfigure_io_chain() must be called. 430 * registers, and omap3xxx_prm_reconfigure_io_chain() must be called.
431 * No return value. 431 * No return value.
432 */ 432 */
433static void __init omap3xxx_prm_enable_io_wakeup(void) 433static void omap3xxx_prm_enable_io_wakeup(void)
434{ 434{
435 if (prm_features & PRM_HAS_IO_WAKEUP) 435 if (prm_features & PRM_HAS_IO_WAKEUP)
436 omap2_prm_set_mod_reg_bits(OMAP3430_EN_IO_MASK, WKUP_MOD, 436 omap2_prm_set_mod_reg_bits(OMAP3430_EN_IO_MASK, WKUP_MOD,
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 697ea0510729..c1734e444fb8 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -26,6 +26,7 @@ config ARM64
26 select ARCH_HAS_MEMBARRIER_SYNC_CORE 26 select ARCH_HAS_MEMBARRIER_SYNC_CORE
27 select ARCH_HAS_PTE_SPECIAL 27 select ARCH_HAS_PTE_SPECIAL
28 select ARCH_HAS_SETUP_DMA_OPS 28 select ARCH_HAS_SETUP_DMA_OPS
29 select ARCH_HAS_SET_DIRECT_MAP
29 select ARCH_HAS_SET_MEMORY 30 select ARCH_HAS_SET_MEMORY
30 select ARCH_HAS_STRICT_KERNEL_RWX 31 select ARCH_HAS_STRICT_KERNEL_RWX
31 select ARCH_HAS_STRICT_MODULE_RWX 32 select ARCH_HAS_STRICT_MODULE_RWX
@@ -107,6 +108,8 @@ config ARM64
107 select GENERIC_STRNCPY_FROM_USER 108 select GENERIC_STRNCPY_FROM_USER
108 select GENERIC_STRNLEN_USER 109 select GENERIC_STRNLEN_USER
109 select GENERIC_TIME_VSYSCALL 110 select GENERIC_TIME_VSYSCALL
111 select GENERIC_GETTIMEOFDAY
112 select GENERIC_COMPAT_VDSO if (!CPU_BIG_ENDIAN && COMPAT)
110 select HANDLE_DOMAIN_IRQ 113 select HANDLE_DOMAIN_IRQ
111 select HARDIRQS_SW_RESEND 114 select HARDIRQS_SW_RESEND
112 select HAVE_PCI 115 select HAVE_PCI
@@ -160,6 +163,7 @@ config ARM64
160 select HAVE_SYSCALL_TRACEPOINTS 163 select HAVE_SYSCALL_TRACEPOINTS
161 select HAVE_KPROBES 164 select HAVE_KPROBES
162 select HAVE_KRETPROBES 165 select HAVE_KRETPROBES
166 select HAVE_GENERIC_VDSO
163 select IOMMU_DMA if IOMMU_SUPPORT 167 select IOMMU_DMA if IOMMU_SUPPORT
164 select IRQ_DOMAIN 168 select IRQ_DOMAIN
165 select IRQ_FORCED_THREADING 169 select IRQ_FORCED_THREADING
@@ -260,7 +264,8 @@ config GENERIC_CALIBRATE_DELAY
260 def_bool y 264 def_bool y
261 265
262config ZONE_DMA32 266config ZONE_DMA32
263 def_bool y 267 bool "Support DMA32 zone" if EXPERT
268 default y
264 269
265config HAVE_GENERIC_GUP 270config HAVE_GENERIC_GUP
266 def_bool y 271 def_bool y
@@ -933,7 +938,6 @@ config PARAVIRT
933config PARAVIRT_TIME_ACCOUNTING 938config PARAVIRT_TIME_ACCOUNTING
934 bool "Paravirtual steal time accounting" 939 bool "Paravirtual steal time accounting"
935 select PARAVIRT 940 select PARAVIRT
936 default n
937 help 941 help
938 Select this option to enable fine granularity task steal time 942 Select this option to enable fine granularity task steal time
939 accounting. Time spent executing other tasks in parallel with 943 accounting. Time spent executing other tasks in parallel with
@@ -1418,12 +1422,27 @@ config ARM64_SVE
1418 KVM in the same kernel image. 1422 KVM in the same kernel image.
1419 1423
1420config ARM64_MODULE_PLTS 1424config ARM64_MODULE_PLTS
1421 bool 1425 bool "Use PLTs to allow module memory to spill over into vmalloc area"
1426 depends on MODULES
1422 select HAVE_MOD_ARCH_SPECIFIC 1427 select HAVE_MOD_ARCH_SPECIFIC
1428 help
1429 Allocate PLTs when loading modules so that jumps and calls whose
1430 targets are too far away for their relative offsets to be encoded
1431 in the instructions themselves can be bounced via veneers in the
1432 module's PLT. This allows modules to be allocated in the generic
1433 vmalloc area after the dedicated module memory area has been
1434 exhausted.
1435
1436 When running with address space randomization (KASLR), the module
1437 region itself may be too far away for ordinary relative jumps and
1438 calls, and so in that case, module PLTs are required and cannot be
1439 disabled.
1440
1441 Specific errata workaround(s) might also force module PLTs to be
1442 enabled (ARM64_ERRATUM_843419).
1423 1443
1424config ARM64_PSEUDO_NMI 1444config ARM64_PSEUDO_NMI
1425 bool "Support for NMI-like interrupts" 1445 bool "Support for NMI-like interrupts"
1426 depends on BROKEN # 1556553607-46531-1-git-send-email-julien.thierry@arm.com
1427 select CONFIG_ARM_GIC_V3 1446 select CONFIG_ARM_GIC_V3
1428 help 1447 help
1429 Adds support for mimicking Non-Maskable Interrupts through the use of 1448 Adds support for mimicking Non-Maskable Interrupts through the use of
@@ -1436,6 +1455,17 @@ config ARM64_PSEUDO_NMI
1436 1455
1437 If unsure, say N 1456 If unsure, say N
1438 1457
1458if ARM64_PSEUDO_NMI
1459config ARM64_DEBUG_PRIORITY_MASKING
1460 bool "Debug interrupt priority masking"
1461 help
1462	  This adds runtime checks to the functions that enable/disable
1463	  interrupts when using priority masking. The additional checks verify
1464	  the validity of ICC_PMR_EL1 when the affected functions are called.
1465
1466 If unsure, say N
1467endif
1468
1439config RELOCATABLE 1469config RELOCATABLE
1440 bool 1470 bool
1441 help 1471 help
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index e9d2e578cbe6..e3d3fd0a4268 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -49,10 +49,26 @@ $(warning Detected assembler with broken .inst; disassembly will be unreliable)
49 endif 49 endif
50endif 50endif
51 51
52KBUILD_CFLAGS += -mgeneral-regs-only $(lseinstr) $(brokengasinst) 52ifeq ($(CONFIG_GENERIC_COMPAT_VDSO), y)
53 CROSS_COMPILE_COMPAT ?= $(CONFIG_CROSS_COMPILE_COMPAT_VDSO:"%"=%)
54
55 ifeq ($(CONFIG_CC_IS_CLANG), y)
56 $(warning CROSS_COMPILE_COMPAT is clang, the compat vDSO will not be built)
57 else ifeq ($(CROSS_COMPILE_COMPAT),)
58 $(warning CROSS_COMPILE_COMPAT not defined or empty, the compat vDSO will not be built)
59 else ifeq ($(shell which $(CROSS_COMPILE_COMPAT)gcc 2> /dev/null),)
60 $(error $(CROSS_COMPILE_COMPAT)gcc not found, check CROSS_COMPILE_COMPAT)
61 else
62 export CROSS_COMPILE_COMPAT
63 export CONFIG_COMPAT_VDSO := y
64 compat_vdso := -DCONFIG_COMPAT_VDSO=1
65 endif
66endif
67
68KBUILD_CFLAGS += -mgeneral-regs-only $(lseinstr) $(brokengasinst) $(compat_vdso)
53KBUILD_CFLAGS += -fno-asynchronous-unwind-tables 69KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
54KBUILD_CFLAGS += $(call cc-disable-warning, psabi) 70KBUILD_CFLAGS += $(call cc-disable-warning, psabi)
55KBUILD_AFLAGS += $(lseinstr) $(brokengasinst) 71KBUILD_AFLAGS += $(lseinstr) $(brokengasinst) $(compat_vdso)
56 72
57KBUILD_CFLAGS += $(call cc-option,-mabi=lp64) 73KBUILD_CFLAGS += $(call cc-option,-mabi=lp64)
58KBUILD_AFLAGS += $(call cc-option,-mabi=lp64) 74KBUILD_AFLAGS += $(call cc-option,-mabi=lp64)
@@ -164,6 +180,9 @@ ifeq ($(KBUILD_EXTMOD),)
164prepare: vdso_prepare 180prepare: vdso_prepare
165vdso_prepare: prepare0 181vdso_prepare: prepare0
166 $(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso include/generated/vdso-offsets.h 182 $(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso include/generated/vdso-offsets.h
183 $(if $(CONFIG_COMPAT_VDSO),$(Q)$(MAKE) \
184 $(build)=arch/arm64/kernel/vdso32 \
185 include/generated/vdso32-offsets.h)
167endif 186endif
168 187
169define archhelp 188define archhelp
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
index b04581249f0b..bf7f845447ed 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
@@ -28,7 +28,7 @@
28 enable-method = "psci"; 28 enable-method = "psci";
29 clocks = <&clockgen 1 0>; 29 clocks = <&clockgen 1 0>;
30 next-level-cache = <&l2>; 30 next-level-cache = <&l2>;
31 cpu-idle-states = <&CPU_PH20>; 31 cpu-idle-states = <&CPU_PW20>;
32 }; 32 };
33 33
34 cpu1: cpu@1 { 34 cpu1: cpu@1 {
@@ -38,7 +38,7 @@
38 enable-method = "psci"; 38 enable-method = "psci";
39 clocks = <&clockgen 1 0>; 39 clocks = <&clockgen 1 0>;
40 next-level-cache = <&l2>; 40 next-level-cache = <&l2>;
41 cpu-idle-states = <&CPU_PH20>; 41 cpu-idle-states = <&CPU_PW20>;
42 }; 42 };
43 43
44 l2: l2-cache { 44 l2: l2-cache {
@@ -53,13 +53,13 @@
53 */ 53 */
54 entry-method = "arm,psci"; 54 entry-method = "arm,psci";
55 55
56 CPU_PH20: cpu-ph20 { 56 CPU_PW20: cpu-pw20 {
57 compatible = "arm,idle-state"; 57 compatible = "arm,idle-state";
58 idle-state-name = "PH20"; 58 idle-state-name = "PW20";
59 arm,psci-suspend-param = <0x00010000>; 59 arm,psci-suspend-param = <0x0>;
60 entry-latency-us = <1000>; 60 entry-latency-us = <2000>;
61 exit-latency-us = <1000>; 61 exit-latency-us = <2000>;
62 min-residency-us = <3000>; 62 min-residency-us = <6000>;
63 }; 63 };
64 }; 64 };
65 65
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 4d583514258c..dd827e64e5fe 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -68,6 +68,7 @@ CONFIG_KEXEC=y
68CONFIG_CRASH_DUMP=y 68CONFIG_CRASH_DUMP=y
69CONFIG_XEN=y 69CONFIG_XEN=y
70CONFIG_COMPAT=y 70CONFIG_COMPAT=y
71CONFIG_RANDOMIZE_BASE=y
71CONFIG_HIBERNATION=y 72CONFIG_HIBERNATION=y
72CONFIG_WQ_POWER_EFFICIENT_DEFAULT=y 73CONFIG_WQ_POWER_EFFICIENT_DEFAULT=y
73CONFIG_ARM_CPUIDLE=y 74CONFIG_ARM_CPUIDLE=y
@@ -613,6 +614,7 @@ CONFIG_RTC_DRV_TEGRA=y
613CONFIG_RTC_DRV_IMX_SC=m 614CONFIG_RTC_DRV_IMX_SC=m
614CONFIG_RTC_DRV_XGENE=y 615CONFIG_RTC_DRV_XGENE=y
615CONFIG_DMADEVICES=y 616CONFIG_DMADEVICES=y
617CONFIG_FSL_EDMA=y
616CONFIG_DMA_BCM2835=m 618CONFIG_DMA_BCM2835=m
617CONFIG_K3_DMA=y 619CONFIG_K3_DMA=y
618CONFIG_MV_XOR=y 620CONFIG_MV_XOR=y
diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h
index ada0bc480a1b..b263e239cb59 100644
--- a/arch/arm64/include/asm/acpi.h
+++ b/arch/arm64/include/asm/acpi.h
@@ -38,6 +38,9 @@
38 (!(entry) || (entry)->header.length < ACPI_MADT_GICC_MIN_LENGTH || \ 38 (!(entry) || (entry)->header.length < ACPI_MADT_GICC_MIN_LENGTH || \
39 (unsigned long)(entry) + (entry)->header.length > (end)) 39 (unsigned long)(entry) + (entry)->header.length > (end))
40 40
41#define ACPI_MADT_GICC_SPE (ACPI_OFFSET(struct acpi_madt_generic_interrupt, \
42 spe_interrupt) + sizeof(u16))
43
41/* Basic configuration for ACPI */ 44/* Basic configuration for ACPI */
42#ifdef CONFIG_ACPI 45#ifdef CONFIG_ACPI
43pgprot_t __acpi_get_mem_attribute(phys_addr_t addr); 46pgprot_t __acpi_get_mem_attribute(phys_addr_t addr);
diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h
index 2247908e55d6..79155a8cfe7c 100644
--- a/arch/arm64/include/asm/arch_gicv3.h
+++ b/arch/arm64/include/asm/arch_gicv3.h
@@ -152,7 +152,9 @@ static inline bool gic_prio_masking_enabled(void)
152 152
153static inline void gic_pmr_mask_irqs(void) 153static inline void gic_pmr_mask_irqs(void)
154{ 154{
155 BUILD_BUG_ON(GICD_INT_DEF_PRI <= GIC_PRIO_IRQOFF); 155 BUILD_BUG_ON(GICD_INT_DEF_PRI < (GIC_PRIO_IRQOFF |
156 GIC_PRIO_PSR_I_SET));
157 BUILD_BUG_ON(GICD_INT_DEF_PRI >= GIC_PRIO_IRQON);
156 gic_write_pmr(GIC_PRIO_IRQOFF); 158 gic_write_pmr(GIC_PRIO_IRQOFF);
157} 159}
158 160
diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h
index 6756178c27db..7ae54d7d333a 100644
--- a/arch/arm64/include/asm/arch_timer.h
+++ b/arch/arm64/include/asm/arch_timer.h
@@ -9,6 +9,7 @@
9#define __ASM_ARCH_TIMER_H 9#define __ASM_ARCH_TIMER_H
10 10
11#include <asm/barrier.h> 11#include <asm/barrier.h>
12#include <asm/hwcap.h>
12#include <asm/sysreg.h> 13#include <asm/sysreg.h>
13 14
14#include <linux/bug.h> 15#include <linux/bug.h>
@@ -229,4 +230,16 @@ static inline int arch_timer_arch_init(void)
229 return 0; 230 return 0;
230} 231}
231 232
233static inline void arch_timer_set_evtstrm_feature(void)
234{
235 cpu_set_named_feature(EVTSTRM);
236#ifdef CONFIG_COMPAT
237 compat_elf_hwcap |= COMPAT_HWCAP_EVTSTRM;
238#endif
239}
240
241static inline bool arch_timer_have_evtstrm_feature(void)
242{
243 return cpu_have_named_feature(EVTSTRM);
244}
232#endif 245#endif
diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h
index 23c378606aed..c8c850bc3dfb 100644
--- a/arch/arm64/include/asm/atomic_ll_sc.h
+++ b/arch/arm64/include/asm/atomic_ll_sc.h
@@ -122,9 +122,9 @@ ATOMIC_OPS(xor, eor)
122 122
123#define ATOMIC64_OP(op, asm_op) \ 123#define ATOMIC64_OP(op, asm_op) \
124__LL_SC_INLINE void \ 124__LL_SC_INLINE void \
125__LL_SC_PREFIX(arch_atomic64_##op(long i, atomic64_t *v)) \ 125__LL_SC_PREFIX(arch_atomic64_##op(s64 i, atomic64_t *v)) \
126{ \ 126{ \
127 long result; \ 127 s64 result; \
128 unsigned long tmp; \ 128 unsigned long tmp; \
129 \ 129 \
130 asm volatile("// atomic64_" #op "\n" \ 130 asm volatile("// atomic64_" #op "\n" \
@@ -139,10 +139,10 @@ __LL_SC_PREFIX(arch_atomic64_##op(long i, atomic64_t *v)) \
139__LL_SC_EXPORT(arch_atomic64_##op); 139__LL_SC_EXPORT(arch_atomic64_##op);
140 140
141#define ATOMIC64_OP_RETURN(name, mb, acq, rel, cl, op, asm_op) \ 141#define ATOMIC64_OP_RETURN(name, mb, acq, rel, cl, op, asm_op) \
142__LL_SC_INLINE long \ 142__LL_SC_INLINE s64 \
143__LL_SC_PREFIX(arch_atomic64_##op##_return##name(long i, atomic64_t *v))\ 143__LL_SC_PREFIX(arch_atomic64_##op##_return##name(s64 i, atomic64_t *v))\
144{ \ 144{ \
145 long result; \ 145 s64 result; \
146 unsigned long tmp; \ 146 unsigned long tmp; \
147 \ 147 \
148 asm volatile("// atomic64_" #op "_return" #name "\n" \ 148 asm volatile("// atomic64_" #op "_return" #name "\n" \
@@ -161,10 +161,10 @@ __LL_SC_PREFIX(arch_atomic64_##op##_return##name(long i, atomic64_t *v))\
161__LL_SC_EXPORT(arch_atomic64_##op##_return##name); 161__LL_SC_EXPORT(arch_atomic64_##op##_return##name);
162 162
163#define ATOMIC64_FETCH_OP(name, mb, acq, rel, cl, op, asm_op) \ 163#define ATOMIC64_FETCH_OP(name, mb, acq, rel, cl, op, asm_op) \
164__LL_SC_INLINE long \ 164__LL_SC_INLINE s64 \
165__LL_SC_PREFIX(arch_atomic64_fetch_##op##name(long i, atomic64_t *v)) \ 165__LL_SC_PREFIX(arch_atomic64_fetch_##op##name(s64 i, atomic64_t *v)) \
166{ \ 166{ \
167 long result, val; \ 167 s64 result, val; \
168 unsigned long tmp; \ 168 unsigned long tmp; \
169 \ 169 \
170 asm volatile("// atomic64_fetch_" #op #name "\n" \ 170 asm volatile("// atomic64_fetch_" #op #name "\n" \
@@ -214,10 +214,10 @@ ATOMIC64_OPS(xor, eor)
214#undef ATOMIC64_OP_RETURN 214#undef ATOMIC64_OP_RETURN
215#undef ATOMIC64_OP 215#undef ATOMIC64_OP
216 216
217__LL_SC_INLINE long 217__LL_SC_INLINE s64
218__LL_SC_PREFIX(arch_atomic64_dec_if_positive(atomic64_t *v)) 218__LL_SC_PREFIX(arch_atomic64_dec_if_positive(atomic64_t *v))
219{ 219{
220 long result; 220 s64 result;
221 unsigned long tmp; 221 unsigned long tmp;
222 222
223 asm volatile("// atomic64_dec_if_positive\n" 223 asm volatile("// atomic64_dec_if_positive\n"
diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h
index 45e030d54332..69acb1c19a15 100644
--- a/arch/arm64/include/asm/atomic_lse.h
+++ b/arch/arm64/include/asm/atomic_lse.h
@@ -213,9 +213,9 @@ ATOMIC_FETCH_OP_SUB( , al, "memory")
213 213
214#define __LL_SC_ATOMIC64(op) __LL_SC_CALL(arch_atomic64_##op) 214#define __LL_SC_ATOMIC64(op) __LL_SC_CALL(arch_atomic64_##op)
215#define ATOMIC64_OP(op, asm_op) \ 215#define ATOMIC64_OP(op, asm_op) \
216static inline void arch_atomic64_##op(long i, atomic64_t *v) \ 216static inline void arch_atomic64_##op(s64 i, atomic64_t *v) \
217{ \ 217{ \
218 register long x0 asm ("x0") = i; \ 218 register s64 x0 asm ("x0") = i; \
219 register atomic64_t *x1 asm ("x1") = v; \ 219 register atomic64_t *x1 asm ("x1") = v; \
220 \ 220 \
221 asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(op), \ 221 asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(op), \
@@ -233,9 +233,9 @@ ATOMIC64_OP(add, stadd)
233#undef ATOMIC64_OP 233#undef ATOMIC64_OP
234 234
235#define ATOMIC64_FETCH_OP(name, mb, op, asm_op, cl...) \ 235#define ATOMIC64_FETCH_OP(name, mb, op, asm_op, cl...) \
236static inline long arch_atomic64_fetch_##op##name(long i, atomic64_t *v)\ 236static inline s64 arch_atomic64_fetch_##op##name(s64 i, atomic64_t *v) \
237{ \ 237{ \
238 register long x0 asm ("x0") = i; \ 238 register s64 x0 asm ("x0") = i; \
239 register atomic64_t *x1 asm ("x1") = v; \ 239 register atomic64_t *x1 asm ("x1") = v; \
240 \ 240 \
241 asm volatile(ARM64_LSE_ATOMIC_INSN( \ 241 asm volatile(ARM64_LSE_ATOMIC_INSN( \
@@ -265,9 +265,9 @@ ATOMIC64_FETCH_OPS(add, ldadd)
265#undef ATOMIC64_FETCH_OPS 265#undef ATOMIC64_FETCH_OPS
266 266
267#define ATOMIC64_OP_ADD_RETURN(name, mb, cl...) \ 267#define ATOMIC64_OP_ADD_RETURN(name, mb, cl...) \
268static inline long arch_atomic64_add_return##name(long i, atomic64_t *v)\ 268static inline s64 arch_atomic64_add_return##name(s64 i, atomic64_t *v) \
269{ \ 269{ \
270 register long x0 asm ("x0") = i; \ 270 register s64 x0 asm ("x0") = i; \
271 register atomic64_t *x1 asm ("x1") = v; \ 271 register atomic64_t *x1 asm ("x1") = v; \
272 \ 272 \
273 asm volatile(ARM64_LSE_ATOMIC_INSN( \ 273 asm volatile(ARM64_LSE_ATOMIC_INSN( \
@@ -291,9 +291,9 @@ ATOMIC64_OP_ADD_RETURN( , al, "memory")
291 291
292#undef ATOMIC64_OP_ADD_RETURN 292#undef ATOMIC64_OP_ADD_RETURN
293 293
294static inline void arch_atomic64_and(long i, atomic64_t *v) 294static inline void arch_atomic64_and(s64 i, atomic64_t *v)
295{ 295{
296 register long x0 asm ("x0") = i; 296 register s64 x0 asm ("x0") = i;
297 register atomic64_t *x1 asm ("x1") = v; 297 register atomic64_t *x1 asm ("x1") = v;
298 298
299 asm volatile(ARM64_LSE_ATOMIC_INSN( 299 asm volatile(ARM64_LSE_ATOMIC_INSN(
@@ -309,9 +309,9 @@ static inline void arch_atomic64_and(long i, atomic64_t *v)
309} 309}
310 310
311#define ATOMIC64_FETCH_OP_AND(name, mb, cl...) \ 311#define ATOMIC64_FETCH_OP_AND(name, mb, cl...) \
312static inline long arch_atomic64_fetch_and##name(long i, atomic64_t *v) \ 312static inline s64 arch_atomic64_fetch_and##name(s64 i, atomic64_t *v) \
313{ \ 313{ \
314 register long x0 asm ("x0") = i; \ 314 register s64 x0 asm ("x0") = i; \
315 register atomic64_t *x1 asm ("x1") = v; \ 315 register atomic64_t *x1 asm ("x1") = v; \
316 \ 316 \
317 asm volatile(ARM64_LSE_ATOMIC_INSN( \ 317 asm volatile(ARM64_LSE_ATOMIC_INSN( \
@@ -335,9 +335,9 @@ ATOMIC64_FETCH_OP_AND( , al, "memory")
335 335
336#undef ATOMIC64_FETCH_OP_AND 336#undef ATOMIC64_FETCH_OP_AND
337 337
338static inline void arch_atomic64_sub(long i, atomic64_t *v) 338static inline void arch_atomic64_sub(s64 i, atomic64_t *v)
339{ 339{
340 register long x0 asm ("x0") = i; 340 register s64 x0 asm ("x0") = i;
341 register atomic64_t *x1 asm ("x1") = v; 341 register atomic64_t *x1 asm ("x1") = v;
342 342
343 asm volatile(ARM64_LSE_ATOMIC_INSN( 343 asm volatile(ARM64_LSE_ATOMIC_INSN(
@@ -353,9 +353,9 @@ static inline void arch_atomic64_sub(long i, atomic64_t *v)
353} 353}
354 354
355#define ATOMIC64_OP_SUB_RETURN(name, mb, cl...) \ 355#define ATOMIC64_OP_SUB_RETURN(name, mb, cl...) \
356static inline long arch_atomic64_sub_return##name(long i, atomic64_t *v)\ 356static inline s64 arch_atomic64_sub_return##name(s64 i, atomic64_t *v) \
357{ \ 357{ \
358 register long x0 asm ("x0") = i; \ 358 register s64 x0 asm ("x0") = i; \
359 register atomic64_t *x1 asm ("x1") = v; \ 359 register atomic64_t *x1 asm ("x1") = v; \
360 \ 360 \
361 asm volatile(ARM64_LSE_ATOMIC_INSN( \ 361 asm volatile(ARM64_LSE_ATOMIC_INSN( \
@@ -381,9 +381,9 @@ ATOMIC64_OP_SUB_RETURN( , al, "memory")
381#undef ATOMIC64_OP_SUB_RETURN 381#undef ATOMIC64_OP_SUB_RETURN
382 382
383#define ATOMIC64_FETCH_OP_SUB(name, mb, cl...) \ 383#define ATOMIC64_FETCH_OP_SUB(name, mb, cl...) \
384static inline long arch_atomic64_fetch_sub##name(long i, atomic64_t *v) \ 384static inline s64 arch_atomic64_fetch_sub##name(s64 i, atomic64_t *v) \
385{ \ 385{ \
386 register long x0 asm ("x0") = i; \ 386 register s64 x0 asm ("x0") = i; \
387 register atomic64_t *x1 asm ("x1") = v; \ 387 register atomic64_t *x1 asm ("x1") = v; \
388 \ 388 \
389 asm volatile(ARM64_LSE_ATOMIC_INSN( \ 389 asm volatile(ARM64_LSE_ATOMIC_INSN( \
@@ -407,7 +407,7 @@ ATOMIC64_FETCH_OP_SUB( , al, "memory")
407 407
408#undef ATOMIC64_FETCH_OP_SUB 408#undef ATOMIC64_FETCH_OP_SUB
409 409
410static inline long arch_atomic64_dec_if_positive(atomic64_t *v) 410static inline s64 arch_atomic64_dec_if_positive(atomic64_t *v)
411{ 411{
412 register long x0 asm ("x0") = (long)v; 412 register long x0 asm ("x0") = (long)v;
413 413
diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h
index a05db636981a..64eeaa41e7ca 100644
--- a/arch/arm64/include/asm/cache.h
+++ b/arch/arm64/include/asm/cache.h
@@ -80,12 +80,15 @@ static inline u32 cache_type_cwg(void)
80 80
81#define __read_mostly __attribute__((__section__(".data..read_mostly"))) 81#define __read_mostly __attribute__((__section__(".data..read_mostly")))
82 82
83static inline int cache_line_size(void) 83static inline int cache_line_size_of_cpu(void)
84{ 84{
85 u32 cwg = cache_type_cwg(); 85 u32 cwg = cache_type_cwg();
86
86 return cwg ? 4 << cwg : ARCH_DMA_MINALIGN; 87 return cwg ? 4 << cwg : ARCH_DMA_MINALIGN;
87} 88}
88 89
90int cache_line_size(void);
91
89/* 92/*
90 * Read the effective value of CTR_EL0. 93 * Read the effective value of CTR_EL0.
91 * 94 *
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 1fe4467442aa..665c78e0665a 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -176,4 +176,7 @@ static inline void flush_cache_vunmap(unsigned long start, unsigned long end)
176 176
177int set_memory_valid(unsigned long addr, int numpages, int enable); 177int set_memory_valid(unsigned long addr, int numpages, int enable);
178 178
179int set_direct_map_invalid_noflush(struct page *page);
180int set_direct_map_default_noflush(struct page *page);
181
179#endif 182#endif
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 373799b7982f..3d8db50d9ae2 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -614,6 +614,12 @@ static inline bool system_uses_irq_prio_masking(void)
614 cpus_have_const_cap(ARM64_HAS_IRQ_PRIO_MASKING); 614 cpus_have_const_cap(ARM64_HAS_IRQ_PRIO_MASKING);
615} 615}
616 616
617static inline bool system_has_prio_mask_debugging(void)
618{
619 return IS_ENABLED(CONFIG_ARM64_DEBUG_PRIORITY_MASKING) &&
620 system_uses_irq_prio_masking();
621}
622
617#define ARM64_SSBD_UNKNOWN -1 623#define ARM64_SSBD_UNKNOWN -1
618#define ARM64_SSBD_FORCE_DISABLE 0 624#define ARM64_SSBD_FORCE_DISABLE 0
619#define ARM64_SSBD_KERNEL 1 625#define ARM64_SSBD_KERNEL 1
diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h
index 6dd8a8723525..987926ed535e 100644
--- a/arch/arm64/include/asm/daifflags.h
+++ b/arch/arm64/include/asm/daifflags.h
@@ -7,6 +7,7 @@
7 7
8#include <linux/irqflags.h> 8#include <linux/irqflags.h>
9 9
10#include <asm/arch_gicv3.h>
10#include <asm/cpufeature.h> 11#include <asm/cpufeature.h>
11 12
12#define DAIF_PROCCTX 0 13#define DAIF_PROCCTX 0
@@ -16,11 +17,20 @@
16/* mask/save/unmask/restore all exceptions, including interrupts. */ 17/* mask/save/unmask/restore all exceptions, including interrupts. */
17static inline void local_daif_mask(void) 18static inline void local_daif_mask(void)
18{ 19{
20 WARN_ON(system_has_prio_mask_debugging() &&
21 (read_sysreg_s(SYS_ICC_PMR_EL1) == (GIC_PRIO_IRQOFF |
22 GIC_PRIO_PSR_I_SET)));
23
19 asm volatile( 24 asm volatile(
20 "msr daifset, #0xf // local_daif_mask\n" 25 "msr daifset, #0xf // local_daif_mask\n"
21 : 26 :
22 : 27 :
23 : "memory"); 28 : "memory");
29
30 /* Don't really care for a dsb here, we don't intend to enable IRQs */
31 if (system_uses_irq_prio_masking())
32 gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
33
24 trace_hardirqs_off(); 34 trace_hardirqs_off();
25} 35}
26 36
@@ -32,7 +42,7 @@ static inline unsigned long local_daif_save(void)
32 42
33 if (system_uses_irq_prio_masking()) { 43 if (system_uses_irq_prio_masking()) {
34 /* If IRQs are masked with PMR, reflect it in the flags */ 44 /* If IRQs are masked with PMR, reflect it in the flags */
35 if (read_sysreg_s(SYS_ICC_PMR_EL1) <= GIC_PRIO_IRQOFF) 45 if (read_sysreg_s(SYS_ICC_PMR_EL1) != GIC_PRIO_IRQON)
36 flags |= PSR_I_BIT; 46 flags |= PSR_I_BIT;
37 } 47 }
38 48
@@ -45,39 +55,50 @@ static inline void local_daif_restore(unsigned long flags)
45{ 55{
46 bool irq_disabled = flags & PSR_I_BIT; 56 bool irq_disabled = flags & PSR_I_BIT;
47 57
58 WARN_ON(system_has_prio_mask_debugging() &&
59 !(read_sysreg(daif) & PSR_I_BIT));
60
48 if (!irq_disabled) { 61 if (!irq_disabled) {
49 trace_hardirqs_on(); 62 trace_hardirqs_on();
50 63
51 if (system_uses_irq_prio_masking())
52 arch_local_irq_enable();
53 } else if (!(flags & PSR_A_BIT)) {
54 /*
55 * If interrupts are disabled but we can take
56 * asynchronous errors, we can take NMIs
57 */
58 if (system_uses_irq_prio_masking()) { 64 if (system_uses_irq_prio_masking()) {
59 flags &= ~PSR_I_BIT; 65 gic_write_pmr(GIC_PRIO_IRQON);
66 dsb(sy);
67 }
68 } else if (system_uses_irq_prio_masking()) {
69 u64 pmr;
70
71 if (!(flags & PSR_A_BIT)) {
60 /* 72 /*
61 * There has been concern that the write to daif 73 * If interrupts are disabled but we can take
62 * might be reordered before this write to PMR. 74 * asynchronous errors, we can take NMIs
63 * From the ARM ARM DDI 0487D.a, section D1.7.1
64 * "Accessing PSTATE fields":
65 * Writes to the PSTATE fields have side-effects on
66 * various aspects of the PE operation. All of these
67 * side-effects are guaranteed:
68 * - Not to be visible to earlier instructions in
69 * the execution stream.
70 * - To be visible to later instructions in the
71 * execution stream
72 *
73 * Also, writes to PMR are self-synchronizing, so no
74 * interrupts with a lower priority than PMR is signaled
75 * to the PE after the write.
76 *
77 * So we don't need additional synchronization here.
78 */ 75 */
79 arch_local_irq_disable(); 76 flags &= ~PSR_I_BIT;
77 pmr = GIC_PRIO_IRQOFF;
78 } else {
79 pmr = GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET;
80 } 80 }
81
82 /*
83 * There has been concern that the write to daif
84 * might be reordered before this write to PMR.
85 * From the ARM ARM DDI 0487D.a, section D1.7.1
86 * "Accessing PSTATE fields":
87 * Writes to the PSTATE fields have side-effects on
88 * various aspects of the PE operation. All of these
89 * side-effects are guaranteed:
90 * - Not to be visible to earlier instructions in
91 * the execution stream.
92 * - To be visible to later instructions in the
93 * execution stream
94 *
95 * Also, writes to PMR are self-synchronizing, so no
96 * interrupts with a lower priority than PMR is signaled
97 * to the PE after the write.
98 *
99 * So we don't need additional synchronization here.
100 */
101 gic_write_pmr(pmr);
81 } 102 }
82 103
83 write_sysreg(flags, daif); 104 write_sysreg(flags, daif);
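
After the rework above, local_daif_restore() derives the PMR value from the saved flags when priority masking is in use. Purely as an illustration of the three cases, not part of the patch, the selection logic amounts to the helper below; the pseudo_daif_restore_pmr() name is hypothetical, while PSR_I_BIT, PSR_A_BIT and the GIC_PRIO_* constants are the ones used in the hunk.

static inline u64 pseudo_daif_restore_pmr(unsigned long *flags)
{
	/* IRQs enabled: unmask everything at the PMR level. */
	if (!(*flags & PSR_I_BIT))
		return GIC_PRIO_IRQON;

	/* IRQs masked but SErrors allowed: keep pseudo-NMIs deliverable. */
	if (!(*flags & PSR_A_BIT)) {
		*flags &= ~PSR_I_BIT;
		return GIC_PRIO_IRQOFF;
	}

	/* Everything masked: rely on PSR.I and record that in the PMR value. */
	return GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET;
}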
diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h
index 325d9515c0f8..3c7037c6ba9b 100644
--- a/arch/arm64/include/asm/elf.h
+++ b/arch/arm64/include/asm/elf.h
@@ -202,7 +202,21 @@ typedef compat_elf_greg_t compat_elf_gregset_t[COMPAT_ELF_NGREG];
202({ \ 202({ \
203 set_thread_flag(TIF_32BIT); \ 203 set_thread_flag(TIF_32BIT); \
204 }) 204 })
205#ifdef CONFIG_GENERIC_COMPAT_VDSO
206#define COMPAT_ARCH_DLINFO \
207do { \
208 /* \
209 * Note that we use Elf64_Off instead of elf_addr_t because \
210 * elf_addr_t in compat is defined as Elf32_Addr and casting \
211 * current->mm->context.vdso to it triggers a cast warning of \
212 * cast from pointer to integer of different size. \
213 */ \
214 NEW_AUX_ENT(AT_SYSINFO_EHDR, \
215 (Elf64_Off)current->mm->context.vdso); \
216} while (0)
217#else
205#define COMPAT_ARCH_DLINFO 218#define COMPAT_ARCH_DLINFO
219#endif
206extern int aarch32_setup_additional_pages(struct linux_binprm *bprm, 220extern int aarch32_setup_additional_pages(struct linux_binprm *bprm,
207 int uses_interp); 221 int uses_interp);
208#define compat_arch_setup_additional_pages \ 222#define compat_arch_setup_additional_pages \
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index 897029c8e9b5..b6a2c352f4c3 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -37,8 +37,6 @@ struct task_struct;
37extern void fpsimd_save_state(struct user_fpsimd_state *state); 37extern void fpsimd_save_state(struct user_fpsimd_state *state);
38extern void fpsimd_load_state(struct user_fpsimd_state *state); 38extern void fpsimd_load_state(struct user_fpsimd_state *state);
39 39
40extern void fpsimd_save(void);
41
42extern void fpsimd_thread_switch(struct task_struct *next); 40extern void fpsimd_thread_switch(struct task_struct *next);
43extern void fpsimd_flush_thread(void); 41extern void fpsimd_flush_thread(void);
44 42
@@ -52,8 +50,7 @@ extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state,
52 void *sve_state, unsigned int sve_vl); 50 void *sve_state, unsigned int sve_vl);
53 51
54extern void fpsimd_flush_task_state(struct task_struct *target); 52extern void fpsimd_flush_task_state(struct task_struct *target);
55extern void fpsimd_flush_cpu_state(void); 53extern void fpsimd_save_and_flush_cpu_state(void);
56extern void sve_flush_cpu_state(void);
57 54
58/* Maximum VL that SVE VL-agnostic software can transparently support */ 55/* Maximum VL that SVE VL-agnostic software can transparently support */
59#define SVE_VL_ARCH_MAX 0x100 56#define SVE_VL_ARCH_MAX 0x100
diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h
index e5d9420cd258..3d2f2472a36c 100644
--- a/arch/arm64/include/asm/hwcap.h
+++ b/arch/arm64/include/asm/hwcap.h
@@ -84,6 +84,8 @@
84#define KERNEL_HWCAP_SVEBITPERM __khwcap2_feature(SVEBITPERM) 84#define KERNEL_HWCAP_SVEBITPERM __khwcap2_feature(SVEBITPERM)
85#define KERNEL_HWCAP_SVESHA3 __khwcap2_feature(SVESHA3) 85#define KERNEL_HWCAP_SVESHA3 __khwcap2_feature(SVESHA3)
86#define KERNEL_HWCAP_SVESM4 __khwcap2_feature(SVESM4) 86#define KERNEL_HWCAP_SVESM4 __khwcap2_feature(SVESM4)
87#define KERNEL_HWCAP_FLAGM2 __khwcap2_feature(FLAGM2)
88#define KERNEL_HWCAP_FRINT __khwcap2_feature(FRINT)
87 89
88/* 90/*
89 * This yields a mask that user programs can use to figure out what 91 * This yields a mask that user programs can use to figure out what
diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h
index 66853fde60f9..7872f260c9ee 100644
--- a/arch/arm64/include/asm/irqflags.h
+++ b/arch/arm64/include/asm/irqflags.h
@@ -29,6 +29,12 @@
29 */ 29 */
30static inline void arch_local_irq_enable(void) 30static inline void arch_local_irq_enable(void)
31{ 31{
32 if (system_has_prio_mask_debugging()) {
33 u32 pmr = read_sysreg_s(SYS_ICC_PMR_EL1);
34
35 WARN_ON_ONCE(pmr != GIC_PRIO_IRQON && pmr != GIC_PRIO_IRQOFF);
36 }
37
32 asm volatile(ALTERNATIVE( 38 asm volatile(ALTERNATIVE(
33 "msr daifclr, #2 // arch_local_irq_enable\n" 39 "msr daifclr, #2 // arch_local_irq_enable\n"
34 "nop", 40 "nop",
@@ -42,6 +48,12 @@ static inline void arch_local_irq_enable(void)
42 48
43static inline void arch_local_irq_disable(void) 49static inline void arch_local_irq_disable(void)
44{ 50{
51 if (system_has_prio_mask_debugging()) {
52 u32 pmr = read_sysreg_s(SYS_ICC_PMR_EL1);
53
54 WARN_ON_ONCE(pmr != GIC_PRIO_IRQON && pmr != GIC_PRIO_IRQOFF);
55 }
56
45 asm volatile(ALTERNATIVE( 57 asm volatile(ALTERNATIVE(
46 "msr daifset, #2 // arch_local_irq_disable", 58 "msr daifset, #2 // arch_local_irq_disable",
47 __msr_s(SYS_ICC_PMR_EL1, "%0"), 59 __msr_s(SYS_ICC_PMR_EL1, "%0"),
@@ -56,43 +68,46 @@ static inline void arch_local_irq_disable(void)
56 */ 68 */
57static inline unsigned long arch_local_save_flags(void) 69static inline unsigned long arch_local_save_flags(void)
58{ 70{
59 unsigned long daif_bits;
60 unsigned long flags; 71 unsigned long flags;
61 72
62 daif_bits = read_sysreg(daif);
63
64 /*
65 * The asm is logically equivalent to:
66 *
67 * if (system_uses_irq_prio_masking())
68 * flags = (daif_bits & PSR_I_BIT) ?
69 * GIC_PRIO_IRQOFF :
70 * read_sysreg_s(SYS_ICC_PMR_EL1);
71 * else
72 * flags = daif_bits;
73 */
74 asm volatile(ALTERNATIVE( 73 asm volatile(ALTERNATIVE(
75 "mov %0, %1\n" 74 "mrs %0, daif",
76 "nop\n" 75 __mrs_s("%0", SYS_ICC_PMR_EL1),
77 "nop", 76 ARM64_HAS_IRQ_PRIO_MASKING)
78 __mrs_s("%0", SYS_ICC_PMR_EL1) 77 : "=&r" (flags)
79 "ands %1, %1, " __stringify(PSR_I_BIT) "\n" 78 :
80 "csel %0, %0, %2, eq",
81 ARM64_HAS_IRQ_PRIO_MASKING)
82 : "=&r" (flags), "+r" (daif_bits)
83 : "r" ((unsigned long) GIC_PRIO_IRQOFF)
84 : "memory"); 79 : "memory");
85 80
86 return flags; 81 return flags;
87} 82}
88 83
84static inline int arch_irqs_disabled_flags(unsigned long flags)
85{
86 int res;
87
88 asm volatile(ALTERNATIVE(
89 "and %w0, %w1, #" __stringify(PSR_I_BIT),
90 "eor %w0, %w1, #" __stringify(GIC_PRIO_IRQON),
91 ARM64_HAS_IRQ_PRIO_MASKING)
92 : "=&r" (res)
93 : "r" ((int) flags)
94 : "memory");
95
96 return res;
97}
98
89static inline unsigned long arch_local_irq_save(void) 99static inline unsigned long arch_local_irq_save(void)
90{ 100{
91 unsigned long flags; 101 unsigned long flags;
92 102
93 flags = arch_local_save_flags(); 103 flags = arch_local_save_flags();
94 104
95 arch_local_irq_disable(); 105 /*
106 * There are too many states with IRQs disabled, just keep the current
107 * state if interrupts are already disabled/masked.
108 */
109 if (!arch_irqs_disabled_flags(flags))
110 arch_local_irq_disable();
96 111
97 return flags; 112 return flags;
98} 113}
@@ -108,26 +123,10 @@ static inline void arch_local_irq_restore(unsigned long flags)
108 __msr_s(SYS_ICC_PMR_EL1, "%0") 123 __msr_s(SYS_ICC_PMR_EL1, "%0")
109 "dsb sy", 124 "dsb sy",
110 ARM64_HAS_IRQ_PRIO_MASKING) 125 ARM64_HAS_IRQ_PRIO_MASKING)
111 : "+r" (flags)
112 : 126 :
127 : "r" (flags)
113 : "memory"); 128 : "memory");
114} 129}
115 130
116static inline int arch_irqs_disabled_flags(unsigned long flags)
117{
118 int res;
119
120 asm volatile(ALTERNATIVE(
121 "and %w0, %w1, #" __stringify(PSR_I_BIT) "\n"
122 "nop",
123 "cmp %w1, #" __stringify(GIC_PRIO_IRQOFF) "\n"
124 "cset %w0, ls",
125 ARM64_HAS_IRQ_PRIO_MASKING)
126 : "=&r" (res)
127 : "r" ((int) flags)
128 : "memory");
129
130 return res;
131}
132#endif 131#endif
133#endif 132#endif
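
The ALTERNATIVE() sequences above select at boot between the classic DAIF behaviour and PMR-based masking. As a reading aid, not part of the patch, the two asm paths compute the equivalent of the plain-C helpers below; the pseudo_* names are hypothetical, everything else is used in the hunk above.

static inline unsigned long pseudo_arch_local_save_flags(void)
{
	/* Without priority masking the flags are simply the DAIF bits. */
	if (!system_uses_irq_prio_masking())
		return read_sysreg(daif);

	/* With priority masking the flags are the current PMR value. */
	return read_sysreg_s(SYS_ICC_PMR_EL1);
}

static inline int pseudo_arch_irqs_disabled_flags(unsigned long flags)
{
	if (!system_uses_irq_prio_masking())
		return flags & PSR_I_BIT;

	/* Any PMR value other than GIC_PRIO_IRQON means IRQs are masked. */
	return flags != GIC_PRIO_IRQON;
}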
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index c328191aa202..9f19c354b165 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -597,11 +597,12 @@ static inline void kvm_arm_vhe_guest_enter(void)
597 * will not signal the CPU of interrupts of lower priority, and the 597 * will not signal the CPU of interrupts of lower priority, and the
598 * only way to get out will be via guest exceptions. 598 * only way to get out will be via guest exceptions.
599 * Naturally, we want to avoid this. 599 * Naturally, we want to avoid this.
600 *
601	 * local_daif_mask() already sets GIC_PRIO_PSR_I_SET; we just need a
602	 * dsb to ensure the redistributor forwards EL2 IRQs to the CPU.
600 */ 603 */
601 if (system_uses_irq_prio_masking()) { 604 if (system_uses_irq_prio_masking())
602 gic_write_pmr(GIC_PRIO_IRQON);
603 dsb(sy); 605 dsb(sy);
604 }
605} 606}
606 607
607static inline void kvm_arm_vhe_guest_exit(void) 608static inline void kvm_arm_vhe_guest_exit(void)
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index 30e5e67749e5..db92950bb1a0 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -115,7 +115,6 @@
115 * Level 2 descriptor (PMD). 115 * Level 2 descriptor (PMD).
116 */ 116 */
117#define PMD_TYPE_MASK (_AT(pmdval_t, 3) << 0) 117#define PMD_TYPE_MASK (_AT(pmdval_t, 3) << 0)
118#define PMD_TYPE_FAULT (_AT(pmdval_t, 0) << 0)
119#define PMD_TYPE_TABLE (_AT(pmdval_t, 3) << 0) 118#define PMD_TYPE_TABLE (_AT(pmdval_t, 3) << 0)
120#define PMD_TYPE_SECT (_AT(pmdval_t, 1) << 0) 119#define PMD_TYPE_SECT (_AT(pmdval_t, 1) << 0)
121#define PMD_TABLE_BIT (_AT(pmdval_t, 1) << 1) 120#define PMD_TABLE_BIT (_AT(pmdval_t, 1) << 1)
@@ -142,8 +141,8 @@
142/* 141/*
143 * Level 3 descriptor (PTE). 142 * Level 3 descriptor (PTE).
144 */ 143 */
144#define PTE_VALID (_AT(pteval_t, 1) << 0)
145#define PTE_TYPE_MASK (_AT(pteval_t, 3) << 0) 145#define PTE_TYPE_MASK (_AT(pteval_t, 3) << 0)
146#define PTE_TYPE_FAULT (_AT(pteval_t, 0) << 0)
147#define PTE_TYPE_PAGE (_AT(pteval_t, 3) << 0) 146#define PTE_TYPE_PAGE (_AT(pteval_t, 3) << 0)
148#define PTE_TABLE_BIT (_AT(pteval_t, 1) << 1) 147#define PTE_TABLE_BIT (_AT(pteval_t, 1) << 1)
149#define PTE_USER (_AT(pteval_t, 1) << 6) /* AP[1] */ 148#define PTE_USER (_AT(pteval_t, 1) << 6) /* AP[1] */
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index c81583be034b..f318258a14be 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -13,7 +13,6 @@
13/* 13/*
14 * Software defined PTE bits definition. 14 * Software defined PTE bits definition.
15 */ 15 */
16#define PTE_VALID (_AT(pteval_t, 1) << 0)
17#define PTE_WRITE (PTE_DBM) /* same as DBM (51) */ 16#define PTE_WRITE (PTE_DBM) /* same as DBM (51) */
18#define PTE_DIRTY (_AT(pteval_t, 1) << 55) 17#define PTE_DIRTY (_AT(pteval_t, 1) << 55)
19#define PTE_SPECIAL (_AT(pteval_t, 1) << 56) 18#define PTE_SPECIAL (_AT(pteval_t, 1) << 56)
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index fca26759081a..3052381baaeb 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -235,29 +235,42 @@ extern void __sync_icache_dcache(pte_t pteval);
235 * 235 *
236 * PTE_DIRTY || (PTE_WRITE && !PTE_RDONLY) 236 * PTE_DIRTY || (PTE_WRITE && !PTE_RDONLY)
237 */ 237 */
238static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, 238
239 pte_t *ptep, pte_t pte) 239static inline void __check_racy_pte_update(struct mm_struct *mm, pte_t *ptep,
240 pte_t pte)
240{ 241{
241 pte_t old_pte; 242 pte_t old_pte;
242 243
243 if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte)) 244 if (!IS_ENABLED(CONFIG_DEBUG_VM))
244 __sync_icache_dcache(pte); 245 return;
246
247 old_pte = READ_ONCE(*ptep);
248
249 if (!pte_valid(old_pte) || !pte_valid(pte))
250 return;
251 if (mm != current->active_mm && atomic_read(&mm->mm_users) <= 1)
252 return;
245 253
246 /* 254 /*
247 * If the existing pte is valid, check for potential race with 255 * Check for potential race with hardware updates of the pte
248 * hardware updates of the pte (ptep_set_access_flags safely changes 256 * (ptep_set_access_flags safely changes valid ptes without going
249 * valid ptes without going through an invalid entry). 257 * through an invalid entry).
250 */ 258 */
251 old_pte = READ_ONCE(*ptep); 259 VM_WARN_ONCE(!pte_young(pte),
252 if (IS_ENABLED(CONFIG_DEBUG_VM) && pte_valid(old_pte) && pte_valid(pte) && 260 "%s: racy access flag clearing: 0x%016llx -> 0x%016llx",
253 (mm == current->active_mm || atomic_read(&mm->mm_users) > 1)) { 261 __func__, pte_val(old_pte), pte_val(pte));
254 VM_WARN_ONCE(!pte_young(pte), 262 VM_WARN_ONCE(pte_write(old_pte) && !pte_dirty(pte),
255 "%s: racy access flag clearing: 0x%016llx -> 0x%016llx", 263 "%s: racy dirty state clearing: 0x%016llx -> 0x%016llx",
256 __func__, pte_val(old_pte), pte_val(pte)); 264 __func__, pte_val(old_pte), pte_val(pte));
257 VM_WARN_ONCE(pte_write(old_pte) && !pte_dirty(pte), 265}
258 "%s: racy dirty state clearing: 0x%016llx -> 0x%016llx", 266
259 __func__, pte_val(old_pte), pte_val(pte)); 267static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
260 } 268 pte_t *ptep, pte_t pte)
269{
270 if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte))
271 __sync_icache_dcache(pte);
272
273 __check_racy_pte_update(mm, ptep, pte);
261 274
262 set_pte(ptep, pte); 275 set_pte(ptep, pte);
263} 276}
@@ -324,9 +337,14 @@ static inline pmd_t pte_pmd(pte_t pte)
324 return __pmd(pte_val(pte)); 337 return __pmd(pte_val(pte));
325} 338}
326 339
327static inline pgprot_t mk_sect_prot(pgprot_t prot) 340static inline pgprot_t mk_pud_sect_prot(pgprot_t prot)
341{
342 return __pgprot((pgprot_val(prot) & ~PUD_TABLE_BIT) | PUD_TYPE_SECT);
343}
344
345static inline pgprot_t mk_pmd_sect_prot(pgprot_t prot)
328{ 346{
329 return __pgprot(pgprot_val(prot) & ~PTE_TABLE_BIT); 347 return __pgprot((pgprot_val(prot) & ~PMD_TABLE_BIT) | PMD_TYPE_SECT);
330} 348}
331 349
332#ifdef CONFIG_NUMA_BALANCING 350#ifdef CONFIG_NUMA_BALANCING
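Note: the replacement helpers above not only clear the table bit (as mk_sect_prot() did) but also set the block/section type explicitly at the right level. A stand-alone sketch of that bit manipulation, with constants copied from the pgtable-hwdef.h hunk above (illustrative only, not kernel code):

    #include <stdint.h>
    #include <stdio.h>

    #define PMD_TYPE_SECT  ((uint64_t)1 << 0)   /* low bits 0b01: section */
    #define PMD_TABLE_BIT  ((uint64_t)1 << 1)   /* low bits 0b11: table   */

    static uint64_t mk_pmd_sect_prot(uint64_t prot)
    {
        return (prot & ~PMD_TABLE_BIT) | PMD_TYPE_SECT;
    }

    int main(void)
    {
        uint64_t table_prot = 0x3;  /* type bits of a table descriptor */

        /* Prints 0x1: the result is always a section, never a table. */
        printf("0x%llx\n", (unsigned long long)mk_pmd_sect_prot(table_prot));
        return 0;
    }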
diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
index dad858b6adc6..81693244f58d 100644
--- a/arch/arm64/include/asm/ptrace.h
+++ b/arch/arm64/include/asm/ptrace.h
@@ -24,9 +24,15 @@
24 * means masking more IRQs (or at least that the same IRQs remain masked). 24 * means masking more IRQs (or at least that the same IRQs remain masked).
25 * 25 *
26 * To mask interrupts, we clear the most significant bit of PMR. 26 * To mask interrupts, we clear the most significant bit of PMR.
27 *
28 * Some code sections either automatically switch back to PSR.I or explicitly
29 * require to not use priority masking. If bit GIC_PRIO_PSR_I_SET is included
 30 * in the priority mask, it indicates that PSR.I should be set and
31 * interrupt disabling temporarily does not rely on IRQ priorities.
27 */ 32 */
28#define GIC_PRIO_IRQON 0xf0 33#define GIC_PRIO_IRQON 0xc0
29#define GIC_PRIO_IRQOFF (GIC_PRIO_IRQON & ~0x80) 34#define GIC_PRIO_IRQOFF (GIC_PRIO_IRQON & ~0x80)
35#define GIC_PRIO_PSR_I_SET (1 << 4)
30 36
31/* Additional SPSR bits not exposed in the UABI */ 37/* Additional SPSR bits not exposed in the UABI */
32#define PSR_IL_BIT (1 << 20) 38#define PSR_IL_BIT (1 << 20)
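Note: a small stand-alone sketch (not kernel code) of the numeric relationships between the priority constants defined above:

    #include <stdio.h>

    #define GIC_PRIO_IRQON      0xc0
    #define GIC_PRIO_IRQOFF     (GIC_PRIO_IRQON & ~0x80)
    #define GIC_PRIO_PSR_I_SET  (1 << 4)

    int main(void)
    {
        /* IRQOFF clears the most significant bit of IRQON: 0xc0 -> 0x40. */
        printf("IRQON=0x%x IRQOFF=0x%x PSR_I_SET=0x%x\n",
               GIC_PRIO_IRQON, GIC_PRIO_IRQOFF, GIC_PRIO_PSR_I_SET);

        /* The PMR value written by gic_prio_kentry_setup later in this
         * series includes the PSR.I marker bit: 0xd0. */
        printf("kentry PMR=0x%x\n", GIC_PRIO_PSR_I_SET | GIC_PRIO_IRQON);
        return 0;
    }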
diff --git a/arch/arm64/include/asm/signal32.h b/arch/arm64/include/asm/signal32.h
index 0418c67f2b8b..bd43d1cf724b 100644
--- a/arch/arm64/include/asm/signal32.h
+++ b/arch/arm64/include/asm/signal32.h
@@ -9,6 +9,52 @@
9#ifdef CONFIG_COMPAT 9#ifdef CONFIG_COMPAT
10#include <linux/compat.h> 10#include <linux/compat.h>
11 11
12struct compat_sigcontext {
13 /* We always set these two fields to 0 */
14 compat_ulong_t trap_no;
15 compat_ulong_t error_code;
16
17 compat_ulong_t oldmask;
18 compat_ulong_t arm_r0;
19 compat_ulong_t arm_r1;
20 compat_ulong_t arm_r2;
21 compat_ulong_t arm_r3;
22 compat_ulong_t arm_r4;
23 compat_ulong_t arm_r5;
24 compat_ulong_t arm_r6;
25 compat_ulong_t arm_r7;
26 compat_ulong_t arm_r8;
27 compat_ulong_t arm_r9;
28 compat_ulong_t arm_r10;
29 compat_ulong_t arm_fp;
30 compat_ulong_t arm_ip;
31 compat_ulong_t arm_sp;
32 compat_ulong_t arm_lr;
33 compat_ulong_t arm_pc;
34 compat_ulong_t arm_cpsr;
35 compat_ulong_t fault_address;
36};
37
38struct compat_ucontext {
39 compat_ulong_t uc_flags;
40 compat_uptr_t uc_link;
41 compat_stack_t uc_stack;
42 struct compat_sigcontext uc_mcontext;
43 compat_sigset_t uc_sigmask;
44 int __unused[32 - (sizeof(compat_sigset_t) / sizeof(int))];
45 compat_ulong_t uc_regspace[128] __attribute__((__aligned__(8)));
46};
47
48struct compat_sigframe {
49 struct compat_ucontext uc;
50 compat_ulong_t retcode[2];
51};
52
53struct compat_rt_sigframe {
54 struct compat_siginfo info;
55 struct compat_sigframe sig;
56};
57
12int compat_setup_frame(int usig, struct ksignal *ksig, sigset_t *set, 58int compat_setup_frame(int usig, struct ksignal *ksig, sigset_t *set,
13 struct pt_regs *regs); 59 struct pt_regs *regs);
14int compat_setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set, 60int compat_setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set,
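Note: these layouts are moved into the header so that asm-offsets.c (later in this diff) can emit COMPAT_SIGFRAME_REGS_OFFSET and COMPAT_RT_SIGFRAME_REGS_OFFSET for the compat vDSO sigreturn trampoline. A rough userspace illustration of that offsetof technique, using simplified stand-in types rather than the real compat structures:

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Stand-ins only; the real structs are the compat_* ones above. */
    struct sigcontext_like {
        uint32_t trap_no;
        uint32_t error_code;
        uint32_t oldmask;
        uint32_t arm_r0;        /* first register slot the trampoline needs */
    };

    struct sigframe_like {
        struct {
            uint32_t uc_flags;
            struct sigcontext_like uc_mcontext;
        } uc;
        uint32_t retcode[2];
    };

    int main(void)
    {
        /* asm-offsets.c does the same thing at build time with DEFINE(). */
        printf("#define SIGFRAME_REGS_OFFSET %zu\n",
               offsetof(struct sigframe_like, uc.uc_mcontext.arm_r0));
        return 0;
    }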
diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h
index 7e245b9e03a5..7434844036d3 100644
--- a/arch/arm64/include/asm/simd.h
+++ b/arch/arm64/include/asm/simd.h
@@ -12,9 +12,9 @@
12#include <linux/preempt.h> 12#include <linux/preempt.h>
13#include <linux/types.h> 13#include <linux/types.h>
14 14
15#ifdef CONFIG_KERNEL_MODE_NEON 15DECLARE_PER_CPU(bool, fpsimd_context_busy);
16 16
17DECLARE_PER_CPU(bool, kernel_neon_busy); 17#ifdef CONFIG_KERNEL_MODE_NEON
18 18
19/* 19/*
20 * may_use_simd - whether it is allowable at this time to issue SIMD 20 * may_use_simd - whether it is allowable at this time to issue SIMD
@@ -26,15 +26,15 @@ DECLARE_PER_CPU(bool, kernel_neon_busy);
26static __must_check inline bool may_use_simd(void) 26static __must_check inline bool may_use_simd(void)
27{ 27{
28 /* 28 /*
29 * kernel_neon_busy is only set while preemption is disabled, 29 * fpsimd_context_busy is only set while preemption is disabled,
30 * and is clear whenever preemption is enabled. Since 30 * and is clear whenever preemption is enabled. Since
31 * this_cpu_read() is atomic w.r.t. preemption, kernel_neon_busy 31 * this_cpu_read() is atomic w.r.t. preemption, fpsimd_context_busy
32 * cannot change under our feet -- if it's set we cannot be 32 * cannot change under our feet -- if it's set we cannot be
33 * migrated, and if it's clear we cannot be migrated to a CPU 33 * migrated, and if it's clear we cannot be migrated to a CPU
34 * where it is set. 34 * where it is set.
35 */ 35 */
36 return !in_irq() && !irqs_disabled() && !in_nmi() && 36 return !in_irq() && !irqs_disabled() && !in_nmi() &&
37 !this_cpu_read(kernel_neon_busy); 37 !this_cpu_read(fpsimd_context_busy);
38} 38}
39 39
40#else /* ! CONFIG_KERNEL_MODE_NEON */ 40#else /* ! CONFIG_KERNEL_MODE_NEON */
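Note: for context, a hedged sketch of the caller-side pattern that may_use_simd() guards (a hypothetical kernel user, not part of this patch):

    #include <linux/types.h>
    #include <asm/neon.h>
    #include <asm/simd.h>

    static void sum_bytes_scalar(u8 *dst, const u8 *a, const u8 *b, size_t len)
    {
        while (len--)
            *dst++ = *a++ + *b++;
    }

    static void sum_bytes(u8 *dst, const u8 *a, const u8 *b, size_t len)
    {
        if (may_use_simd()) {
            kernel_neon_begin();    /* claims the per-CPU FPSIMD context */
            /* a NEON-accelerated loop would go here */
            sum_bytes_scalar(dst, a, b, len);
            kernel_neon_end();      /* releases it again */
        } else {
            /* IRQ/NMI context, or the FPSIMD context is already busy */
            sum_bytes_scalar(dst, a, b, len);
        }
    }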
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index cd7f7ce1a56a..d0bd4ffcf2c4 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -549,6 +549,7 @@
549 549
550/* id_aa64isar1 */ 550/* id_aa64isar1 */
551#define ID_AA64ISAR1_SB_SHIFT 36 551#define ID_AA64ISAR1_SB_SHIFT 36
552#define ID_AA64ISAR1_FRINTTS_SHIFT 32
552#define ID_AA64ISAR1_GPI_SHIFT 28 553#define ID_AA64ISAR1_GPI_SHIFT 28
553#define ID_AA64ISAR1_GPA_SHIFT 24 554#define ID_AA64ISAR1_GPA_SHIFT 24
554#define ID_AA64ISAR1_LRCPC_SHIFT 20 555#define ID_AA64ISAR1_LRCPC_SHIFT 20
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 2372e97db29c..180b34ec5965 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -65,6 +65,7 @@ void arch_release_task_struct(struct task_struct *tsk);
65 * TIF_SYSCALL_TRACEPOINT - syscall tracepoint for ftrace 65 * TIF_SYSCALL_TRACEPOINT - syscall tracepoint for ftrace
66 * TIF_SYSCALL_AUDIT - syscall auditing 66 * TIF_SYSCALL_AUDIT - syscall auditing
67 * TIF_SECCOMP - syscall secure computing 67 * TIF_SECCOMP - syscall secure computing
68 * TIF_SYSCALL_EMU - syscall emulation active
68 * TIF_SIGPENDING - signal pending 69 * TIF_SIGPENDING - signal pending
69 * TIF_NEED_RESCHED - rescheduling necessary 70 * TIF_NEED_RESCHED - rescheduling necessary
70 * TIF_NOTIFY_RESUME - callback before returning to user 71 * TIF_NOTIFY_RESUME - callback before returning to user
@@ -80,6 +81,7 @@ void arch_release_task_struct(struct task_struct *tsk);
80#define TIF_SYSCALL_AUDIT 9 81#define TIF_SYSCALL_AUDIT 9
81#define TIF_SYSCALL_TRACEPOINT 10 82#define TIF_SYSCALL_TRACEPOINT 10
82#define TIF_SECCOMP 11 83#define TIF_SECCOMP 11
84#define TIF_SYSCALL_EMU 12
83#define TIF_MEMDIE 18 /* is terminating due to OOM killer */ 85#define TIF_MEMDIE 18 /* is terminating due to OOM killer */
84#define TIF_FREEZE 19 86#define TIF_FREEZE 19
85#define TIF_RESTORE_SIGMASK 20 87#define TIF_RESTORE_SIGMASK 20
@@ -98,6 +100,7 @@ void arch_release_task_struct(struct task_struct *tsk);
98#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) 100#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
99#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) 101#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
100#define _TIF_SECCOMP (1 << TIF_SECCOMP) 102#define _TIF_SECCOMP (1 << TIF_SECCOMP)
103#define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU)
101#define _TIF_UPROBE (1 << TIF_UPROBE) 104#define _TIF_UPROBE (1 << TIF_UPROBE)
102#define _TIF_FSCHECK (1 << TIF_FSCHECK) 105#define _TIF_FSCHECK (1 << TIF_FSCHECK)
103#define _TIF_32BIT (1 << TIF_32BIT) 106#define _TIF_32BIT (1 << TIF_32BIT)
@@ -109,7 +112,7 @@ void arch_release_task_struct(struct task_struct *tsk);
109 112
110#define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ 113#define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
111 _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \ 114 _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
112 _TIF_NOHZ) 115 _TIF_NOHZ | _TIF_SYSCALL_EMU)
113 116
114#define INIT_THREAD_INFO(tsk) \ 117#define INIT_THREAD_INFO(tsk) \
115{ \ 118{ \
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index c9f8dd421c5f..2a23614198f1 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -22,8 +22,13 @@
22#define __NR_compat_exit 1 22#define __NR_compat_exit 1
23#define __NR_compat_read 3 23#define __NR_compat_read 3
24#define __NR_compat_write 4 24#define __NR_compat_write 4
25#define __NR_compat_gettimeofday 78
25#define __NR_compat_sigreturn 119 26#define __NR_compat_sigreturn 119
26#define __NR_compat_rt_sigreturn 173 27#define __NR_compat_rt_sigreturn 173
28#define __NR_compat_clock_getres 247
29#define __NR_compat_clock_gettime 263
30#define __NR_compat_clock_gettime64 403
31#define __NR_compat_clock_getres_time64 406
27 32
28/* 33/*
29 * The following SVCs are ARM private. 34 * The following SVCs are ARM private.
diff --git a/arch/arm64/include/asm/vdso.h b/arch/arm64/include/asm/vdso.h
index 1f94ec19903c..9c15e0a06301 100644
--- a/arch/arm64/include/asm/vdso.h
+++ b/arch/arm64/include/asm/vdso.h
@@ -17,6 +17,9 @@
17#ifndef __ASSEMBLY__ 17#ifndef __ASSEMBLY__
18 18
19#include <generated/vdso-offsets.h> 19#include <generated/vdso-offsets.h>
20#ifdef CONFIG_COMPAT_VDSO
21#include <generated/vdso32-offsets.h>
22#endif
20 23
21#define VDSO_SYMBOL(base, name) \ 24#define VDSO_SYMBOL(base, name) \
22({ \ 25({ \
diff --git a/arch/arm64/include/asm/vdso/compat_barrier.h b/arch/arm64/include/asm/vdso/compat_barrier.h
new file mode 100644
index 000000000000..fb60a88b5ed4
--- /dev/null
+++ b/arch/arm64/include/asm/vdso/compat_barrier.h
@@ -0,0 +1,44 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2/*
3 * Copyright (C) 2018 ARM Limited
4 */
5#ifndef __COMPAT_BARRIER_H
6#define __COMPAT_BARRIER_H
7
8#ifndef __ASSEMBLY__
9/*
10 * Warning: This code is meant to be used with
11 * ENABLE_COMPAT_VDSO only.
12 */
13#ifndef ENABLE_COMPAT_VDSO
14#error This header is meant to be used with ENABLE_COMPAT_VDSO only
15#endif
16
17#ifdef dmb
18#undef dmb
19#endif
20
21#define dmb(option) __asm__ __volatile__ ("dmb " #option : : : "memory")
22
23#if __LINUX_ARM_ARCH__ >= 8
24#define aarch32_smp_mb() dmb(ish)
25#define aarch32_smp_rmb() dmb(ishld)
26#define aarch32_smp_wmb() dmb(ishst)
27#else
28#define aarch32_smp_mb() dmb(ish)
29#define aarch32_smp_rmb() aarch32_smp_mb()
30#define aarch32_smp_wmb() dmb(ishst)
31#endif
32
33
34#undef smp_mb
35#undef smp_rmb
36#undef smp_wmb
37
38#define smp_mb() aarch32_smp_mb()
39#define smp_rmb() aarch32_smp_rmb()
40#define smp_wmb() aarch32_smp_wmb()
41
42#endif /* !__ASSEMBLY__ */
43
44#endif /* __COMPAT_BARRIER_H */
diff --git a/arch/arm64/include/asm/vdso/compat_gettimeofday.h b/arch/arm64/include/asm/vdso/compat_gettimeofday.h
new file mode 100644
index 000000000000..f4812777f5c5
--- /dev/null
+++ b/arch/arm64/include/asm/vdso/compat_gettimeofday.h
@@ -0,0 +1,126 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2/*
3 * Copyright (C) 2018 ARM Limited
4 */
5#ifndef __ASM_VDSO_GETTIMEOFDAY_H
6#define __ASM_VDSO_GETTIMEOFDAY_H
7
8#ifndef __ASSEMBLY__
9
10#include <asm/unistd.h>
11#include <uapi/linux/time.h>
12
13#include <asm/vdso/compat_barrier.h>
14
15#define __VDSO_USE_SYSCALL ULLONG_MAX
16
17#define VDSO_HAS_CLOCK_GETRES 1
18
19static __always_inline
20int gettimeofday_fallback(struct __kernel_old_timeval *_tv,
21 struct timezone *_tz)
22{
23 register struct timezone *tz asm("r1") = _tz;
24 register struct __kernel_old_timeval *tv asm("r0") = _tv;
25 register long ret asm ("r0");
26 register long nr asm("r7") = __NR_compat_gettimeofday;
27
28 asm volatile(
29 " swi #0\n"
30 : "=r" (ret)
31 : "r" (tv), "r" (tz), "r" (nr)
32 : "memory");
33
34 return ret;
35}
36
37static __always_inline
38long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
39{
40 register struct __kernel_timespec *ts asm("r1") = _ts;
41 register clockid_t clkid asm("r0") = _clkid;
42 register long ret asm ("r0");
43 register long nr asm("r7") = __NR_compat_clock_gettime64;
44
45 asm volatile(
46 " swi #0\n"
47 : "=r" (ret)
48 : "r" (clkid), "r" (ts), "r" (nr)
49 : "memory");
50
51 return ret;
52}
53
54static __always_inline
55int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
56{
57 register struct __kernel_timespec *ts asm("r1") = _ts;
58 register clockid_t clkid asm("r0") = _clkid;
59 register long ret asm ("r0");
60 register long nr asm("r7") = __NR_compat_clock_getres_time64;
61
62 /* The checks below are required for ABI consistency with arm */
63 if ((_clkid >= MAX_CLOCKS) && (_ts == NULL))
64 return -EINVAL;
65
66 asm volatile(
67 " swi #0\n"
68 : "=r" (ret)
69 : "r" (clkid), "r" (ts), "r" (nr)
70 : "memory");
71
72 return ret;
73}
74
75static __always_inline u64 __arch_get_hw_counter(s32 clock_mode)
76{
77 u64 res;
78
79 /*
 80 * clock_mode == 0 implies that the vDSO is enabled; otherwise,
 81 * fall back on the syscall.
82 */
83 if (clock_mode)
84 return __VDSO_USE_SYSCALL;
85
86 /*
 87 * This isb() is required to prevent the counter value from
 88 * being speculated.
89 */
90 isb();
91 asm volatile("mrrc p15, 1, %Q0, %R0, c14" : "=r" (res));
92 /*
 93 * This isb() is required to prevent the seq lock from being
 94 * speculated.
95 */
96 isb();
97
98 return res;
99}
100
101static __always_inline const struct vdso_data *__arch_get_vdso_data(void)
102{
103 const struct vdso_data *ret;
104
105 /*
106 * This simply puts &_vdso_data into ret. The reason why we don't use
107 * `ret = _vdso_data` is that the compiler tends to optimise this in a
108 * very suboptimal way: instead of keeping &_vdso_data in a register,
109 * it goes through a relocation almost every time _vdso_data must be
110 * accessed (even in subfunctions). This is both time and space
111 * consuming: each relocation uses a word in the code section, and it
112 * has to be loaded at runtime.
113 *
114 * This trick hides the assignment from the compiler. Since it cannot
115 * track where the pointer comes from, it will only use one relocation
116 * where __arch_get_vdso_data() is called, and then keep the result in
117 * a register.
118 */
119 asm volatile("mov %0, %1" : "=r"(ret) : "r"(_vdso_data));
120
121 return ret;
122}
123
124#endif /* !__ASSEMBLY__ */
125
126#endif /* __ASM_VDSO_GETTIMEOFDAY_H */
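Note: the __arch_get_vdso_data() comment above describes a general trick: hiding a global's address behind an asm statement so the compiler resolves the relocation once and keeps the pointer in a register. A minimal illustration with a hypothetical symbol (not the vDSO code itself):

    struct my_vdso_data { unsigned long seq; /* ... */ };

    static const struct my_vdso_data my_data;   /* stand-in for _vdso_data */

    static inline const struct my_vdso_data *get_data(void)
    {
        const struct my_vdso_data *ret;

        /*
         * Equivalent to 'ret = &my_data', but opaque to the compiler, so
         * the address is materialised once instead of going through a
         * relocation at every access.
         */
        asm volatile("mov %0, %1" : "=r"(ret) : "r"(&my_data));

        return ret;
    }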
diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h
new file mode 100644
index 000000000000..b08f476b72b4
--- /dev/null
+++ b/arch/arm64/include/asm/vdso/gettimeofday.h
@@ -0,0 +1,103 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2/*
3 * Copyright (C) 2018 ARM Limited
4 */
5#ifndef __ASM_VDSO_GETTIMEOFDAY_H
6#define __ASM_VDSO_GETTIMEOFDAY_H
7
8#ifndef __ASSEMBLY__
9
10#include <asm/unistd.h>
11#include <uapi/linux/time.h>
12
13#define __VDSO_USE_SYSCALL ULLONG_MAX
14
15#define VDSO_HAS_CLOCK_GETRES 1
16
17static __always_inline
18int gettimeofday_fallback(struct __kernel_old_timeval *_tv,
19 struct timezone *_tz)
20{
21 register struct timezone *tz asm("x1") = _tz;
22 register struct __kernel_old_timeval *tv asm("x0") = _tv;
23 register long ret asm ("x0");
24 register long nr asm("x8") = __NR_gettimeofday;
25
26 asm volatile(
27 " svc #0\n"
28 : "=r" (ret)
29 : "r" (tv), "r" (tz), "r" (nr)
30 : "memory");
31
32 return ret;
33}
34
35static __always_inline
36long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
37{
38 register struct __kernel_timespec *ts asm("x1") = _ts;
39 register clockid_t clkid asm("x0") = _clkid;
40 register long ret asm ("x0");
41 register long nr asm("x8") = __NR_clock_gettime;
42
43 asm volatile(
44 " svc #0\n"
45 : "=r" (ret)
46 : "r" (clkid), "r" (ts), "r" (nr)
47 : "memory");
48
49 return ret;
50}
51
52static __always_inline
53int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
54{
55 register struct __kernel_timespec *ts asm("x1") = _ts;
56 register clockid_t clkid asm("x0") = _clkid;
57 register long ret asm ("x0");
58 register long nr asm("x8") = __NR_clock_getres;
59
60 asm volatile(
61 " svc #0\n"
62 : "=r" (ret)
63 : "r" (clkid), "r" (ts), "r" (nr)
64 : "memory");
65
66 return ret;
67}
68
69static __always_inline u64 __arch_get_hw_counter(s32 clock_mode)
70{
71 u64 res;
72
73 /*
 74 * clock_mode == 0 implies that the vDSO is enabled; otherwise,
 75 * fall back on the syscall.
76 */
77 if (clock_mode)
78 return __VDSO_USE_SYSCALL;
79
80 /*
 81 * This isb() is required to prevent the counter value from
 82 * being speculated.
83 */
84 isb();
85 asm volatile("mrs %0, cntvct_el0" : "=r" (res) :: "memory");
86 /*
 87 * This isb() is required to prevent the seq lock from being
 88 * speculated.
89 */
90 isb();
91
92 return res;
93}
94
95static __always_inline
96const struct vdso_data *__arch_get_vdso_data(void)
97{
98 return _vdso_data;
99}
100
101#endif /* !__ASSEMBLY__ */
102
103#endif /* __ASM_VDSO_GETTIMEOFDAY_H */
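Note: from userspace, the difference this unified vDSO makes can be seen by comparing the libc call (normally serviced by the vDSO) with a forced syscall; an illustrative sketch, not part of the patch:

    #include <stdio.h>
    #include <time.h>
    #include <unistd.h>
    #include <sys/syscall.h>

    int main(void)
    {
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);               /* vDSO fast path */
        printf("vdso:    %ld.%09ld\n", (long)ts.tv_sec, ts.tv_nsec);

        syscall(SYS_clock_gettime, CLOCK_MONOTONIC, &ts);  /* kernel path */
        printf("syscall: %ld.%09ld\n", (long)ts.tv_sec, ts.tv_nsec);
        return 0;
    }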
diff --git a/arch/arm64/include/asm/vdso/vsyscall.h b/arch/arm64/include/asm/vdso/vsyscall.h
new file mode 100644
index 000000000000..0c731bfc7c8c
--- /dev/null
+++ b/arch/arm64/include/asm/vdso/vsyscall.h
@@ -0,0 +1,53 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2#ifndef __ASM_VDSO_VSYSCALL_H
3#define __ASM_VDSO_VSYSCALL_H
4
5#ifndef __ASSEMBLY__
6
7#include <linux/timekeeper_internal.h>
8#include <vdso/datapage.h>
9
10#define VDSO_PRECISION_MASK ~(0xFF00ULL<<48)
11
12extern struct vdso_data *vdso_data;
13
14/*
15 * Update the vDSO data page to keep in sync with kernel timekeeping.
16 */
17static __always_inline
18struct vdso_data *__arm64_get_k_vdso_data(void)
19{
20 return vdso_data;
21}
22#define __arch_get_k_vdso_data __arm64_get_k_vdso_data
23
24static __always_inline
25int __arm64_get_clock_mode(struct timekeeper *tk)
26{
27 u32 use_syscall = !tk->tkr_mono.clock->archdata.vdso_direct;
28
29 return use_syscall;
30}
31#define __arch_get_clock_mode __arm64_get_clock_mode
32
33static __always_inline
34int __arm64_use_vsyscall(struct vdso_data *vdata)
35{
36 return !vdata[CS_HRES_COARSE].clock_mode;
37}
38#define __arch_use_vsyscall __arm64_use_vsyscall
39
40static __always_inline
41void __arm64_update_vsyscall(struct vdso_data *vdata, struct timekeeper *tk)
42{
43 vdata[CS_HRES_COARSE].mask = VDSO_PRECISION_MASK;
44 vdata[CS_RAW].mask = VDSO_PRECISION_MASK;
45}
46#define __arch_update_vsyscall __arm64_update_vsyscall
47
48/* The asm-generic header needs to be included after the definitions above */
49#include <asm-generic/vdso/vsyscall.h>
50
51#endif /* !__ASSEMBLY__ */
52
53#endif /* __ASM_VDSO_VSYSCALL_H */
diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h
index 1a772b162191..a1e72886b30c 100644
--- a/arch/arm64/include/uapi/asm/hwcap.h
+++ b/arch/arm64/include/uapi/asm/hwcap.h
@@ -63,5 +63,7 @@
63#define HWCAP2_SVEBITPERM (1 << 4) 63#define HWCAP2_SVEBITPERM (1 << 4)
64#define HWCAP2_SVESHA3 (1 << 5) 64#define HWCAP2_SVESHA3 (1 << 5)
65#define HWCAP2_SVESM4 (1 << 6) 65#define HWCAP2_SVESM4 (1 << 6)
66#define HWCAP2_FLAGM2 (1 << 7)
67#define HWCAP2_FRINT (1 << 8)
66 68
67#endif /* _UAPI__ASM_HWCAP_H */ 69#endif /* _UAPI__ASM_HWCAP_H */
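Note: userspace can probe the new capability bits through the auxiliary vector; a small sketch (the fallback defines cover libcs whose headers predate these bits):

    #include <stdio.h>
    #include <sys/auxv.h>

    #ifndef HWCAP2_FLAGM2
    #define HWCAP2_FLAGM2   (1 << 7)
    #endif
    #ifndef HWCAP2_FRINT
    #define HWCAP2_FRINT    (1 << 8)
    #endif

    int main(void)
    {
        unsigned long hwcap2 = getauxval(AT_HWCAP2);

        printf("FLAGM2: %s\n", (hwcap2 & HWCAP2_FLAGM2) ? "yes" : "no");
        printf("FRINT:  %s\n", (hwcap2 & HWCAP2_FRINT) ? "yes" : "no");
        return 0;
    }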
diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h
index e932284993d4..7ed9294e2004 100644
--- a/arch/arm64/include/uapi/asm/ptrace.h
+++ b/arch/arm64/include/uapi/asm/ptrace.h
@@ -62,6 +62,9 @@
62#define PSR_x 0x0000ff00 /* Extension */ 62#define PSR_x 0x0000ff00 /* Extension */
63#define PSR_c 0x000000ff /* Control */ 63#define PSR_c 0x000000ff /* Control */
64 64
65/* syscall emulation path in ptrace */
66#define PTRACE_SYSEMU 31
67#define PTRACE_SYSEMU_SINGLESTEP 32
65 68
66#ifndef __ASSEMBLY__ 69#ifndef __ASSEMBLY__
67 70
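Note: a rough userspace sketch of how a tracer would use the new request: it stops the child at syscall entry without letting the kernel execute the call (PTRACE_SYSEMU may be missing from older libc headers, hence the fallback define):

    #include <signal.h>
    #include <sys/ptrace.h>
    #include <sys/types.h>
    #include <sys/wait.h>
    #include <unistd.h>

    #ifndef PTRACE_SYSEMU
    #define PTRACE_SYSEMU 31
    #endif

    int main(void)
    {
        pid_t pid = fork();

        if (pid == 0) {
            ptrace(PTRACE_TRACEME, 0, 0, 0);
            raise(SIGSTOP);
            write(1, "hello\n", 6);     /* the tracer will intercept this */
            _exit(0);
        }

        waitpid(pid, NULL, 0);              /* initial SIGSTOP */

        ptrace(PTRACE_SYSEMU, pid, 0, 0);   /* run to next syscall entry */
        waitpid(pid, NULL, 0);              /* stopped before write(); the
                                               kernel will not run it */
        /* A real emulator would PTRACE_GETREGSET here, service the call,
         * write results back, and resume with PTRACE_SYSEMU again. */

        kill(pid, SIGKILL);
        waitpid(pid, NULL, 0);
        return 0;
    }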
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 9e7dcb2c31c7..478491f07b4f 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -28,7 +28,10 @@ $(obj)/%.stub.o: $(obj)/%.o FORCE
28 $(call if_changed,objcopy) 28 $(call if_changed,objcopy)
29 29
30obj-$(CONFIG_COMPAT) += sys32.o signal32.o \ 30obj-$(CONFIG_COMPAT) += sys32.o signal32.o \
31 sigreturn32.o sys_compat.o 31 sys_compat.o
32ifneq ($(CONFIG_COMPAT_VDSO), y)
33obj-$(CONFIG_COMPAT) += sigreturn32.o
34endif
32obj-$(CONFIG_KUSER_HELPERS) += kuser32.o 35obj-$(CONFIG_KUSER_HELPERS) += kuser32.o
33obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o 36obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o
34obj-$(CONFIG_MODULES) += module.o 37obj-$(CONFIG_MODULES) += module.o
@@ -62,6 +65,7 @@ obj-$(CONFIG_ARM64_SSBD) += ssbd.o
62obj-$(CONFIG_ARM64_PTR_AUTH) += pointer_auth.o 65obj-$(CONFIG_ARM64_PTR_AUTH) += pointer_auth.o
63 66
64obj-y += vdso/ probes/ 67obj-y += vdso/ probes/
68obj-$(CONFIG_COMPAT_VDSO) += vdso32/
65head-y := head.o 69head-y := head.o
66extra-y += $(head-y) vmlinux.lds 70extra-y += $(head-y) vmlinux.lds
67 71
diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c
index 2804330c95dc..3a58e9db5cfe 100644
--- a/arch/arm64/kernel/acpi.c
+++ b/arch/arm64/kernel/acpi.c
@@ -152,10 +152,14 @@ static int __init acpi_fadt_sanity_check(void)
152 */ 152 */
153 if (table->revision < 5 || 153 if (table->revision < 5 ||
154 (table->revision == 5 && fadt->minor_revision < 1)) { 154 (table->revision == 5 && fadt->minor_revision < 1)) {
155 pr_err("Unsupported FADT revision %d.%d, should be 5.1+\n", 155 pr_err(FW_BUG "Unsupported FADT revision %d.%d, should be 5.1+\n",
156 table->revision, fadt->minor_revision); 156 table->revision, fadt->minor_revision);
157 ret = -EINVAL; 157
158 goto out; 158 if (!fadt->arm_boot_flags) {
159 ret = -EINVAL;
160 goto out;
161 }
162 pr_err("FADT has ARM boot flags set, assuming 5.1\n");
159 } 163 }
160 164
161 if (!(fadt->flags & ACPI_FADT_HW_REDUCED)) { 165 if (!(fadt->flags & ACPI_FADT_HW_REDUCED)) {
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 02f08768c298..214685760e1c 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -18,9 +18,9 @@
18#include <asm/fixmap.h> 18#include <asm/fixmap.h>
19#include <asm/thread_info.h> 19#include <asm/thread_info.h>
20#include <asm/memory.h> 20#include <asm/memory.h>
21#include <asm/signal32.h>
21#include <asm/smp_plat.h> 22#include <asm/smp_plat.h>
22#include <asm/suspend.h> 23#include <asm/suspend.h>
23#include <asm/vdso_datapage.h>
24#include <linux/kbuild.h> 24#include <linux/kbuild.h>
25#include <linux/arm-smccc.h> 25#include <linux/arm-smccc.h>
26 26
@@ -66,6 +66,11 @@ int main(void)
66 DEFINE(S_STACKFRAME, offsetof(struct pt_regs, stackframe)); 66 DEFINE(S_STACKFRAME, offsetof(struct pt_regs, stackframe));
67 DEFINE(S_FRAME_SIZE, sizeof(struct pt_regs)); 67 DEFINE(S_FRAME_SIZE, sizeof(struct pt_regs));
68 BLANK(); 68 BLANK();
69#ifdef CONFIG_COMPAT
70 DEFINE(COMPAT_SIGFRAME_REGS_OFFSET, offsetof(struct compat_sigframe, uc.uc_mcontext.arm_r0));
71 DEFINE(COMPAT_RT_SIGFRAME_REGS_OFFSET, offsetof(struct compat_rt_sigframe, sig.uc.uc_mcontext.arm_r0));
72 BLANK();
73#endif
69 DEFINE(MM_CONTEXT_ID, offsetof(struct mm_struct, context.id.counter)); 74 DEFINE(MM_CONTEXT_ID, offsetof(struct mm_struct, context.id.counter));
70 BLANK(); 75 BLANK();
71 DEFINE(VMA_VM_MM, offsetof(struct vm_area_struct, vm_mm)); 76 DEFINE(VMA_VM_MM, offsetof(struct vm_area_struct, vm_mm));
@@ -80,33 +85,6 @@ int main(void)
80 BLANK(); 85 BLANK();
81 DEFINE(PREEMPT_DISABLE_OFFSET, PREEMPT_DISABLE_OFFSET); 86 DEFINE(PREEMPT_DISABLE_OFFSET, PREEMPT_DISABLE_OFFSET);
82 BLANK(); 87 BLANK();
83 DEFINE(CLOCK_REALTIME, CLOCK_REALTIME);
84 DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC);
85 DEFINE(CLOCK_MONOTONIC_RAW, CLOCK_MONOTONIC_RAW);
86 DEFINE(CLOCK_REALTIME_RES, offsetof(struct vdso_data, hrtimer_res));
87 DEFINE(CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE);
88 DEFINE(CLOCK_MONOTONIC_COARSE,CLOCK_MONOTONIC_COARSE);
89 DEFINE(CLOCK_COARSE_RES, LOW_RES_NSEC);
90 DEFINE(NSEC_PER_SEC, NSEC_PER_SEC);
91 BLANK();
92 DEFINE(VDSO_CS_CYCLE_LAST, offsetof(struct vdso_data, cs_cycle_last));
93 DEFINE(VDSO_RAW_TIME_SEC, offsetof(struct vdso_data, raw_time_sec));
94 DEFINE(VDSO_XTIME_CLK_SEC, offsetof(struct vdso_data, xtime_clock_sec));
95 DEFINE(VDSO_XTIME_CRS_SEC, offsetof(struct vdso_data, xtime_coarse_sec));
96 DEFINE(VDSO_XTIME_CRS_NSEC, offsetof(struct vdso_data, xtime_coarse_nsec));
97 DEFINE(VDSO_WTM_CLK_SEC, offsetof(struct vdso_data, wtm_clock_sec));
98 DEFINE(VDSO_TB_SEQ_COUNT, offsetof(struct vdso_data, tb_seq_count));
99 DEFINE(VDSO_CS_MONO_MULT, offsetof(struct vdso_data, cs_mono_mult));
100 DEFINE(VDSO_CS_SHIFT, offsetof(struct vdso_data, cs_shift));
101 DEFINE(VDSO_TZ_MINWEST, offsetof(struct vdso_data, tz_minuteswest));
102 DEFINE(VDSO_USE_SYSCALL, offsetof(struct vdso_data, use_syscall));
103 BLANK();
104 DEFINE(TVAL_TV_SEC, offsetof(struct timeval, tv_sec));
105 DEFINE(TSPEC_TV_SEC, offsetof(struct timespec, tv_sec));
106 BLANK();
107 DEFINE(TZ_MINWEST, offsetof(struct timezone, tz_minuteswest));
108 DEFINE(TZ_DSTTIME, offsetof(struct timezone, tz_dsttime));
109 BLANK();
110 DEFINE(CPU_BOOT_STACK, offsetof(struct secondary_data, stack)); 88 DEFINE(CPU_BOOT_STACK, offsetof(struct secondary_data, stack));
111 DEFINE(CPU_BOOT_TASK, offsetof(struct secondary_data, task)); 89 DEFINE(CPU_BOOT_TASK, offsetof(struct secondary_data, task));
112 BLANK(); 90 BLANK();
diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c
index 880d79904d36..7fa6828bb488 100644
--- a/arch/arm64/kernel/cacheinfo.c
+++ b/arch/arm64/kernel/cacheinfo.c
@@ -17,6 +17,15 @@
17#define CLIDR_CTYPE(clidr, level) \ 17#define CLIDR_CTYPE(clidr, level) \
18 (((clidr) & CLIDR_CTYPE_MASK(level)) >> CLIDR_CTYPE_SHIFT(level)) 18 (((clidr) & CLIDR_CTYPE_MASK(level)) >> CLIDR_CTYPE_SHIFT(level))
19 19
20int cache_line_size(void)
21{
22 if (coherency_max_size != 0)
23 return coherency_max_size;
24
25 return cache_line_size_of_cpu();
26}
27EXPORT_SYMBOL_GPL(cache_line_size);
28
20static inline enum cache_type get_cache_type(int level) 29static inline enum cache_type get_cache_type(int level)
21{ 30{
22 u64 clidr; 31 u64 clidr;
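Note: exporting cache_line_size() lets drivers size buffers to the firmware-reported coherency granule (or to CTR_EL0's CWG when firmware reports nothing). A hedged sketch of a typical consumer, not part of this patch:

    #include <linux/cache.h>
    #include <linux/kernel.h>
    #include <linux/slab.h>

    /* Pad an allocation to a whole number of cache lines, e.g. so that a
     * device-visible buffer does not share a line with unrelated data. */
    static void *alloc_line_padded(size_t len, gfp_t gfp)
    {
        return kmalloc(ALIGN(len, cache_line_size()), gfp);
    }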
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index aabdabf52fdb..f29f36a65175 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1184,14 +1184,14 @@ static struct undef_hook ssbs_emulation_hook = {
1184static void cpu_enable_ssbs(const struct arm64_cpu_capabilities *__unused) 1184static void cpu_enable_ssbs(const struct arm64_cpu_capabilities *__unused)
1185{ 1185{
1186 static bool undef_hook_registered = false; 1186 static bool undef_hook_registered = false;
1187 static DEFINE_SPINLOCK(hook_lock); 1187 static DEFINE_RAW_SPINLOCK(hook_lock);
1188 1188
1189 spin_lock(&hook_lock); 1189 raw_spin_lock(&hook_lock);
1190 if (!undef_hook_registered) { 1190 if (!undef_hook_registered) {
1191 register_undef_hook(&ssbs_emulation_hook); 1191 register_undef_hook(&ssbs_emulation_hook);
1192 undef_hook_registered = true; 1192 undef_hook_registered = true;
1193 } 1193 }
1194 spin_unlock(&hook_lock); 1194 raw_spin_unlock(&hook_lock);
1195 1195
1196 if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) { 1196 if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) {
1197 sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_DSSBS); 1197 sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_DSSBS);
@@ -1618,6 +1618,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
1618 HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDDP), 1618 HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDDP),
1619 HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_FHM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDFHM), 1619 HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_FHM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDFHM),
1620 HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FLAGM), 1620 HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FLAGM),
1621 HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_FLAGM2),
1621 HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_FP), 1622 HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_FP),
1622 HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FPHP), 1623 HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FPHP),
1623 HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_ASIMD), 1624 HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_ASIMD),
@@ -1629,6 +1630,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
1629 HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FCMA), 1630 HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FCMA),
1630 HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_LRCPC), 1631 HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_LRCPC),
1631 HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ILRCPC), 1632 HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ILRCPC),
1633 HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FRINTTS_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FRINT),
1632 HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_SB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SB), 1634 HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_SB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SB),
1633 HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_AT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_USCAT), 1635 HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_AT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_USCAT),
1634#ifdef CONFIG_ARM64_SVE 1636#ifdef CONFIG_ARM64_SVE
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 0593665fc7b4..876055e37352 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -82,6 +82,8 @@ static const char *const hwcap_str[] = {
82 "svebitperm", 82 "svebitperm",
83 "svesha3", 83 "svesha3",
84 "svesm4", 84 "svesm4",
85 "flagm2",
86 "frint",
85 NULL 87 NULL
86}; 88};
87 89
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 2df8d0a1d980..9cdc4592da3e 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -247,6 +247,7 @@ alternative_else_nop_endif
247 /* 247 /*
248 * Registers that may be useful after this macro is invoked: 248 * Registers that may be useful after this macro is invoked:
249 * 249 *
250 * x20 - ICC_PMR_EL1
250 * x21 - aborted SP 251 * x21 - aborted SP
251 * x22 - aborted PC 252 * x22 - aborted PC
252 * x23 - aborted PSTATE 253 * x23 - aborted PSTATE
@@ -424,6 +425,38 @@ tsk .req x28 // current thread_info
424 irq_stack_exit 425 irq_stack_exit
425 .endm 426 .endm
426 427
428#ifdef CONFIG_ARM64_PSEUDO_NMI
429 /*
430 * Set res to 0 if irqs were unmasked in interrupted context.
431 * Otherwise set res to non-0 value.
432 */
433 .macro test_irqs_unmasked res:req, pmr:req
434alternative_if ARM64_HAS_IRQ_PRIO_MASKING
435 sub \res, \pmr, #GIC_PRIO_IRQON
436alternative_else
437 mov \res, xzr
438alternative_endif
439 .endm
440#endif
441
442 .macro gic_prio_kentry_setup, tmp:req
443#ifdef CONFIG_ARM64_PSEUDO_NMI
444 alternative_if ARM64_HAS_IRQ_PRIO_MASKING
445 mov \tmp, #(GIC_PRIO_PSR_I_SET | GIC_PRIO_IRQON)
446 msr_s SYS_ICC_PMR_EL1, \tmp
447 alternative_else_nop_endif
448#endif
449 .endm
450
451 .macro gic_prio_irq_setup, pmr:req, tmp:req
452#ifdef CONFIG_ARM64_PSEUDO_NMI
453 alternative_if ARM64_HAS_IRQ_PRIO_MASKING
454 orr \tmp, \pmr, #GIC_PRIO_PSR_I_SET
455 msr_s SYS_ICC_PMR_EL1, \tmp
456 alternative_else_nop_endif
457#endif
458 .endm
459
427 .text 460 .text
428 461
429/* 462/*
@@ -602,6 +635,7 @@ el1_dbg:
602 cmp x24, #ESR_ELx_EC_BRK64 // if BRK64 635 cmp x24, #ESR_ELx_EC_BRK64 // if BRK64
603 cinc x24, x24, eq // set bit '0' 636 cinc x24, x24, eq // set bit '0'
604 tbz x24, #0, el1_inv // EL1 only 637 tbz x24, #0, el1_inv // EL1 only
638 gic_prio_kentry_setup tmp=x3
605 mrs x0, far_el1 639 mrs x0, far_el1
606 mov x2, sp // struct pt_regs 640 mov x2, sp // struct pt_regs
607 bl do_debug_exception 641 bl do_debug_exception
@@ -619,20 +653,18 @@ ENDPROC(el1_sync)
619 .align 6 653 .align 6
620el1_irq: 654el1_irq:
621 kernel_entry 1 655 kernel_entry 1
656 gic_prio_irq_setup pmr=x20, tmp=x1
622 enable_da_f 657 enable_da_f
623#ifdef CONFIG_TRACE_IRQFLAGS 658
624#ifdef CONFIG_ARM64_PSEUDO_NMI 659#ifdef CONFIG_ARM64_PSEUDO_NMI
625alternative_if ARM64_HAS_IRQ_PRIO_MASKING 660 test_irqs_unmasked res=x0, pmr=x20
626 ldr x20, [sp, #S_PMR_SAVE] 661 cbz x0, 1f
627alternative_else 662 bl asm_nmi_enter
628 mov x20, #GIC_PRIO_IRQON 6631:
629alternative_endif
630 cmp x20, #GIC_PRIO_IRQOFF
631 /* Irqs were disabled, don't trace */
632 b.ls 1f
633#endif 664#endif
665
666#ifdef CONFIG_TRACE_IRQFLAGS
634 bl trace_hardirqs_off 667 bl trace_hardirqs_off
6351:
636#endif 668#endif
637 669
638 irq_handler 670 irq_handler
@@ -651,14 +683,23 @@ alternative_else_nop_endif
651 bl preempt_schedule_irq // irq en/disable is done inside 683 bl preempt_schedule_irq // irq en/disable is done inside
6521: 6841:
653#endif 685#endif
654#ifdef CONFIG_TRACE_IRQFLAGS 686
655#ifdef CONFIG_ARM64_PSEUDO_NMI 687#ifdef CONFIG_ARM64_PSEUDO_NMI
656 /* 688 /*
657 * if IRQs were disabled when we received the interrupt, we have an NMI 689 * When using IRQ priority masking, we can get spurious interrupts while
658 * and we are not re-enabling interrupt upon eret. Skip tracing. 690 * PMR is set to GIC_PRIO_IRQOFF. An NMI might also have occurred in a
691 * section with interrupts disabled. Skip tracing in those cases.
659 */ 692 */
660 cmp x20, #GIC_PRIO_IRQOFF 693 test_irqs_unmasked res=x0, pmr=x20
661 b.ls 1f 694 cbz x0, 1f
695 bl asm_nmi_exit
6961:
697#endif
698
699#ifdef CONFIG_TRACE_IRQFLAGS
700#ifdef CONFIG_ARM64_PSEUDO_NMI
701 test_irqs_unmasked res=x0, pmr=x20
702 cbnz x0, 1f
662#endif 703#endif
663 bl trace_hardirqs_on 704 bl trace_hardirqs_on
6641: 7051:
@@ -776,6 +817,7 @@ el0_ia:
776 * Instruction abort handling 817 * Instruction abort handling
777 */ 818 */
778 mrs x26, far_el1 819 mrs x26, far_el1
820 gic_prio_kentry_setup tmp=x0
779 enable_da_f 821 enable_da_f
780#ifdef CONFIG_TRACE_IRQFLAGS 822#ifdef CONFIG_TRACE_IRQFLAGS
781 bl trace_hardirqs_off 823 bl trace_hardirqs_off
@@ -821,6 +863,7 @@ el0_sp_pc:
821 * Stack or PC alignment exception handling 863 * Stack or PC alignment exception handling
822 */ 864 */
823 mrs x26, far_el1 865 mrs x26, far_el1
866 gic_prio_kentry_setup tmp=x0
824 enable_da_f 867 enable_da_f
825#ifdef CONFIG_TRACE_IRQFLAGS 868#ifdef CONFIG_TRACE_IRQFLAGS
826 bl trace_hardirqs_off 869 bl trace_hardirqs_off
@@ -855,11 +898,12 @@ el0_dbg:
855 * Debug exception handling 898 * Debug exception handling
856 */ 899 */
857 tbnz x24, #0, el0_inv // EL0 only 900 tbnz x24, #0, el0_inv // EL0 only
901 gic_prio_kentry_setup tmp=x3
858 mrs x0, far_el1 902 mrs x0, far_el1
859 mov x1, x25 903 mov x1, x25
860 mov x2, sp 904 mov x2, sp
861 bl do_debug_exception 905 bl do_debug_exception
862 enable_daif 906 enable_da_f
863 ct_user_exit 907 ct_user_exit
864 b ret_to_user 908 b ret_to_user
865el0_inv: 909el0_inv:
@@ -876,7 +920,9 @@ ENDPROC(el0_sync)
876el0_irq: 920el0_irq:
877 kernel_entry 0 921 kernel_entry 0
878el0_irq_naked: 922el0_irq_naked:
923 gic_prio_irq_setup pmr=x20, tmp=x0
879 enable_da_f 924 enable_da_f
925
880#ifdef CONFIG_TRACE_IRQFLAGS 926#ifdef CONFIG_TRACE_IRQFLAGS
881 bl trace_hardirqs_off 927 bl trace_hardirqs_off
882#endif 928#endif
@@ -898,6 +944,7 @@ ENDPROC(el0_irq)
898el1_error: 944el1_error:
899 kernel_entry 1 945 kernel_entry 1
900 mrs x1, esr_el1 946 mrs x1, esr_el1
947 gic_prio_kentry_setup tmp=x2
901 enable_dbg 948 enable_dbg
902 mov x0, sp 949 mov x0, sp
903 bl do_serror 950 bl do_serror
@@ -908,10 +955,11 @@ el0_error:
908 kernel_entry 0 955 kernel_entry 0
909el0_error_naked: 956el0_error_naked:
910 mrs x1, esr_el1 957 mrs x1, esr_el1
958 gic_prio_kentry_setup tmp=x2
911 enable_dbg 959 enable_dbg
912 mov x0, sp 960 mov x0, sp
913 bl do_serror 961 bl do_serror
914 enable_daif 962 enable_da_f
915 ct_user_exit 963 ct_user_exit
916 b ret_to_user 964 b ret_to_user
917ENDPROC(el0_error) 965ENDPROC(el0_error)
@@ -932,6 +980,7 @@ work_pending:
932 */ 980 */
933ret_to_user: 981ret_to_user:
934 disable_daif 982 disable_daif
983 gic_prio_kentry_setup tmp=x3
935 ldr x1, [tsk, #TSK_TI_FLAGS] 984 ldr x1, [tsk, #TSK_TI_FLAGS]
936 and x2, x1, #_TIF_WORK_MASK 985 and x2, x1, #_TIF_WORK_MASK
937 cbnz x2, work_pending 986 cbnz x2, work_pending
@@ -948,6 +997,7 @@ ENDPROC(ret_to_user)
948 */ 997 */
949 .align 6 998 .align 6
950el0_svc: 999el0_svc:
1000 gic_prio_kentry_setup tmp=x1
951 mov x0, sp 1001 mov x0, sp
952 bl el0_svc_handler 1002 bl el0_svc_handler
953 b ret_to_user 1003 b ret_to_user
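Note: in C terms, the test_irqs_unmasked macro added above computes roughly the following (an illustrative model only; the real check is the alternative-patched assembly):

    #include <asm/cpufeature.h>
    #include <asm/ptrace.h>

    /* Returns 0 iff interrupts were unmasked, i.e. PMR == GIC_PRIO_IRQON. */
    static inline unsigned long test_irqs_unmasked(unsigned long pmr)
    {
        if (!system_uses_irq_prio_masking())
            return 0;   /* no priority masking: treat as unmasked */

        return pmr - GIC_PRIO_IRQON;
    }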
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 0cfcf5c237c5..eec4776ae5f0 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -82,7 +82,8 @@
82 * To prevent this from racing with the manipulation of the task's FPSIMD state 82 * To prevent this from racing with the manipulation of the task's FPSIMD state
83 * from task context and thereby corrupting the state, it is necessary to 83 * from task context and thereby corrupting the state, it is necessary to
84 * protect any manipulation of a task's fpsimd_state or TIF_FOREIGN_FPSTATE 84 * protect any manipulation of a task's fpsimd_state or TIF_FOREIGN_FPSTATE
85 * flag with local_bh_disable() unless softirqs are already masked. 85 * flag with {, __}get_cpu_fpsimd_context(). This will still allow softirqs to
 86 * run but prevent them from using FPSIMD.
86 * 87 *
87 * For a certain task, the sequence may look something like this: 88 * For a certain task, the sequence may look something like this:
88 * - the task gets scheduled in; if both the task's fpsimd_cpu field 89 * - the task gets scheduled in; if both the task's fpsimd_cpu field
@@ -145,6 +146,56 @@ extern void __percpu *efi_sve_state;
145 146
146#endif /* ! CONFIG_ARM64_SVE */ 147#endif /* ! CONFIG_ARM64_SVE */
147 148
149DEFINE_PER_CPU(bool, fpsimd_context_busy);
150EXPORT_PER_CPU_SYMBOL(fpsimd_context_busy);
151
152static void __get_cpu_fpsimd_context(void)
153{
154 bool busy = __this_cpu_xchg(fpsimd_context_busy, true);
155
156 WARN_ON(busy);
157}
158
159/*
160 * Claim ownership of the CPU FPSIMD context for use by the calling context.
161 *
162 * The caller may freely manipulate the FPSIMD context metadata until
163 * put_cpu_fpsimd_context() is called.
164 *
165 * The double-underscore version must only be called if you know the task
166 * can't be preempted.
167 */
168static void get_cpu_fpsimd_context(void)
169{
170 preempt_disable();
171 __get_cpu_fpsimd_context();
172}
173
174static void __put_cpu_fpsimd_context(void)
175{
176 bool busy = __this_cpu_xchg(fpsimd_context_busy, false);
177
178 WARN_ON(!busy); /* No matching get_cpu_fpsimd_context()? */
179}
180
181/*
182 * Release the CPU FPSIMD context.
183 *
184 * Must be called from a context in which get_cpu_fpsimd_context() was
185 * previously called, with no call to put_cpu_fpsimd_context() in the
186 * meantime.
187 */
188static void put_cpu_fpsimd_context(void)
189{
190 __put_cpu_fpsimd_context();
191 preempt_enable();
192}
193
194static bool have_cpu_fpsimd_context(void)
195{
196 return !preemptible() && __this_cpu_read(fpsimd_context_busy);
197}
198
148/* 199/*
149 * Call __sve_free() directly only if you know task can't be scheduled 200 * Call __sve_free() directly only if you know task can't be scheduled
150 * or preempted. 201 * or preempted.
@@ -215,12 +266,10 @@ static void sve_free(struct task_struct *task)
215 * This function should be called only when the FPSIMD/SVE state in 266 * This function should be called only when the FPSIMD/SVE state in
216 * thread_struct is known to be up to date, when preparing to enter 267 * thread_struct is known to be up to date, when preparing to enter
217 * userspace. 268 * userspace.
218 *
219 * Softirqs (and preemption) must be disabled.
220 */ 269 */
221static void task_fpsimd_load(void) 270static void task_fpsimd_load(void)
222{ 271{
223 WARN_ON(!in_softirq() && !irqs_disabled()); 272 WARN_ON(!have_cpu_fpsimd_context());
224 273
225 if (system_supports_sve() && test_thread_flag(TIF_SVE)) 274 if (system_supports_sve() && test_thread_flag(TIF_SVE))
226 sve_load_state(sve_pffr(&current->thread), 275 sve_load_state(sve_pffr(&current->thread),
@@ -233,16 +282,14 @@ static void task_fpsimd_load(void)
233/* 282/*
234 * Ensure FPSIMD/SVE storage in memory for the loaded context is up to 283 * Ensure FPSIMD/SVE storage in memory for the loaded context is up to
235 * date with respect to the CPU registers. 284 * date with respect to the CPU registers.
236 *
237 * Softirqs (and preemption) must be disabled.
238 */ 285 */
239void fpsimd_save(void) 286static void fpsimd_save(void)
240{ 287{
241 struct fpsimd_last_state_struct const *last = 288 struct fpsimd_last_state_struct const *last =
242 this_cpu_ptr(&fpsimd_last_state); 289 this_cpu_ptr(&fpsimd_last_state);
243 /* set by fpsimd_bind_task_to_cpu() or fpsimd_bind_state_to_cpu() */ 290 /* set by fpsimd_bind_task_to_cpu() or fpsimd_bind_state_to_cpu() */
244 291
245 WARN_ON(!in_softirq() && !irqs_disabled()); 292 WARN_ON(!have_cpu_fpsimd_context());
246 293
247 if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) { 294 if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
248 if (system_supports_sve() && test_thread_flag(TIF_SVE)) { 295 if (system_supports_sve() && test_thread_flag(TIF_SVE)) {
@@ -364,7 +411,8 @@ static __uint128_t arm64_cpu_to_le128(__uint128_t x)
364 * task->thread.sve_state. 411 * task->thread.sve_state.
365 * 412 *
366 * Task can be a non-runnable task, or current. In the latter case, 413 * Task can be a non-runnable task, or current. In the latter case,
367 * softirqs (and preemption) must be disabled. 414 * the caller must have ownership of the cpu FPSIMD context before calling
415 * this function.
368 * task->thread.sve_state must point to at least sve_state_size(task) 416 * task->thread.sve_state must point to at least sve_state_size(task)
369 * bytes of allocated kernel memory. 417 * bytes of allocated kernel memory.
370 * task->thread.uw.fpsimd_state must be up to date before calling this 418 * task->thread.uw.fpsimd_state must be up to date before calling this
@@ -393,7 +441,8 @@ static void fpsimd_to_sve(struct task_struct *task)
393 * task->thread.uw.fpsimd_state. 441 * task->thread.uw.fpsimd_state.
394 * 442 *
395 * Task can be a non-runnable task, or current. In the latter case, 443 * Task can be a non-runnable task, or current. In the latter case,
396 * softirqs (and preemption) must be disabled. 444 * the caller must have ownership of the cpu FPSIMD context before calling
445 * this function.
397 * task->thread.sve_state must point to at least sve_state_size(task) 446 * task->thread.sve_state must point to at least sve_state_size(task)
398 * bytes of allocated kernel memory. 447 * bytes of allocated kernel memory.
399 * task->thread.sve_state must be up to date before calling this function. 448 * task->thread.sve_state must be up to date before calling this function.
@@ -557,7 +606,7 @@ int sve_set_vector_length(struct task_struct *task,
557 * non-SVE thread. 606 * non-SVE thread.
558 */ 607 */
559 if (task == current) { 608 if (task == current) {
560 local_bh_disable(); 609 get_cpu_fpsimd_context();
561 610
562 fpsimd_save(); 611 fpsimd_save();
563 } 612 }
@@ -567,7 +616,7 @@ int sve_set_vector_length(struct task_struct *task,
567 sve_to_fpsimd(task); 616 sve_to_fpsimd(task);
568 617
569 if (task == current) 618 if (task == current)
570 local_bh_enable(); 619 put_cpu_fpsimd_context();
571 620
572 /* 621 /*
573 * Force reallocation of task SVE state to the correct size 622 * Force reallocation of task SVE state to the correct size
@@ -880,7 +929,7 @@ asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs)
880 929
881 sve_alloc(current); 930 sve_alloc(current);
882 931
883 local_bh_disable(); 932 get_cpu_fpsimd_context();
884 933
885 fpsimd_save(); 934 fpsimd_save();
886 935
@@ -891,7 +940,7 @@ asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs)
891 if (test_and_set_thread_flag(TIF_SVE)) 940 if (test_and_set_thread_flag(TIF_SVE))
892 WARN_ON(1); /* SVE access shouldn't have trapped */ 941 WARN_ON(1); /* SVE access shouldn't have trapped */
893 942
894 local_bh_enable(); 943 put_cpu_fpsimd_context();
895} 944}
896 945
897/* 946/*
@@ -935,6 +984,8 @@ void fpsimd_thread_switch(struct task_struct *next)
935 if (!system_supports_fpsimd()) 984 if (!system_supports_fpsimd())
936 return; 985 return;
937 986
987 __get_cpu_fpsimd_context();
988
938 /* Save unsaved fpsimd state, if any: */ 989 /* Save unsaved fpsimd state, if any: */
939 fpsimd_save(); 990 fpsimd_save();
940 991
@@ -949,6 +1000,8 @@ void fpsimd_thread_switch(struct task_struct *next)
949 1000
950 update_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE, 1001 update_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE,
951 wrong_task || wrong_cpu); 1002 wrong_task || wrong_cpu);
1003
1004 __put_cpu_fpsimd_context();
952} 1005}
953 1006
954void fpsimd_flush_thread(void) 1007void fpsimd_flush_thread(void)
@@ -958,7 +1011,7 @@ void fpsimd_flush_thread(void)
958 if (!system_supports_fpsimd()) 1011 if (!system_supports_fpsimd())
959 return; 1012 return;
960 1013
961 local_bh_disable(); 1014 get_cpu_fpsimd_context();
962 1015
963 fpsimd_flush_task_state(current); 1016 fpsimd_flush_task_state(current);
964 memset(&current->thread.uw.fpsimd_state, 0, 1017 memset(&current->thread.uw.fpsimd_state, 0,
@@ -999,7 +1052,7 @@ void fpsimd_flush_thread(void)
999 current->thread.sve_vl_onexec = 0; 1052 current->thread.sve_vl_onexec = 0;
1000 } 1053 }
1001 1054
1002 local_bh_enable(); 1055 put_cpu_fpsimd_context();
1003} 1056}
1004 1057
1005/* 1058/*
@@ -1011,9 +1064,9 @@ void fpsimd_preserve_current_state(void)
1011 if (!system_supports_fpsimd()) 1064 if (!system_supports_fpsimd())
1012 return; 1065 return;
1013 1066
1014 local_bh_disable(); 1067 get_cpu_fpsimd_context();
1015 fpsimd_save(); 1068 fpsimd_save();
1016 local_bh_enable(); 1069 put_cpu_fpsimd_context();
1017} 1070}
1018 1071
1019/* 1072/*
@@ -1030,7 +1083,8 @@ void fpsimd_signal_preserve_current_state(void)
1030 1083
1031/* 1084/*
1032 * Associate current's FPSIMD context with this cpu 1085 * Associate current's FPSIMD context with this cpu
1033 * Preemption must be disabled when calling this function. 1086 * The caller must have ownership of the cpu FPSIMD context before calling
1087 * this function.
1034 */ 1088 */
1035void fpsimd_bind_task_to_cpu(void) 1089void fpsimd_bind_task_to_cpu(void)
1036{ 1090{
@@ -1076,14 +1130,14 @@ void fpsimd_restore_current_state(void)
1076 if (!system_supports_fpsimd()) 1130 if (!system_supports_fpsimd())
1077 return; 1131 return;
1078 1132
1079 local_bh_disable(); 1133 get_cpu_fpsimd_context();
1080 1134
1081 if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { 1135 if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
1082 task_fpsimd_load(); 1136 task_fpsimd_load();
1083 fpsimd_bind_task_to_cpu(); 1137 fpsimd_bind_task_to_cpu();
1084 } 1138 }
1085 1139
1086 local_bh_enable(); 1140 put_cpu_fpsimd_context();
1087} 1141}
1088 1142
1089/* 1143/*
@@ -1096,7 +1150,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state)
1096 if (!system_supports_fpsimd()) 1150 if (!system_supports_fpsimd())
1097 return; 1151 return;
1098 1152
1099 local_bh_disable(); 1153 get_cpu_fpsimd_context();
1100 1154
1101 current->thread.uw.fpsimd_state = *state; 1155 current->thread.uw.fpsimd_state = *state;
1102 if (system_supports_sve() && test_thread_flag(TIF_SVE)) 1156 if (system_supports_sve() && test_thread_flag(TIF_SVE))
@@ -1107,7 +1161,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state)
1107 1161
1108 clear_thread_flag(TIF_FOREIGN_FPSTATE); 1162 clear_thread_flag(TIF_FOREIGN_FPSTATE);
1109 1163
1110 local_bh_enable(); 1164 put_cpu_fpsimd_context();
1111} 1165}
1112 1166
1113/* 1167/*
@@ -1133,18 +1187,29 @@ void fpsimd_flush_task_state(struct task_struct *t)
1133 1187
1134/* 1188/*
1135 * Invalidate any task's FPSIMD state that is present on this cpu. 1189 * Invalidate any task's FPSIMD state that is present on this cpu.
1136 * This function must be called with softirqs disabled. 1190 * The FPSIMD context should be acquired with get_cpu_fpsimd_context()
1191 * before calling this function.
1137 */ 1192 */
1138void fpsimd_flush_cpu_state(void) 1193static void fpsimd_flush_cpu_state(void)
1139{ 1194{
1140 __this_cpu_write(fpsimd_last_state.st, NULL); 1195 __this_cpu_write(fpsimd_last_state.st, NULL);
1141 set_thread_flag(TIF_FOREIGN_FPSTATE); 1196 set_thread_flag(TIF_FOREIGN_FPSTATE);
1142} 1197}
1143 1198
1144#ifdef CONFIG_KERNEL_MODE_NEON 1199/*
1200 * Save the FPSIMD state to memory and invalidate cpu view.
1201 * This function must be called with preemption disabled.
1202 */
1203void fpsimd_save_and_flush_cpu_state(void)
1204{
1205 WARN_ON(preemptible());
1206 __get_cpu_fpsimd_context();
1207 fpsimd_save();
1208 fpsimd_flush_cpu_state();
1209 __put_cpu_fpsimd_context();
1210}
1145 1211
1146DEFINE_PER_CPU(bool, kernel_neon_busy); 1212#ifdef CONFIG_KERNEL_MODE_NEON
1147EXPORT_PER_CPU_SYMBOL(kernel_neon_busy);
1148 1213
1149/* 1214/*
1150 * Kernel-side NEON support functions 1215 * Kernel-side NEON support functions
@@ -1170,19 +1235,13 @@ void kernel_neon_begin(void)
1170 1235
1171 BUG_ON(!may_use_simd()); 1236 BUG_ON(!may_use_simd());
1172 1237
1173 local_bh_disable(); 1238 get_cpu_fpsimd_context();
1174
1175 __this_cpu_write(kernel_neon_busy, true);
1176 1239
1177 /* Save unsaved fpsimd state, if any: */ 1240 /* Save unsaved fpsimd state, if any: */
1178 fpsimd_save(); 1241 fpsimd_save();
1179 1242
1180 /* Invalidate any task state remaining in the fpsimd regs: */ 1243 /* Invalidate any task state remaining in the fpsimd regs: */
1181 fpsimd_flush_cpu_state(); 1244 fpsimd_flush_cpu_state();
1182
1183 preempt_disable();
1184
1185 local_bh_enable();
1186} 1245}
1187EXPORT_SYMBOL(kernel_neon_begin); 1246EXPORT_SYMBOL(kernel_neon_begin);
1188 1247
@@ -1197,15 +1256,10 @@ EXPORT_SYMBOL(kernel_neon_begin);
1197 */ 1256 */
1198void kernel_neon_end(void) 1257void kernel_neon_end(void)
1199{ 1258{
1200 bool busy;
1201
1202 if (!system_supports_fpsimd()) 1259 if (!system_supports_fpsimd())
1203 return; 1260 return;
1204 1261
1205 busy = __this_cpu_xchg(kernel_neon_busy, false); 1262 put_cpu_fpsimd_context();
1206 WARN_ON(!busy); /* No matching kernel_neon_begin()? */
1207
1208 preempt_enable();
1209} 1263}
1210EXPORT_SYMBOL(kernel_neon_end); 1264EXPORT_SYMBOL(kernel_neon_end);
1211 1265
@@ -1297,8 +1351,7 @@ static int fpsimd_cpu_pm_notifier(struct notifier_block *self,
1297{ 1351{
1298 switch (cmd) { 1352 switch (cmd) {
1299 case CPU_PM_ENTER: 1353 case CPU_PM_ENTER:
1300 fpsimd_save(); 1354 fpsimd_save_and_flush_cpu_state();
1301 fpsimd_flush_cpu_state();
1302 break; 1355 break;
1303 case CPU_PM_EXIT: 1356 case CPU_PM_EXIT:
1304 break; 1357 break;
diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h
index 04ca08086d35..2b85c0d6fa3d 100644
--- a/arch/arm64/kernel/image.h
+++ b/arch/arm64/kernel/image.h
@@ -67,7 +67,11 @@
67 67
68#ifdef CONFIG_EFI 68#ifdef CONFIG_EFI
69 69
70__efistub_stext_offset = stext - _text; 70/*
71 * Use ABSOLUTE() to avoid ld.lld treating this as a relative symbol:
72 * https://github.com/ClangBuiltLinux/linux/issues/561
73 */
74__efistub_stext_offset = ABSOLUTE(stext - _text);
71 75
72/* 76/*
73 * The EFI stub has its own symbol namespace prefixed by __efistub_, to 77 * The EFI stub has its own symbol namespace prefixed by __efistub_, to
diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
index c70034fbd4ce..04a327ccf84d 100644
--- a/arch/arm64/kernel/irq.c
+++ b/arch/arm64/kernel/irq.c
@@ -16,8 +16,10 @@
16#include <linux/smp.h> 16#include <linux/smp.h>
17#include <linux/init.h> 17#include <linux/init.h>
18#include <linux/irqchip.h> 18#include <linux/irqchip.h>
19#include <linux/kprobes.h>
19#include <linux/seq_file.h> 20#include <linux/seq_file.h>
20#include <linux/vmalloc.h> 21#include <linux/vmalloc.h>
22#include <asm/daifflags.h>
21#include <asm/vmap_stack.h> 23#include <asm/vmap_stack.h>
22 24
23unsigned long irq_err_count; 25unsigned long irq_err_count;
@@ -64,4 +66,28 @@ void __init init_IRQ(void)
64 irqchip_init(); 66 irqchip_init();
65 if (!handle_arch_irq) 67 if (!handle_arch_irq)
66 panic("No interrupt controller found."); 68 panic("No interrupt controller found.");
69
70 if (system_uses_irq_prio_masking()) {
71 /*
72 * Now that we have a stack for our IRQ handler, set
73 * the PMR/PSR pair to a consistent state.
74 */
75 WARN_ON(read_sysreg(daif) & PSR_A_BIT);
76 local_daif_restore(DAIF_PROCCTX_NOIRQ);
77 }
78}
79
80/*
81 * Stubs to make nmi_enter/exit() code callable from ASM
82 */
83asmlinkage void notrace asm_nmi_enter(void)
84{
85 nmi_enter();
86}
87NOKPROBE_SYMBOL(asm_nmi_enter);
88
89asmlinkage void notrace asm_nmi_exit(void)
90{
91 nmi_exit();
67} 92}
93NOKPROBE_SYMBOL(asm_nmi_exit);
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index e23a68a5808f..46e643e30708 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -21,6 +21,7 @@
21 21
22void *module_alloc(unsigned long size) 22void *module_alloc(unsigned long size)
23{ 23{
24 u64 module_alloc_end = module_alloc_base + MODULES_VSIZE;
24 gfp_t gfp_mask = GFP_KERNEL; 25 gfp_t gfp_mask = GFP_KERNEL;
25 void *p; 26 void *p;
26 27
@@ -28,9 +29,12 @@ void *module_alloc(unsigned long size)
28 if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS)) 29 if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS))
29 gfp_mask |= __GFP_NOWARN; 30 gfp_mask |= __GFP_NOWARN;
30 31
32 if (IS_ENABLED(CONFIG_KASAN))
33 /* don't exceed the static module region - see below */
34 module_alloc_end = MODULES_END;
35
31 p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base, 36 p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base,
32 module_alloc_base + MODULES_VSIZE, 37 module_alloc_end, gfp_mask, PAGE_KERNEL, 0,
33 gfp_mask, PAGE_KERNEL_EXEC, 0,
34 NUMA_NO_NODE, __builtin_return_address(0)); 38 NUMA_NO_NODE, __builtin_return_address(0));
35 39
36 if (!p && IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) && 40 if (!p && IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) &&
@@ -46,7 +50,7 @@ void *module_alloc(unsigned long size)
 	 */
 	p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base,
 				module_alloc_base + SZ_2G, GFP_KERNEL,
-				PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
+				PAGE_KERNEL, 0, NUMA_NO_NODE,
 				__builtin_return_address(0));
 
 	if (p && (kasan_module_alloc(p, size) < 0)) {
diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
index 88ce502c8e6f..bd5dfffca272 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -122,8 +122,10 @@ void *alloc_insn_page(void)
 	void *page;
 
 	page = vmalloc_exec(PAGE_SIZE);
-	if (page)
+	if (page) {
 		set_memory_ro((unsigned long)page, 1);
+		set_vm_flush_reset_perms(page);
+	}
 
 	return page;
 }
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 9856395ccdb7..6a869d9f304f 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -83,7 +83,7 @@ static void __cpu_do_idle_irqprio(void)
 	 * be raised.
 	 */
 	pmr = gic_read_pmr();
-	gic_write_pmr(GIC_PRIO_IRQON);
+	gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
 
 	__cpu_do_idle();
 
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index da2441d7b066..3cf3b135027e 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -1808,8 +1808,12 @@ static void tracehook_report_syscall(struct pt_regs *regs,
 
 int syscall_trace_enter(struct pt_regs *regs)
 {
-	if (test_thread_flag(TIF_SYSCALL_TRACE))
+	if (test_thread_flag(TIF_SYSCALL_TRACE) ||
+	    test_thread_flag(TIF_SYSCALL_EMU)) {
 		tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER);
+		if (!in_syscall(regs) || test_thread_flag(TIF_SYSCALL_EMU))
+			return -1;
+	}
 
 	/* Do the secure computing after ptrace; failures should be fast. */
 	if (secure_computing(NULL) == -1)
diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c
index 331d1e5acad4..12a585386c2f 100644
--- a/arch/arm64/kernel/signal32.c
+++ b/arch/arm64/kernel/signal32.c
@@ -18,42 +18,7 @@
 #include <asm/traps.h>
 #include <linux/uaccess.h>
 #include <asm/unistd.h>
+#include <asm/vdso.h>
 
-struct compat_sigcontext {
-	/* We always set these two fields to 0 */
-	compat_ulong_t			trap_no;
-	compat_ulong_t			error_code;
-
-	compat_ulong_t			oldmask;
-	compat_ulong_t			arm_r0;
-	compat_ulong_t			arm_r1;
-	compat_ulong_t			arm_r2;
-	compat_ulong_t			arm_r3;
-	compat_ulong_t			arm_r4;
-	compat_ulong_t			arm_r5;
-	compat_ulong_t			arm_r6;
-	compat_ulong_t			arm_r7;
-	compat_ulong_t			arm_r8;
-	compat_ulong_t			arm_r9;
-	compat_ulong_t			arm_r10;
-	compat_ulong_t			arm_fp;
-	compat_ulong_t			arm_ip;
-	compat_ulong_t			arm_sp;
-	compat_ulong_t			arm_lr;
-	compat_ulong_t			arm_pc;
-	compat_ulong_t			arm_cpsr;
-	compat_ulong_t			fault_address;
-};
-
-struct compat_ucontext {
-	compat_ulong_t			uc_flags;
-	compat_uptr_t			uc_link;
-	compat_stack_t			uc_stack;
-	struct compat_sigcontext	uc_mcontext;
-	compat_sigset_t			uc_sigmask;
-	int		__unused[32 - (sizeof (compat_sigset_t) / sizeof (int))];
-	compat_ulong_t			uc_regspace[128] __attribute__((__aligned__(8)));
-};
-
 struct compat_vfp_sigframe {
 	compat_ulong_t	magic;
@@ -81,16 +46,6 @@ struct compat_aux_sigframe {
 	unsigned long		end_magic;
 } __attribute__((__aligned__(8)));
 
-struct compat_sigframe {
-	struct compat_ucontext	uc;
-	compat_ulong_t		retcode[2];
-};
-
-struct compat_rt_sigframe {
-	struct compat_siginfo info;
-	struct compat_sigframe sig;
-};
-
 #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
 
 static inline int put_sigset_t(compat_sigset_t __user *uset, sigset_t *set)
@@ -387,6 +342,30 @@ static void compat_setup_return(struct pt_regs *regs, struct k_sigaction *ka,
 		retcode = ptr_to_compat(ka->sa.sa_restorer);
 	} else {
 		/* Set up sigreturn pointer */
+#ifdef CONFIG_COMPAT_VDSO
+		void *vdso_base = current->mm->context.vdso;
+		void *vdso_trampoline;
+
+		if (ka->sa.sa_flags & SA_SIGINFO) {
+			if (thumb) {
+				vdso_trampoline = VDSO_SYMBOL(vdso_base,
+							compat_rt_sigreturn_thumb);
+			} else {
+				vdso_trampoline = VDSO_SYMBOL(vdso_base,
+							compat_rt_sigreturn_arm);
+			}
+		} else {
+			if (thumb) {
+				vdso_trampoline = VDSO_SYMBOL(vdso_base,
+							compat_sigreturn_thumb);
+			} else {
+				vdso_trampoline = VDSO_SYMBOL(vdso_base,
+							compat_sigreturn_arm);
+			}
+		}
+
+		retcode = ptr_to_compat(vdso_trampoline) + thumb;
+#else
 		unsigned int idx = thumb << 1;
 
 		if (ka->sa.sa_flags & SA_SIGINFO)
@@ -394,6 +373,7 @@ static void compat_setup_return(struct pt_regs *regs, struct k_sigaction *ka,
 
 		retcode = (unsigned long)current->mm->context.vdso +
 			  (idx << 2) + thumb;
+#endif
 	}
 
 	regs->regs[0] = usig;
diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
index 3e53ffa07994..f5b04dd8a710 100644
--- a/arch/arm64/kernel/sleep.S
+++ b/arch/arm64/kernel/sleep.S
@@ -27,7 +27,7 @@
  * aff0 = mpidr_masked & 0xff;
  * aff1 = mpidr_masked & 0xff00;
  * aff2 = mpidr_masked & 0xff0000;
- * aff2 = mpidr_masked & 0xff00000000;
+ * aff3 = mpidr_masked & 0xff00000000;
  * dst = (aff0 >> rs0 | aff1 >> rs1 | aff2 >> rs2 | aff3 >> rs3);
  *}
  * Input registers: rs0, rs1, rs2, rs3, mpidr, mask
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 6dcf9607d770..9286ee6749e8 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -181,11 +181,7 @@ static void init_gic_priority_masking(void)
 
 	WARN_ON(!(cpuflags & PSR_I_BIT));
 
-	gic_write_pmr(GIC_PRIO_IRQOFF);
-
-	/* We can only unmask PSR.I if we can take aborts */
-	if (!(cpuflags & PSR_A_BIT))
-		write_sysreg(cpuflags & ~PSR_I_BIT, daif);
+	gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
 }
 
 /*
@@ -834,18 +830,23 @@ void arch_irq_work_raise(void)
 }
 #endif
 
-/*
- * ipi_cpu_stop - handle IPI from smp_send_stop()
- */
-static void ipi_cpu_stop(unsigned int cpu)
+static void local_cpu_stop(void)
 {
-	set_cpu_online(cpu, false);
+	set_cpu_online(smp_processor_id(), false);
 
 	local_daif_mask();
 	sdei_mask_local_cpu();
+	cpu_park_loop();
+}
 
-	while (1)
-		cpu_relax();
+/*
+ * We need to implement panic_smp_self_stop() for parallel panic() calls, so
+ * that cpu_online_mask gets correctly updated and smp_send_stop() can skip
+ * CPUs that have already stopped themselves.
+ */
+void panic_smp_self_stop(void)
+{
+	local_cpu_stop();
 }
 
 #ifdef CONFIG_KEXEC_CORE
@@ -898,7 +899,7 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
 
 	case IPI_CPU_STOP:
 		irq_enter();
-		ipi_cpu_stop(cpu);
+		local_cpu_stop();
 		irq_exit();
 		break;
 
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 985721a1264c..a835a1a53826 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -55,16 +55,19 @@ static void dump_backtrace_entry(unsigned long where)
 	printk(" %pS\n", (void *)where);
 }
 
-static void __dump_instr(const char *lvl, struct pt_regs *regs)
+static void dump_kernel_instr(const char *lvl, struct pt_regs *regs)
 {
 	unsigned long addr = instruction_pointer(regs);
 	char str[sizeof("00000000 ") * 5 + 2 + 1], *p = str;
 	int i;
 
+	if (user_mode(regs))
+		return;
+
 	for (i = -4; i < 1; i++) {
 		unsigned int val, bad;
 
-		bad = get_user(val, &((u32 *)addr)[i]);
+		bad = aarch64_insn_read(&((u32 *)addr)[i], &val);
 
 		if (!bad)
 			p += sprintf(p, i == 0 ? "(%08x) " : "%08x ", val);
@@ -73,19 +76,8 @@ static void __dump_instr(const char *lvl, struct pt_regs *regs)
 			break;
 		}
 	}
-	printk("%sCode: %s\n", lvl, str);
-}
 
-static void dump_instr(const char *lvl, struct pt_regs *regs)
-{
-	if (!user_mode(regs)) {
-		mm_segment_t fs = get_fs();
-		set_fs(KERNEL_DS);
-		__dump_instr(lvl, regs);
-		set_fs(fs);
-	} else {
-		__dump_instr(lvl, regs);
-	}
+	printk("%sCode: %s\n", lvl, str);
 }
 
 void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
@@ -171,8 +163,7 @@ static int __die(const char *str, int err, struct pt_regs *regs)
 	print_modules();
 	show_regs(regs);
 
-	if (!user_mode(regs))
-		dump_instr(KERN_EMERG, regs);
+	dump_kernel_instr(KERN_EMERG, regs);
 
 	return ret;
 }
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index 663b166241d0..354b11e27c07 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -20,41 +20,212 @@
 #include <linux/slab.h>
 #include <linux/timekeeper_internal.h>
 #include <linux/vmalloc.h>
+#include <vdso/datapage.h>
+#include <vdso/helpers.h>
+#include <vdso/vsyscall.h>
 
 #include <asm/cacheflush.h>
 #include <asm/signal32.h>
 #include <asm/vdso.h>
-#include <asm/vdso_datapage.h>
 
 extern char vdso_start[], vdso_end[];
-static unsigned long vdso_pages __ro_after_init;
+#ifdef CONFIG_COMPAT_VDSO
+extern char vdso32_start[], vdso32_end[];
+#endif /* CONFIG_COMPAT_VDSO */
+
+/* vdso_lookup arch_index */
+enum arch_vdso_type {
+	ARM64_VDSO = 0,
+#ifdef CONFIG_COMPAT_VDSO
+	ARM64_VDSO32 = 1,
+#endif /* CONFIG_COMPAT_VDSO */
+};
+#ifdef CONFIG_COMPAT_VDSO
+#define VDSO_TYPES		(ARM64_VDSO32 + 1)
+#else
+#define VDSO_TYPES		(ARM64_VDSO + 1)
+#endif /* CONFIG_COMPAT_VDSO */
+
+struct __vdso_abi {
+	const char *name;
+	const char *vdso_code_start;
+	const char *vdso_code_end;
+	unsigned long vdso_pages;
+	/* Data Mapping */
+	struct vm_special_mapping *dm;
+	/* Code Mapping */
+	struct vm_special_mapping *cm;
+};
+
+static struct __vdso_abi vdso_lookup[VDSO_TYPES] __ro_after_init = {
+	{
+		.name = "vdso",
+		.vdso_code_start = vdso_start,
+		.vdso_code_end = vdso_end,
+	},
+#ifdef CONFIG_COMPAT_VDSO
+	{
+		.name = "vdso32",
+		.vdso_code_start = vdso32_start,
+		.vdso_code_end = vdso32_end,
+	},
+#endif /* CONFIG_COMPAT_VDSO */
+};
 
 /*
  * The vDSO data page.
  */
 static union {
-	struct vdso_data	data;
+	struct vdso_data	data[CS_BASES];
 	u8			page[PAGE_SIZE];
 } vdso_data_store __page_aligned_data;
-struct vdso_data *vdso_data = &vdso_data_store.data;
+struct vdso_data *vdso_data = vdso_data_store.data;
+
+static int __vdso_remap(enum arch_vdso_type arch_index,
+			const struct vm_special_mapping *sm,
+			struct vm_area_struct *new_vma)
+{
+	unsigned long new_size = new_vma->vm_end - new_vma->vm_start;
+	unsigned long vdso_size = vdso_lookup[arch_index].vdso_code_end -
+				  vdso_lookup[arch_index].vdso_code_start;
+
+	if (vdso_size != new_size)
+		return -EINVAL;
+
+	current->mm->context.vdso = (void *)new_vma->vm_start;
+
+	return 0;
+}
+
+static int __vdso_init(enum arch_vdso_type arch_index)
+{
+	int i;
+	struct page **vdso_pagelist;
+	unsigned long pfn;
+
+	if (memcmp(vdso_lookup[arch_index].vdso_code_start, "\177ELF", 4)) {
+		pr_err("vDSO is not a valid ELF object!\n");
+		return -EINVAL;
+	}
+
+	vdso_lookup[arch_index].vdso_pages = (
+			vdso_lookup[arch_index].vdso_code_end -
+			vdso_lookup[arch_index].vdso_code_start) >>
+			PAGE_SHIFT;
+
+	/* Allocate the vDSO pagelist, plus a page for the data. */
+	vdso_pagelist = kcalloc(vdso_lookup[arch_index].vdso_pages + 1,
+				sizeof(struct page *),
+				GFP_KERNEL);
+	if (vdso_pagelist == NULL)
+		return -ENOMEM;
+
+	/* Grab the vDSO data page. */
+	vdso_pagelist[0] = phys_to_page(__pa_symbol(vdso_data));
+
+
+	/* Grab the vDSO code pages. */
+	pfn = sym_to_pfn(vdso_lookup[arch_index].vdso_code_start);
+
+	for (i = 0; i < vdso_lookup[arch_index].vdso_pages; i++)
+		vdso_pagelist[i + 1] = pfn_to_page(pfn + i);
+
+	vdso_lookup[arch_index].dm->pages = &vdso_pagelist[0];
+	vdso_lookup[arch_index].cm->pages = &vdso_pagelist[1];
+
+	return 0;
+}
+
+static int __setup_additional_pages(enum arch_vdso_type arch_index,
+				    struct mm_struct *mm,
+				    struct linux_binprm *bprm,
+				    int uses_interp)
+{
+	unsigned long vdso_base, vdso_text_len, vdso_mapping_len;
+	void *ret;
+
+	vdso_text_len = vdso_lookup[arch_index].vdso_pages << PAGE_SHIFT;
+	/* Be sure to map the data page */
+	vdso_mapping_len = vdso_text_len + PAGE_SIZE;
+
+	vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0);
+	if (IS_ERR_VALUE(vdso_base)) {
+		ret = ERR_PTR(vdso_base);
+		goto up_fail;
+	}
+
+	ret = _install_special_mapping(mm, vdso_base, PAGE_SIZE,
+				       VM_READ|VM_MAYREAD,
+				       vdso_lookup[arch_index].dm);
+	if (IS_ERR(ret))
+		goto up_fail;
+
+	vdso_base += PAGE_SIZE;
+	mm->context.vdso = (void *)vdso_base;
+	ret = _install_special_mapping(mm, vdso_base, vdso_text_len,
+				       VM_READ|VM_EXEC|
+				       VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
+				       vdso_lookup[arch_index].cm);
+	if (IS_ERR(ret))
+		goto up_fail;
+
+	return 0;
+
+up_fail:
+	mm->context.vdso = NULL;
+	return PTR_ERR(ret);
+}
 
 #ifdef CONFIG_COMPAT
 /*
  * Create and map the vectors page for AArch32 tasks.
  */
+#ifdef CONFIG_COMPAT_VDSO
+static int aarch32_vdso_mremap(const struct vm_special_mapping *sm,
+		struct vm_area_struct *new_vma)
+{
+	return __vdso_remap(ARM64_VDSO32, sm, new_vma);
+}
+#endif /* CONFIG_COMPAT_VDSO */
+
+/*
+ * aarch32_vdso_pages:
+ * 0 - kuser helpers
+ * 1 - sigreturn code
+ * or (CONFIG_COMPAT_VDSO):
+ * 0 - kuser helpers
+ * 1 - vdso data
+ * 2 - vdso code
+ */
 #define C_VECTORS	0
+#ifdef CONFIG_COMPAT_VDSO
+#define C_VVAR		1
+#define C_VDSO		2
+#define C_PAGES		(C_VDSO + 1)
+#else
 #define C_SIGPAGE	1
 #define C_PAGES		(C_SIGPAGE + 1)
+#endif /* CONFIG_COMPAT_VDSO */
 static struct page *aarch32_vdso_pages[C_PAGES] __ro_after_init;
-static const struct vm_special_mapping aarch32_vdso_spec[C_PAGES] = {
+static struct vm_special_mapping aarch32_vdso_spec[C_PAGES] = {
 	{
 		.name	= "[vectors]", /* ABI */
 		.pages	= &aarch32_vdso_pages[C_VECTORS],
 	},
+#ifdef CONFIG_COMPAT_VDSO
+	{
+		.name = "[vvar]",
+	},
+	{
+		.name = "[vdso]",
+		.mremap = aarch32_vdso_mremap,
+	},
+#else
 	{
 		.name = "[sigpage]", /* ABI */
 		.pages = &aarch32_vdso_pages[C_SIGPAGE],
 	},
+#endif /* CONFIG_COMPAT_VDSO */
 };
 
 static int aarch32_alloc_kuser_vdso_page(void)
@@ -77,7 +248,33 @@ static int aarch32_alloc_kuser_vdso_page(void)
 	return 0;
 }
 
-static int __init aarch32_alloc_vdso_pages(void)
+#ifdef CONFIG_COMPAT_VDSO
+static int __aarch32_alloc_vdso_pages(void)
+{
+	int ret;
+
+	vdso_lookup[ARM64_VDSO32].dm = &aarch32_vdso_spec[C_VVAR];
+	vdso_lookup[ARM64_VDSO32].cm = &aarch32_vdso_spec[C_VDSO];
+
+	ret = __vdso_init(ARM64_VDSO32);
+	if (ret)
+		return ret;
+
+	ret = aarch32_alloc_kuser_vdso_page();
+	if (ret) {
+		unsigned long c_vvar =
+			(unsigned long)page_to_virt(aarch32_vdso_pages[C_VVAR]);
+		unsigned long c_vdso =
+			(unsigned long)page_to_virt(aarch32_vdso_pages[C_VDSO]);
+
+		free_page(c_vvar);
+		free_page(c_vdso);
+	}
+
+	return ret;
+}
+#else
+static int __aarch32_alloc_vdso_pages(void)
 {
 	extern char __aarch32_sigret_code_start[], __aarch32_sigret_code_end[];
 	int sigret_sz = __aarch32_sigret_code_end - __aarch32_sigret_code_start;
@@ -98,6 +295,12 @@ static int __init aarch32_alloc_vdso_pages(void)
 
 	return ret;
 }
+#endif /* CONFIG_COMPAT_VDSO */
+
+static int __init aarch32_alloc_vdso_pages(void)
+{
+	return __aarch32_alloc_vdso_pages();
+}
 arch_initcall(aarch32_alloc_vdso_pages);
 
 static int aarch32_kuser_helpers_setup(struct mm_struct *mm)
@@ -119,6 +322,7 @@ static int aarch32_kuser_helpers_setup(struct mm_struct *mm)
 	return PTR_ERR_OR_ZERO(ret);
 }
 
+#ifndef CONFIG_COMPAT_VDSO
 static int aarch32_sigreturn_setup(struct mm_struct *mm)
 {
 	unsigned long addr;
@@ -146,6 +350,7 @@ static int aarch32_sigreturn_setup(struct mm_struct *mm)
 out:
 	return PTR_ERR_OR_ZERO(ret);
 }
+#endif /* !CONFIG_COMPAT_VDSO */
 
 int aarch32_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 {
@@ -159,7 +364,14 @@ int aarch32_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 	if (ret)
 		goto out;
 
+#ifdef CONFIG_COMPAT_VDSO
+	ret = __setup_additional_pages(ARM64_VDSO32,
+				       mm,
+				       bprm,
+				       uses_interp);
+#else
 	ret = aarch32_sigreturn_setup(mm);
+#endif /* CONFIG_COMPAT_VDSO */
 
 out:
 	up_write(&mm->mmap_sem);
@@ -170,18 +382,18 @@ out:
 static int vdso_mremap(const struct vm_special_mapping *sm,
 		       struct vm_area_struct *new_vma)
 {
-	unsigned long new_size = new_vma->vm_end - new_vma->vm_start;
-	unsigned long vdso_size = vdso_end - vdso_start;
-
-	if (vdso_size != new_size)
-		return -EINVAL;
-
-	current->mm->context.vdso = (void *)new_vma->vm_start;
-
-	return 0;
+	return __vdso_remap(ARM64_VDSO, sm, new_vma);
 }
 
-static struct vm_special_mapping vdso_spec[2] __ro_after_init = {
+/*
+ * aarch64_vdso_pages:
+ * 0 - vvar
+ * 1 - vdso
+ */
+#define A_VVAR		0
+#define A_VDSO		1
+#define A_PAGES		(A_VDSO + 1)
+static struct vm_special_mapping vdso_spec[A_PAGES] __ro_after_init = {
 	{
 		.name	= "[vvar]",
 	},
@@ -193,37 +405,10 @@ static struct vm_special_mapping vdso_spec[2] __ro_after_init = {
 
 static int __init vdso_init(void)
 {
-	int i;
-	struct page **vdso_pagelist;
-	unsigned long pfn;
-
-	if (memcmp(vdso_start, "\177ELF", 4)) {
-		pr_err("vDSO is not a valid ELF object!\n");
-		return -EINVAL;
-	}
-
-	vdso_pages = (vdso_end - vdso_start) >> PAGE_SHIFT;
-
-	/* Allocate the vDSO pagelist, plus a page for the data. */
-	vdso_pagelist = kcalloc(vdso_pages + 1, sizeof(struct page *),
-				GFP_KERNEL);
-	if (vdso_pagelist == NULL)
-		return -ENOMEM;
-
-	/* Grab the vDSO data page. */
-	vdso_pagelist[0] = phys_to_page(__pa_symbol(vdso_data));
-
-
-	/* Grab the vDSO code pages. */
-	pfn = sym_to_pfn(vdso_start);
-
-	for (i = 0; i < vdso_pages; i++)
-		vdso_pagelist[i + 1] = pfn_to_page(pfn + i);
+	vdso_lookup[ARM64_VDSO].dm = &vdso_spec[A_VVAR];
+	vdso_lookup[ARM64_VDSO].cm = &vdso_spec[A_VDSO];
 
-	vdso_spec[0].pages = &vdso_pagelist[0];
-	vdso_spec[1].pages = &vdso_pagelist[1];
-
-	return 0;
+	return __vdso_init(ARM64_VDSO);
 }
 arch_initcall(vdso_init);
 
@@ -231,84 +416,17 @@ int arch_setup_additional_pages(struct linux_binprm *bprm,
 				int uses_interp)
 {
 	struct mm_struct *mm = current->mm;
-	unsigned long vdso_base, vdso_text_len, vdso_mapping_len;
-	void *ret;
-
-	vdso_text_len = vdso_pages << PAGE_SHIFT;
-	/* Be sure to map the data page */
-	vdso_mapping_len = vdso_text_len + PAGE_SIZE;
+	int ret;
 
 	if (down_write_killable(&mm->mmap_sem))
 		return -EINTR;
-	vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0);
-	if (IS_ERR_VALUE(vdso_base)) {
-		ret = ERR_PTR(vdso_base);
-		goto up_fail;
-	}
-	ret = _install_special_mapping(mm, vdso_base, PAGE_SIZE,
-				       VM_READ|VM_MAYREAD,
-				       &vdso_spec[0]);
-	if (IS_ERR(ret))
-		goto up_fail;
-
-	vdso_base += PAGE_SIZE;
-	mm->context.vdso = (void *)vdso_base;
-	ret = _install_special_mapping(mm, vdso_base, vdso_text_len,
-				       VM_READ|VM_EXEC|
-				       VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
-				       &vdso_spec[1]);
-	if (IS_ERR(ret))
-		goto up_fail;
 
+	ret = __setup_additional_pages(ARM64_VDSO,
+				       mm,
+				       bprm,
+				       uses_interp);
 
 	up_write(&mm->mmap_sem);
-	return 0;
-
-up_fail:
-	mm->context.vdso = NULL;
-	up_write(&mm->mmap_sem);
-	return PTR_ERR(ret);
-}
 
-/*
- * Update the vDSO data page to keep in sync with kernel timekeeping.
- */
-void update_vsyscall(struct timekeeper *tk)
-{
-	u32 use_syscall = !tk->tkr_mono.clock->archdata.vdso_direct;
-
-	++vdso_data->tb_seq_count;
-	smp_wmb();
-
-	vdso_data->use_syscall			= use_syscall;
-	vdso_data->xtime_coarse_sec		= tk->xtime_sec;
-	vdso_data->xtime_coarse_nsec		= tk->tkr_mono.xtime_nsec >>
-						  tk->tkr_mono.shift;
-	vdso_data->wtm_clock_sec		= tk->wall_to_monotonic.tv_sec;
-	vdso_data->wtm_clock_nsec		= tk->wall_to_monotonic.tv_nsec;
-
-	/* Read without the seqlock held by clock_getres() */
-	WRITE_ONCE(vdso_data->hrtimer_res, hrtimer_resolution);
-
-	if (!use_syscall) {
-		/* tkr_mono.cycle_last == tkr_raw.cycle_last */
-		vdso_data->cs_cycle_last	= tk->tkr_mono.cycle_last;
-		vdso_data->raw_time_sec		= tk->raw_sec;
-		vdso_data->raw_time_nsec	= tk->tkr_raw.xtime_nsec;
-		vdso_data->xtime_clock_sec	= tk->xtime_sec;
-		vdso_data->xtime_clock_nsec	= tk->tkr_mono.xtime_nsec;
-		vdso_data->cs_mono_mult		= tk->tkr_mono.mult;
-		vdso_data->cs_raw_mult		= tk->tkr_raw.mult;
-		/* tkr_mono.shift == tkr_raw.shift */
-		vdso_data->cs_shift		= tk->tkr_mono.shift;
-	}
-
-	smp_wmb();
-	++vdso_data->tb_seq_count;
-}
-
-void update_vsyscall_tz(void)
-{
-	vdso_data->tz_minuteswest	= sys_tz.tz_minuteswest;
-	vdso_data->tz_dsttime		= sys_tz.tz_dsttime;
+	return ret;
 }
diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
index fa230ff09aa1..4ab863045188 100644
--- a/arch/arm64/kernel/vdso/Makefile
+++ b/arch/arm64/kernel/vdso/Makefile
@@ -6,7 +6,12 @@
 # Heavily based on the vDSO Makefiles for other archs.
 #
 
-obj-vdso := gettimeofday.o note.o sigreturn.o
+# Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before
+# the inclusion of generic Makefile.
+ARCH_REL_TYPE_ABS := R_AARCH64_JUMP_SLOT|R_AARCH64_GLOB_DAT|R_AARCH64_ABS64
+include $(srctree)/lib/vdso/Makefile
+
+obj-vdso := vgettimeofday.o note.o sigreturn.o
 
 # Build rules
 targets := $(obj-vdso) vdso.so vdso.so.dbg
@@ -15,6 +20,31 @@ obj-vdso := $(addprefix $(obj)/, $(obj-vdso))
 ldflags-y := -shared -nostdlib -soname=linux-vdso.so.1 --hash-style=sysv \
 		--build-id -n -T
 
+ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18
+ccflags-y += -DDISABLE_BRANCH_PROFILING
+
+VDSO_LDFLAGS := -Bsymbolic
+
+CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os
+KBUILD_CFLAGS			+= $(DISABLE_LTO)
+KASAN_SANITIZE			:= n
+UBSAN_SANITIZE			:= n
+OBJECT_FILES_NON_STANDARD	:= y
+KCOV_INSTRUMENT			:= n
+
+ifeq ($(c-gettimeofday-y),)
+CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny
+else
+CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny -include $(c-gettimeofday-y)
+endif
+
+# Clang versions less than 8 do not support -mcmodel=tiny
+ifeq ($(CONFIG_CC_IS_CLANG), y)
+  ifeq ($(shell test $(CONFIG_CLANG_VERSION) -lt 80000; echo $$?),0)
+    CFLAGS_REMOVE_vgettimeofday.o += -mcmodel=tiny
+  endif
+endif
+
 # Disable gcov profiling for VDSO code
 GCOV_PROFILE := n
 
@@ -28,6 +58,7 @@ $(obj)/vdso.o : $(obj)/vdso.so
 # Link rule for the .so file, .lds has to be first
 $(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) FORCE
 	$(call if_changed,ld)
+	$(call if_changed,vdso_check)
 
 # Strip rule for the .so file
 $(obj)/%.so: OBJCOPYFLAGS := -S
@@ -42,13 +73,9 @@ quiet_cmd_vdsosym = VDSOSYM $@
 include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE
 	$(call if_changed,vdsosym)
 
-# Assembly rules for the .S files
-$(obj-vdso): %.o: %.S FORCE
-	$(call if_changed_dep,vdsoas)
-
 # Actual build commands
-quiet_cmd_vdsoas = VDSOA $@
-      cmd_vdsoas = $(CC) $(a_flags) -c -o $@ $<
+quiet_cmd_vdsocc = VDSOCC $@
+      cmd_vdsocc = $(CC) $(a_flags) $(c_flags) -c -o $@ $<
 
 # Install commands for the unstripped file
 quiet_cmd_vdso_install = INSTALL $@
diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S
index 80f780f56e0d..e69de29bb2d1 100644
--- a/arch/arm64/kernel/vdso/gettimeofday.S
+++ b/arch/arm64/kernel/vdso/gettimeofday.S
@@ -1,323 +0,0 @@
1/* SPDX-License-Identifier: GPL-2.0-only */
2/*
3 * Userspace implementations of gettimeofday() and friends.
4 *
5 * Copyright (C) 2012 ARM Limited
6 *
7 * Author: Will Deacon <will.deacon@arm.com>
8 */
9
10#include <linux/linkage.h>
11#include <asm/asm-offsets.h>
12#include <asm/unistd.h>
13
14#define NSEC_PER_SEC_LO16 0xca00
15#define NSEC_PER_SEC_HI16 0x3b9a
16
17vdso_data .req x6
18seqcnt .req w7
19w_tmp .req w8
20x_tmp .req x8
21
22/*
23 * Conventions for macro arguments:
24 * - An argument is write-only if its name starts with "res".
25 * - All other arguments are read-only, unless otherwise specified.
26 */
27
28 .macro seqcnt_acquire
299999: ldr seqcnt, [vdso_data, #VDSO_TB_SEQ_COUNT]
30 tbnz seqcnt, #0, 9999b
31 dmb ishld
32 .endm
33
34 .macro seqcnt_check fail
35 dmb ishld
36 ldr w_tmp, [vdso_data, #VDSO_TB_SEQ_COUNT]
37 cmp w_tmp, seqcnt
38 b.ne \fail
39 .endm
40
41 .macro syscall_check fail
42 ldr w_tmp, [vdso_data, #VDSO_USE_SYSCALL]
43 cbnz w_tmp, \fail
44 .endm
45
46 .macro get_nsec_per_sec res
47 mov \res, #NSEC_PER_SEC_LO16
48 movk \res, #NSEC_PER_SEC_HI16, lsl #16
49 .endm
50
51 /*
52 * Returns the clock delta, in nanoseconds left-shifted by the clock
53 * shift.
54 */
55 .macro get_clock_shifted_nsec res, cycle_last, mult
56 /* Read the virtual counter. */
57 isb
58 mrs x_tmp, cntvct_el0
59 /* Calculate cycle delta and convert to ns. */
60 sub \res, x_tmp, \cycle_last
61 /* We can only guarantee 56 bits of precision. */
62 movn x_tmp, #0xff00, lsl #48
63 and \res, x_tmp, \res
64 mul \res, \res, \mult
65 /*
66 * Fake address dependency from the value computed from the counter
67 * register to subsequent data page accesses so that the sequence
68 * locking also orders the read of the counter.
69 */
70 and x_tmp, \res, xzr
71 add vdso_data, vdso_data, x_tmp
72 .endm
73
74 /*
75 * Returns in res_{sec,nsec} the REALTIME timespec, based on the
76 * "wall time" (xtime) and the clock_mono delta.
77 */
78 .macro get_ts_realtime res_sec, res_nsec, \
79 clock_nsec, xtime_sec, xtime_nsec, nsec_to_sec
80 add \res_nsec, \clock_nsec, \xtime_nsec
81 udiv x_tmp, \res_nsec, \nsec_to_sec
82 add \res_sec, \xtime_sec, x_tmp
83 msub \res_nsec, x_tmp, \nsec_to_sec, \res_nsec
84 .endm
85
86 /*
87 * Returns in res_{sec,nsec} the timespec based on the clock_raw delta,
88 * used for CLOCK_MONOTONIC_RAW.
89 */
90 .macro get_ts_clock_raw res_sec, res_nsec, clock_nsec, nsec_to_sec
91 udiv \res_sec, \clock_nsec, \nsec_to_sec
92 msub \res_nsec, \res_sec, \nsec_to_sec, \clock_nsec
93 .endm
94
95 /* sec and nsec are modified in place. */
96 .macro add_ts sec, nsec, ts_sec, ts_nsec, nsec_to_sec
97 /* Add timespec. */
98 add \sec, \sec, \ts_sec
99 add \nsec, \nsec, \ts_nsec
100
101 /* Normalise the new timespec. */
102 cmp \nsec, \nsec_to_sec
103 b.lt 9999f
104 sub \nsec, \nsec, \nsec_to_sec
105 add \sec, \sec, #1
1069999:
107 cmp \nsec, #0
108 b.ge 9998f
109 add \nsec, \nsec, \nsec_to_sec
110 sub \sec, \sec, #1
1119998:
112 .endm
113
114 .macro clock_gettime_return, shift=0
115 .if \shift == 1
116 lsr x11, x11, x12
117 .endif
118 stp x10, x11, [x1, #TSPEC_TV_SEC]
119 mov x0, xzr
120 ret
121 .endm
122
123 .macro jump_slot jumptable, index, label
124 .if (. - \jumptable) != 4 * (\index)
125 .error "Jump slot index mismatch"
126 .endif
127 b \label
128 .endm
129
130 .text
131
132/* int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz); */
133ENTRY(__kernel_gettimeofday)
134 .cfi_startproc
135 adr vdso_data, _vdso_data
136 /* If tv is NULL, skip to the timezone code. */
137 cbz x0, 2f
138
139 /* Compute the time of day. */
1401: seqcnt_acquire
141 syscall_check fail=4f
142 ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
143 /* w11 = cs_mono_mult, w12 = cs_shift */
144 ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT]
145 ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC]
146
147 get_nsec_per_sec res=x9
148 lsl x9, x9, x12
149
150 get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11
151 seqcnt_check fail=1b
152 get_ts_realtime res_sec=x10, res_nsec=x11, \
153 clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9
154
155 /* Convert ns to us. */
156 mov x13, #1000
157 lsl x13, x13, x12
158 udiv x11, x11, x13
159 stp x10, x11, [x0, #TVAL_TV_SEC]
1602:
161 /* If tz is NULL, return 0. */
162 cbz x1, 3f
163 ldp w4, w5, [vdso_data, #VDSO_TZ_MINWEST]
164 stp w4, w5, [x1, #TZ_MINWEST]
1653:
166 mov x0, xzr
167 ret
1684:
169 /* Syscall fallback. */
170 mov x8, #__NR_gettimeofday
171 svc #0
172 ret
173 .cfi_endproc
174ENDPROC(__kernel_gettimeofday)
175
176#define JUMPSLOT_MAX CLOCK_MONOTONIC_COARSE
177
178/* int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp); */
179ENTRY(__kernel_clock_gettime)
180 .cfi_startproc
181 cmp w0, #JUMPSLOT_MAX
182 b.hi syscall
183 adr vdso_data, _vdso_data
184 adr x_tmp, jumptable
185 add x_tmp, x_tmp, w0, uxtw #2
186 br x_tmp
187
188 ALIGN
189jumptable:
190 jump_slot jumptable, CLOCK_REALTIME, realtime
191 jump_slot jumptable, CLOCK_MONOTONIC, monotonic
192 b syscall
193 b syscall
194 jump_slot jumptable, CLOCK_MONOTONIC_RAW, monotonic_raw
195 jump_slot jumptable, CLOCK_REALTIME_COARSE, realtime_coarse
196 jump_slot jumptable, CLOCK_MONOTONIC_COARSE, monotonic_coarse
197
198 .if (. - jumptable) != 4 * (JUMPSLOT_MAX + 1)
199 .error "Wrong jumptable size"
200 .endif
201
202 ALIGN
203realtime:
204 seqcnt_acquire
205 syscall_check fail=syscall
206 ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
207 /* w11 = cs_mono_mult, w12 = cs_shift */
208 ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT]
209 ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC]
210
211 /* All computations are done with left-shifted nsecs. */
212 get_nsec_per_sec res=x9
213 lsl x9, x9, x12
214
215 get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11
216 seqcnt_check fail=realtime
217 get_ts_realtime res_sec=x10, res_nsec=x11, \
218 clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9
219 clock_gettime_return, shift=1
220
221 ALIGN
222monotonic:
223 seqcnt_acquire
224 syscall_check fail=syscall
225 ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
226 /* w11 = cs_mono_mult, w12 = cs_shift */
227 ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT]
228 ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC]
229 ldp x3, x4, [vdso_data, #VDSO_WTM_CLK_SEC]
230
231 /* All computations are done with left-shifted nsecs. */
232 lsl x4, x4, x12
233 get_nsec_per_sec res=x9
234 lsl x9, x9, x12
235
236 get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11
237 seqcnt_check fail=monotonic
238 get_ts_realtime res_sec=x10, res_nsec=x11, \
239 clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9
240
241 add_ts sec=x10, nsec=x11, ts_sec=x3, ts_nsec=x4, nsec_to_sec=x9
242 clock_gettime_return, shift=1
243
244 ALIGN
245monotonic_raw:
246 seqcnt_acquire
247 syscall_check fail=syscall
248 ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
249 /* w11 = cs_raw_mult, w12 = cs_shift */
250 ldp w12, w11, [vdso_data, #VDSO_CS_SHIFT]
251 ldp x13, x14, [vdso_data, #VDSO_RAW_TIME_SEC]
252
253 /* All computations are done with left-shifted nsecs. */
254 get_nsec_per_sec res=x9
255 lsl x9, x9, x12
256
257 get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11
258 seqcnt_check fail=monotonic_raw
259 get_ts_clock_raw res_sec=x10, res_nsec=x11, \
260 clock_nsec=x15, nsec_to_sec=x9
261
262 add_ts sec=x10, nsec=x11, ts_sec=x13, ts_nsec=x14, nsec_to_sec=x9
263 clock_gettime_return, shift=1
264
265 ALIGN
266realtime_coarse:
267 seqcnt_acquire
268 ldp x10, x11, [vdso_data, #VDSO_XTIME_CRS_SEC]
269 seqcnt_check fail=realtime_coarse
270 clock_gettime_return
271
272 ALIGN
273monotonic_coarse:
274 seqcnt_acquire
275 ldp x10, x11, [vdso_data, #VDSO_XTIME_CRS_SEC]
276 ldp x13, x14, [vdso_data, #VDSO_WTM_CLK_SEC]
277 seqcnt_check fail=monotonic_coarse
278
279 /* Computations are done in (non-shifted) nsecs. */
280 get_nsec_per_sec res=x9
281 add_ts sec=x10, nsec=x11, ts_sec=x13, ts_nsec=x14, nsec_to_sec=x9
282 clock_gettime_return
283
284 ALIGN
285syscall: /* Syscall fallback. */
286 mov x8, #__NR_clock_gettime
287 svc #0
288 ret
289 .cfi_endproc
290ENDPROC(__kernel_clock_gettime)
291
292/* int __kernel_clock_getres(clockid_t clock_id, struct timespec *res); */
293ENTRY(__kernel_clock_getres)
294 .cfi_startproc
295 cmp w0, #CLOCK_REALTIME
296 ccmp w0, #CLOCK_MONOTONIC, #0x4, ne
297 ccmp w0, #CLOCK_MONOTONIC_RAW, #0x4, ne
298 b.ne 1f
299
300 adr vdso_data, _vdso_data
301 ldr w2, [vdso_data, #CLOCK_REALTIME_RES]
302 b 2f
3031:
304 cmp w0, #CLOCK_REALTIME_COARSE
305 ccmp w0, #CLOCK_MONOTONIC_COARSE, #0x4, ne
306 b.ne 4f
307 ldr x2, 5f
3082:
309 cbz x1, 3f
310 stp xzr, x2, [x1]
311
3123: /* res == NULL. */
313 mov w0, wzr
314 ret
315
3164: /* Syscall fallback. */
317 mov x8, #__NR_clock_getres
318 svc #0
319 ret
3205:
321 .quad CLOCK_COARSE_RES
322 .cfi_endproc
323ENDPROC(__kernel_clock_getres)
diff --git a/arch/arm64/kernel/vdso/vgettimeofday.c b/arch/arm64/kernel/vdso/vgettimeofday.c
new file mode 100644
index 000000000000..747635501a14
--- /dev/null
+++ b/arch/arm64/kernel/vdso/vgettimeofday.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARM64 userspace implementations of gettimeofday() and similar.
+ *
+ * Copyright (C) 2018 ARM Limited
+ *
+ */
+#include <linux/time.h>
+#include <linux/types.h>
+
+int __kernel_clock_gettime(clockid_t clock,
+			   struct __kernel_timespec *ts)
+{
+	return __cvdso_clock_gettime(clock, ts);
+}
+
+int __kernel_gettimeofday(struct __kernel_old_timeval *tv,
+			  struct timezone *tz)
+{
+	return __cvdso_gettimeofday(tv, tz);
+}
+
+int __kernel_clock_getres(clockid_t clock_id,
+			  struct __kernel_timespec *res)
+{
+	return __cvdso_clock_getres(clock_id, res);
+}
diff --git a/arch/arm64/kernel/vdso32/.gitignore b/arch/arm64/kernel/vdso32/.gitignore
new file mode 100644
index 000000000000..4fea950fa5ed
--- /dev/null
+++ b/arch/arm64/kernel/vdso32/.gitignore
@@ -0,0 +1,2 @@
+vdso.lds
+vdso.so.raw
diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile
new file mode 100644
index 000000000000..288c14d30b45
--- /dev/null
+++ b/arch/arm64/kernel/vdso32/Makefile
@@ -0,0 +1,186 @@
1# SPDX-License-Identifier: GPL-2.0
2#
3# Makefile for vdso32
4#
5
6# Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before
7# the inclusion of generic Makefile.
8ARCH_REL_TYPE_ABS := R_ARM_JUMP_SLOT|R_ARM_GLOB_DAT|R_ARM_ABS32
9include $(srctree)/lib/vdso/Makefile
10
11COMPATCC := $(CROSS_COMPILE_COMPAT)gcc
12
13# Same as cc-*option, but using COMPATCC instead of CC
14cc32-option = $(call try-run,\
15 $(COMPATCC) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2))
16cc32-disable-warning = $(call try-run,\
17 $(COMPATCC) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1)))
18cc32-ldoption = $(call try-run,\
19 $(COMPATCC) $(1) -nostdlib -x c /dev/null -o "$$TMP",$(1),$(2))
20
21# We cannot use the global flags to compile the vDSO files, the main reason
22# being that the 32-bit compiler may be older than the main (64-bit) compiler
23# and therefore may not understand flags set using $(cc-option ...). Besides,
24# arch-specific options should be taken from the arm Makefile instead of the
25# arm64 one.
26# As a result we set our own flags here.
27
28# From top-level Makefile
29# NOSTDINC_FLAGS
30VDSO_CPPFLAGS := -nostdinc -isystem $(shell $(COMPATCC) -print-file-name=include)
31VDSO_CPPFLAGS += $(LINUXINCLUDE)
32VDSO_CPPFLAGS += $(KBUILD_CPPFLAGS)
33
34# Common C and assembly flags
35# From top-level Makefile
36VDSO_CAFLAGS := $(VDSO_CPPFLAGS)
37VDSO_CAFLAGS += $(call cc32-option,-fno-PIE)
38ifdef CONFIG_DEBUG_INFO
39VDSO_CAFLAGS += -g
40endif
41ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(COMPATCC)), y)
42VDSO_CAFLAGS += -DCC_HAVE_ASM_GOTO
43endif
44
45# From arm Makefile
46VDSO_CAFLAGS += $(call cc32-option,-fno-dwarf2-cfi-asm)
47VDSO_CAFLAGS += -mabi=aapcs-linux -mfloat-abi=soft
48ifeq ($(CONFIG_CPU_BIG_ENDIAN), y)
49VDSO_CAFLAGS += -mbig-endian
50else
51VDSO_CAFLAGS += -mlittle-endian
52endif
53
54# From arm vDSO Makefile
55VDSO_CAFLAGS += -fPIC -fno-builtin -fno-stack-protector
56VDSO_CAFLAGS += -DDISABLE_BRANCH_PROFILING
57
58# Try to compile for ARMv8. If the compiler is too old and doesn't support it,
59# fall back to v7. There is no easy way to check for what architecture the code
60# is being compiled, so define a macro specifying that (see arch/arm/Makefile).
61VDSO_CAFLAGS += $(call cc32-option,-march=armv8-a -D__LINUX_ARM_ARCH__=8,\
62 -march=armv7-a -D__LINUX_ARM_ARCH__=7)
63
64VDSO_CFLAGS := $(VDSO_CAFLAGS)
65VDSO_CFLAGS += -DENABLE_COMPAT_VDSO=1
66# KBUILD_CFLAGS from top-level Makefile
67VDSO_CFLAGS += -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
68 -fno-strict-aliasing -fno-common \
69 -Werror-implicit-function-declaration \
70 -Wno-format-security \
71 -std=gnu89
72VDSO_CFLAGS += -O2
73# Some useful compiler-dependent flags from top-level Makefile
74VDSO_CFLAGS += $(call cc32-option,-Wdeclaration-after-statement,)
75VDSO_CFLAGS += $(call cc32-option,-Wno-pointer-sign)
76VDSO_CFLAGS += $(call cc32-option,-fno-strict-overflow)
77VDSO_CFLAGS += $(call cc32-option,-Werror=strict-prototypes)
78VDSO_CFLAGS += $(call cc32-option,-Werror=date-time)
79VDSO_CFLAGS += $(call cc32-option,-Werror=incompatible-pointer-types)
80
81# The 32-bit compiler does not provide 128-bit integers, which are used in
82# some headers that are indirectly included from the vDSO code.
83# This hack makes the compiler happy and should trigger a warning/error if
84# variables of such type are referenced.
85VDSO_CFLAGS += -D__uint128_t='void*'
86# Silence some warnings coming from headers that operate on long's
87# (on GCC 4.8 or older, there is unfortunately no way to silence this warning)
88VDSO_CFLAGS += $(call cc32-disable-warning,shift-count-overflow)
89VDSO_CFLAGS += -Wno-int-to-pointer-cast
90
91VDSO_AFLAGS := $(VDSO_CAFLAGS)
92VDSO_AFLAGS += -D__ASSEMBLY__
93
94VDSO_LDFLAGS := $(VDSO_CPPFLAGS)
95# From arm vDSO Makefile
96VDSO_LDFLAGS += -Wl,-Bsymbolic -Wl,--no-undefined -Wl,-soname=linux-vdso.so.1
97VDSO_LDFLAGS += -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096
98VDSO_LDFLAGS += -nostdlib -shared -mfloat-abi=soft
99VDSO_LDFLAGS += $(call cc32-ldoption,-Wl$(comma)--hash-style=sysv)
100VDSO_LDFLAGS += $(call cc32-ldoption,-Wl$(comma)--build-id)
101VDSO_LDFLAGS += $(call cc32-ldoption,-fuse-ld=bfd)
102
103
104# Borrow vdsomunge.c from the arm vDSO
105# We have to use a relative path because scripts/Makefile.host prefixes
106# $(hostprogs-y) with $(obj)
107munge := ../../../arm/vdso/vdsomunge
108hostprogs-y := $(munge)
109
110c-obj-vdso := note.o
111c-obj-vdso-gettimeofday := vgettimeofday.o
112asm-obj-vdso := sigreturn.o
113
114ifneq ($(c-gettimeofday-y),)
115VDSO_CFLAGS_gettimeofday_o += -include $(c-gettimeofday-y)
116endif
117
118VDSO_CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os
119
120# Build rules
121targets := $(c-obj-vdso) $(c-obj-vdso-gettimeofday) $(asm-obj-vdso) vdso.so vdso.so.dbg vdso.so.raw
122c-obj-vdso := $(addprefix $(obj)/, $(c-obj-vdso))
123c-obj-vdso-gettimeofday := $(addprefix $(obj)/, $(c-obj-vdso-gettimeofday))
124asm-obj-vdso := $(addprefix $(obj)/, $(asm-obj-vdso))
125obj-vdso := $(c-obj-vdso) $(c-obj-vdso-gettimeofday) $(asm-obj-vdso)
126
127obj-y += vdso.o
128extra-y += vdso.lds
129CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
130
131# Force dependency (vdso.s includes vdso.so through incbin)
132$(obj)/vdso.o: $(obj)/vdso.so
133
134include/generated/vdso32-offsets.h: $(obj)/vdso.so.dbg FORCE
135 $(call if_changed,vdsosym)
136
137# Strip rule for vdso.so
138$(obj)/vdso.so: OBJCOPYFLAGS := -S
139$(obj)/vdso.so: $(obj)/vdso.so.dbg FORCE
140 $(call if_changed,objcopy)
141
142$(obj)/vdso.so.dbg: $(obj)/vdso.so.raw $(obj)/$(munge) FORCE
143 $(call if_changed,vdsomunge)
144
145# Link rule for the .so file, .lds has to be first
146$(obj)/vdso.so.raw: $(src)/vdso.lds $(obj-vdso) FORCE
147 $(call if_changed,vdsold)
148 $(call if_changed,vdso_check)
149
150# Compilation rules for the vDSO sources
151$(c-obj-vdso): %.o: %.c FORCE
152 $(call if_changed_dep,vdsocc)
153$(c-obj-vdso-gettimeofday): %.o: %.c FORCE
154 $(call if_changed_dep,vdsocc_gettimeofday)
155$(asm-obj-vdso): %.o: %.S FORCE
156 $(call if_changed_dep,vdsoas)
157
158# Actual build commands
159quiet_cmd_vdsold = VDSOL $@
160 cmd_vdsold = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_LDFLAGS) \
161 -Wl,-T $(filter %.lds,$^) $(filter %.o,$^) -o $@
162quiet_cmd_vdsocc = VDSOC $@
163 cmd_vdsocc = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_CFLAGS) -c -o $@ $<
164quiet_cmd_vdsocc_gettimeofday = VDSOC_GTD $@
165 cmd_vdsocc_gettimeofday = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_CFLAGS) $(VDSO_CFLAGS_gettimeofday_o) -c -o $@ $<
166quiet_cmd_vdsoas = VDSOA $@
167 cmd_vdsoas = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_AFLAGS) -c -o $@ $<
168
169quiet_cmd_vdsomunge = MUNGE $@
170 cmd_vdsomunge = $(obj)/$(munge) $< $@
171
172# Generate vDSO offsets using helper script (borrowed from the 64-bit vDSO)
173gen-vdsosym := $(srctree)/$(src)/../vdso/gen_vdso_offsets.sh
174quiet_cmd_vdsosym = VDSOSYM $@
175# The AArch64 nm should be able to read an AArch32 binary
176 cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@
177
178# Install commands for the unstripped file
179quiet_cmd_vdso_install = INSTALL $@
180 cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/vdso32.so
181
182vdso.so: $(obj)/vdso.so.dbg
183 @mkdir -p $(MODLIB)/vdso
184 $(call cmd,vdso_install)
185
186vdso_install: vdso.so
diff --git a/arch/arm64/kernel/vdso32/note.c b/arch/arm64/kernel/vdso32/note.c
new file mode 100644
index 000000000000..eff5bf9efb8b
--- /dev/null
+++ b/arch/arm64/kernel/vdso32/note.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2012-2018 ARM Limited
+ *
+ * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
+ * Here we can supply some information useful to userland.
+ */
+
+#include <linux/uts.h>
+#include <linux/version.h>
+#include <linux/elfnote.h>
+#include <linux/build-salt.h>
+
+ELFNOTE32("Linux", 0, LINUX_VERSION_CODE);
+BUILD_SALT;
diff --git a/arch/arm64/kernel/vdso32/sigreturn.S b/arch/arm64/kernel/vdso32/sigreturn.S
new file mode 100644
index 000000000000..1a81277c2d09
--- /dev/null
+++ b/arch/arm64/kernel/vdso32/sigreturn.S
@@ -0,0 +1,62 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2/*
3 * This file provides both A32 and T32 versions, in accordance with the
4 * arm sigreturn code.
5 *
6 * Copyright (C) 2018 ARM Limited
7 */
8
9#include <linux/linkage.h>
10#include <asm/asm-offsets.h>
11#include <asm/unistd.h>
12
13#define ARM_ENTRY(name) \
14 ENTRY(name)
15
16#define ARM_ENDPROC(name) \
17 .type name, %function; \
18 END(name)
19
20 .text
21
22 .arm
23 .fnstart
24 .save {r0-r15}
25 .pad #COMPAT_SIGFRAME_REGS_OFFSET
26 nop
27ARM_ENTRY(__kernel_sigreturn_arm)
28 mov r7, #__NR_compat_sigreturn
29 svc #0
30 .fnend
31ARM_ENDPROC(__kernel_sigreturn_arm)
32
33 .fnstart
34 .save {r0-r15}
35 .pad #COMPAT_RT_SIGFRAME_REGS_OFFSET
36 nop
37ARM_ENTRY(__kernel_rt_sigreturn_arm)
38 mov r7, #__NR_compat_rt_sigreturn
39 svc #0
40 .fnend
41ARM_ENDPROC(__kernel_rt_sigreturn_arm)
42
43 .thumb
44 .fnstart
45 .save {r0-r15}
46 .pad #COMPAT_SIGFRAME_REGS_OFFSET
47 nop
48ARM_ENTRY(__kernel_sigreturn_thumb)
49 mov r7, #__NR_compat_sigreturn
50 svc #0
51 .fnend
52ARM_ENDPROC(__kernel_sigreturn_thumb)
53
54 .fnstart
55 .save {r0-r15}
56 .pad #COMPAT_RT_SIGFRAME_REGS_OFFSET
57 nop
58ARM_ENTRY(__kernel_rt_sigreturn_thumb)
59 mov r7, #__NR_compat_rt_sigreturn
60 svc #0
61 .fnend
62ARM_ENDPROC(__kernel_rt_sigreturn_thumb)
diff --git a/arch/arm64/kernel/vdso32/vdso.S b/arch/arm64/kernel/vdso32/vdso.S
new file mode 100644
index 000000000000..e72ac7bc4c04
--- /dev/null
+++ b/arch/arm64/kernel/vdso32/vdso.S
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2012 ARM Limited
+ */
+
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <linux/const.h>
+#include <asm/page.h>
+
+	.globl vdso32_start, vdso32_end
+	.section .rodata
+	.balign PAGE_SIZE
+vdso32_start:
+	.incbin "arch/arm64/kernel/vdso32/vdso.so"
+	.balign PAGE_SIZE
+vdso32_end:
+
+	.previous
diff --git a/arch/arm64/kernel/vdso32/vdso.lds.S b/arch/arm64/kernel/vdso32/vdso.lds.S
new file mode 100644
index 000000000000..a3944927eaeb
--- /dev/null
+++ b/arch/arm64/kernel/vdso32/vdso.lds.S
@@ -0,0 +1,82 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2/*
3 * Adapted from arm64 version.
4 *
5 * GNU linker script for the VDSO library.
6 * Heavily based on the vDSO linker scripts for other archs.
7 *
8 * Copyright (C) 2012-2018 ARM Limited
9 */
10
11#include <linux/const.h>
12#include <asm/page.h>
13#include <asm/vdso.h>
14
15OUTPUT_FORMAT("elf32-littlearm", "elf32-bigarm", "elf32-littlearm")
16OUTPUT_ARCH(arm)
17
18SECTIONS
19{
20 PROVIDE_HIDDEN(_vdso_data = . - PAGE_SIZE);
21 . = VDSO_LBASE + SIZEOF_HEADERS;
22
23 .hash : { *(.hash) } :text
24 .gnu.hash : { *(.gnu.hash) }
25 .dynsym : { *(.dynsym) }
26 .dynstr : { *(.dynstr) }
27 .gnu.version : { *(.gnu.version) }
28 .gnu.version_d : { *(.gnu.version_d) }
29 .gnu.version_r : { *(.gnu.version_r) }
30
31 .note : { *(.note.*) } :text :note
32
33 .dynamic : { *(.dynamic) } :text :dynamic
34
35 .rodata : { *(.rodata*) } :text
36
37 .text : { *(.text*) } :text =0xe7f001f2
38
39 .got : { *(.got) }
40 .rel.plt : { *(.rel.plt) }
41
42 /DISCARD/ : {
43 *(.note.GNU-stack)
44 *(.data .data.* .gnu.linkonce.d.* .sdata*)
45 *(.bss .sbss .dynbss .dynsbss)
46 }
47}
48
49/*
50 * We must supply the ELF program headers explicitly to get just one
51 * PT_LOAD segment, and set the flags explicitly to make segments read-only.
52 */
53PHDRS
54{
55 text PT_LOAD FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */
56 dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
57 note PT_NOTE FLAGS(4); /* PF_R */
58}
59
60VERSION
61{
62 LINUX_2.6 {
63 global:
64 __vdso_clock_gettime;
65 __vdso_gettimeofday;
66 __vdso_clock_getres;
67 __kernel_sigreturn_arm;
68 __kernel_sigreturn_thumb;
69 __kernel_rt_sigreturn_arm;
70 __kernel_rt_sigreturn_thumb;
71 __vdso_clock_gettime64;
72 local: *;
73 };
74}
75
76/*
77 * Make the sigreturn code visible to the kernel.
78 */
79VDSO_compat_sigreturn_arm = __kernel_sigreturn_arm;
80VDSO_compat_sigreturn_thumb = __kernel_sigreturn_thumb;
81VDSO_compat_rt_sigreturn_arm = __kernel_rt_sigreturn_arm;
82VDSO_compat_rt_sigreturn_thumb = __kernel_rt_sigreturn_thumb;
diff --git a/arch/arm64/kernel/vdso32/vgettimeofday.c b/arch/arm64/kernel/vdso32/vgettimeofday.c
new file mode 100644
index 000000000000..54fc1c2ce93f
--- /dev/null
+++ b/arch/arm64/kernel/vdso32/vgettimeofday.c
@@ -0,0 +1,59 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * ARM64 compat userspace implementations of gettimeofday() and similar.
4 *
5 * Copyright (C) 2018 ARM Limited
6 *
7 */
8#include <linux/time.h>
9#include <linux/types.h>
10
11int __vdso_clock_gettime(clockid_t clock,
12 struct old_timespec32 *ts)
13{
14 /* The checks below are required for ABI consistency with arm */
15 if ((u32)ts >= TASK_SIZE_32)
16 return -EFAULT;
17
18 return __cvdso_clock_gettime32(clock, ts);
19}
20
21int __vdso_clock_gettime64(clockid_t clock,
22 struct __kernel_timespec *ts)
23{
24 /* The checks below are required for ABI consistency with arm */
25 if ((u32)ts >= TASK_SIZE_32)
26 return -EFAULT;
27
28 return __cvdso_clock_gettime(clock, ts);
29}
30
31int __vdso_gettimeofday(struct __kernel_old_timeval *tv,
32 struct timezone *tz)
33{
34 return __cvdso_gettimeofday(tv, tz);
35}
36
37int __vdso_clock_getres(clockid_t clock_id,
38 struct old_timespec32 *res)
39{
40 /* The checks below are required for ABI consistency with arm */
41 if ((u32)res >= TASK_SIZE_32)
42 return -EFAULT;
43
44 return __cvdso_clock_getres_time32(clock_id, res);
45}
46
47/* Avoid unresolved references emitted by GCC */
48
49void __aeabi_unwind_cpp_pr0(void)
50{
51}
52
53void __aeabi_unwind_cpp_pr1(void)
54{
55}
56
57void __aeabi_unwind_cpp_pr2(void)
58{
59}
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index 6e3c9c8b2df9..525010504f9d 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -112,9 +112,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
 	if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) {
 		u64 *guest_zcr = &vcpu->arch.ctxt.sys_regs[ZCR_EL1];
 
-		/* Clean guest FP state to memory and invalidate cpu view */
-		fpsimd_save();
-		fpsimd_flush_cpu_state();
+		fpsimd_save_and_flush_cpu_state();
 
 		if (guest_has_sve)
 			*guest_zcr = read_sysreg_s(SYS_ZCR_EL12);
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index c2afa7982047..dfd626447482 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -208,7 +208,7 @@ out:
 
 #define vq_word(vq)	(((vq) - SVE_VQ_MIN) / 64)
 #define vq_mask(vq)	((u64)1 << ((vq) - SVE_VQ_MIN) % 64)
-#define vq_present(vqs, vq) ((vqs)[vq_word(vq)] & vq_mask(vq))
+#define vq_present(vqs, vq) (!!((vqs)[vq_word(vq)] & vq_mask(vq)))
 
 static int get_sve_vls(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 {
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index b0041812bca9..58f281b6ca4a 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -604,7 +604,7 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
 	 * Naturally, we want to avoid this.
 	 */
 	if (system_uses_irq_prio_masking()) {
-		gic_write_pmr(GIC_PRIO_IRQON);
+		gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
 		dsb(sy);
 	}
 
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 5992eb9a9a08..1d17dbeafe76 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -80,10 +80,6 @@ static int __swiotlb_mmap_pfn(struct vm_area_struct *vma,
 
 static int __init arm64_dma_init(void)
 {
-	WARN_TAINT(ARCH_DMA_MINALIGN < cache_line_size(),
-		   TAINT_CPU_OUT_OF_SPEC,
-		   "ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)",
-		   ARCH_DMA_MINALIGN, cache_line_size());
 	return dma_atomic_pool_init(GFP_DMA32, __pgprot(PROT_NORMAL_NC));
 }
 arch_initcall(arm64_dma_init);
@@ -461,6 +457,14 @@ static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
 void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
 			const struct iommu_ops *iommu, bool coherent)
 {
+	int cls = cache_line_size_of_cpu();
+
+	WARN_TAINT(!coherent && cls > ARCH_DMA_MINALIGN,
+		   TAINT_CPU_OUT_OF_SPEC,
+		   "%s %s: ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)",
+		   dev_driver_string(dev), dev_name(dev),
+		   ARCH_DMA_MINALIGN, cls);
+
 	dev->dma_coherent = coherent;
 	__iommu_setup_dma_ops(dev, dma_base, size, iommu);
 
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 2d115016feb4..c8c61b1eb479 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -384,40 +384,31 @@ static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *re
 #define VM_FAULT_BADACCESS	0x020000
 
 static vm_fault_t __do_page_fault(struct mm_struct *mm, unsigned long addr,
-			   unsigned int mm_flags, unsigned long vm_flags,
-			   struct task_struct *tsk)
+			   unsigned int mm_flags, unsigned long vm_flags)
 {
-	struct vm_area_struct *vma;
-	vm_fault_t fault;
+	struct vm_area_struct *vma = find_vma(mm, addr);
 
-	vma = find_vma(mm, addr);
-	fault = VM_FAULT_BADMAP;
 	if (unlikely(!vma))
-		goto out;
-	if (unlikely(vma->vm_start > addr))
-		goto check_stack;
+		return VM_FAULT_BADMAP;
 
 	/*
 	 * Ok, we have a good vm_area for this memory access, so we can handle
 	 * it.
 	 */
-good_area:
+	if (unlikely(vma->vm_start > addr)) {
+		if (!(vma->vm_flags & VM_GROWSDOWN))
+			return VM_FAULT_BADMAP;
+		if (expand_stack(vma, addr))
+			return VM_FAULT_BADMAP;
+	}
+
 	/*
 	 * Check that the permissions on the VMA allow for the fault which
 	 * occurred.
 	 */
-	if (!(vma->vm_flags & vm_flags)) {
-		fault = VM_FAULT_BADACCESS;
-		goto out;
-	}
-
+	if (!(vma->vm_flags & vm_flags))
+		return VM_FAULT_BADACCESS;
 	return handle_mm_fault(vma, addr & PAGE_MASK, mm_flags);
-
-check_stack:
-	if (vma->vm_flags & VM_GROWSDOWN && !expand_stack(vma, addr))
-		goto good_area;
-out:
-	return fault;
 }
 
 static bool is_el0_instruction_abort(unsigned int esr)
@@ -425,12 +416,20 @@ static bool is_el0_instruction_abort(unsigned int esr)
425 return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_LOW; 416 return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_LOW;
426} 417}
427 418
419/*
420 * Note: not valid for EL1 DC IVAC, but we never use that such that it
421 * should fault. EL0 cannot issue DC IVAC (undef).
422 */
423static bool is_write_abort(unsigned int esr)
424{
425 return (esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM);
426}
427
428static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, 428static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
429 struct pt_regs *regs) 429 struct pt_regs *regs)
430{ 430{
431 const struct fault_info *inf; 431 const struct fault_info *inf;
432 struct task_struct *tsk; 432 struct mm_struct *mm = current->mm;
433 struct mm_struct *mm;
434 vm_fault_t fault, major = 0; 433 vm_fault_t fault, major = 0;
435 unsigned long vm_flags = VM_READ | VM_WRITE; 434 unsigned long vm_flags = VM_READ | VM_WRITE;
436 unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; 435 unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
@@ -438,9 +437,6 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
438 if (notify_page_fault(regs, esr)) 437 if (notify_page_fault(regs, esr))
439 return 0; 438 return 0;
440 439
441 tsk = current;
442 mm = tsk->mm;
443
444 /* 440 /*
445 * If we're in an interrupt or have no user context, we must not take 441 * If we're in an interrupt or have no user context, we must not take
446 * the fault. 442 * the fault.
@@ -453,7 +449,8 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
453 449
454 if (is_el0_instruction_abort(esr)) { 450 if (is_el0_instruction_abort(esr)) {
455 vm_flags = VM_EXEC; 451 vm_flags = VM_EXEC;
456 } else if ((esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM)) { 452 mm_flags |= FAULT_FLAG_INSTRUCTION;
453 } else if (is_write_abort(esr)) {
457 vm_flags = VM_WRITE; 454 vm_flags = VM_WRITE;
458 mm_flags |= FAULT_FLAG_WRITE; 455 mm_flags |= FAULT_FLAG_WRITE;
459 } 456 }
@@ -492,12 +489,14 @@ retry:
492 */ 489 */
493 might_sleep(); 490 might_sleep();
494#ifdef CONFIG_DEBUG_VM 491#ifdef CONFIG_DEBUG_VM
495 if (!user_mode(regs) && !search_exception_tables(regs->pc)) 492 if (!user_mode(regs) && !search_exception_tables(regs->pc)) {
493 up_read(&mm->mmap_sem);
496 goto no_context; 494 goto no_context;
495 }
497#endif 496#endif
498 } 497 }
499 498
500 fault = __do_page_fault(mm, addr, mm_flags, vm_flags, tsk); 499 fault = __do_page_fault(mm, addr, mm_flags, vm_flags);
501 major |= fault & VM_FAULT_MAJOR; 500 major |= fault & VM_FAULT_MAJOR;
502 501
503 if (fault & VM_FAULT_RETRY) { 502 if (fault & VM_FAULT_RETRY) {
@@ -537,11 +536,11 @@ retry:
537 * that point. 536 * that point.
538 */ 537 */
539 if (major) { 538 if (major) {
540 tsk->maj_flt++; 539 current->maj_flt++;
541 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, 540 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs,
542 addr); 541 addr);
543 } else { 542 } else {
544 tsk->min_flt++; 543 current->min_flt++;
545 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, 544 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs,
546 addr); 545 addr);
547 } 546 }
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index f475e54fbc43..bbeb6a5a6ba6 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -228,7 +228,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
228 228
229 if (sz == PUD_SIZE) { 229 if (sz == PUD_SIZE) {
230 ptep = (pte_t *)pudp; 230 ptep = (pte_t *)pudp;
231 } else if (sz == (PAGE_SIZE * CONT_PTES)) { 231 } else if (sz == (CONT_PTE_SIZE)) {
232 pmdp = pmd_alloc(mm, pudp, addr); 232 pmdp = pmd_alloc(mm, pudp, addr);
233 233
234 WARN_ON(addr & (sz - 1)); 234 WARN_ON(addr & (sz - 1));
@@ -246,7 +246,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
246 ptep = huge_pmd_share(mm, addr, pudp); 246 ptep = huge_pmd_share(mm, addr, pudp);
247 else 247 else
248 ptep = (pte_t *)pmd_alloc(mm, pudp, addr); 248 ptep = (pte_t *)pmd_alloc(mm, pudp, addr);
249 } else if (sz == (PMD_SIZE * CONT_PMDS)) { 249 } else if (sz == (CONT_PMD_SIZE)) {
250 pmdp = pmd_alloc(mm, pudp, addr); 250 pmdp = pmd_alloc(mm, pudp, addr);
251 WARN_ON(addr & (sz - 1)); 251 WARN_ON(addr & (sz - 1));
252 return (pte_t *)pmdp; 252 return (pte_t *)pmdp;
@@ -454,9 +454,9 @@ static int __init hugetlbpage_init(void)
454#ifdef CONFIG_ARM64_4K_PAGES 454#ifdef CONFIG_ARM64_4K_PAGES
455 add_huge_page_size(PUD_SIZE); 455 add_huge_page_size(PUD_SIZE);
456#endif 456#endif
457 add_huge_page_size(PMD_SIZE * CONT_PMDS); 457 add_huge_page_size(CONT_PMD_SIZE);
458 add_huge_page_size(PMD_SIZE); 458 add_huge_page_size(PMD_SIZE);
459 add_huge_page_size(PAGE_SIZE * CONT_PTES); 459 add_huge_page_size(CONT_PTE_SIZE);
460 460
461 return 0; 461 return 0;
462} 462}
@@ -470,9 +470,9 @@ static __init int setup_hugepagesz(char *opt)
470#ifdef CONFIG_ARM64_4K_PAGES 470#ifdef CONFIG_ARM64_4K_PAGES
471 case PUD_SIZE: 471 case PUD_SIZE:
472#endif 472#endif
473 case PMD_SIZE * CONT_PMDS: 473 case CONT_PMD_SIZE:
474 case PMD_SIZE: 474 case PMD_SIZE:
475 case PAGE_SIZE * CONT_PTES: 475 case CONT_PTE_SIZE:
476 add_huge_page_size(ps); 476 add_huge_page_size(ps);
477 return 1; 477 return 1;
478 } 478 }
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 749c9b269f08..f3c795278def 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -180,8 +180,9 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
180{ 180{
181 unsigned long max_zone_pfns[MAX_NR_ZONES] = {0}; 181 unsigned long max_zone_pfns[MAX_NR_ZONES] = {0};
182 182
183 if (IS_ENABLED(CONFIG_ZONE_DMA32)) 183#ifdef CONFIG_ZONE_DMA32
184 max_zone_pfns[ZONE_DMA32] = PFN_DOWN(max_zone_dma_phys()); 184 max_zone_pfns[ZONE_DMA32] = PFN_DOWN(max_zone_dma_phys());
185#endif
185 max_zone_pfns[ZONE_NORMAL] = max; 186 max_zone_pfns[ZONE_NORMAL] = max;
186 187
187 free_area_init_nodes(max_zone_pfns); 188 free_area_init_nodes(max_zone_pfns);
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index e5ae8663f230..3645f29bd814 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -765,7 +765,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
765 765
766 return 0; 766 return 0;
767} 767}
768#endif /* CONFIG_ARM64_64K_PAGES */ 768#endif /* !ARM64_SWAPPER_USES_SECTION_MAPS */
769void vmemmap_free(unsigned long start, unsigned long end, 769void vmemmap_free(unsigned long start, unsigned long end,
770 struct vmem_altmap *altmap) 770 struct vmem_altmap *altmap)
771{ 771{
@@ -960,32 +960,28 @@ int __init arch_ioremap_pmd_supported(void)
960 960
961int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot) 961int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot)
962{ 962{
963 pgprot_t sect_prot = __pgprot(PUD_TYPE_SECT | 963 pud_t new_pud = pfn_pud(__phys_to_pfn(phys), mk_pud_sect_prot(prot));
964 pgprot_val(mk_sect_prot(prot)));
965 pud_t new_pud = pfn_pud(__phys_to_pfn(phys), sect_prot);
966 964
967 /* Only allow permission changes for now */ 965 /* Only allow permission changes for now */
968 if (!pgattr_change_is_safe(READ_ONCE(pud_val(*pudp)), 966 if (!pgattr_change_is_safe(READ_ONCE(pud_val(*pudp)),
969 pud_val(new_pud))) 967 pud_val(new_pud)))
970 return 0; 968 return 0;
971 969
972 BUG_ON(phys & ~PUD_MASK); 970 VM_BUG_ON(phys & ~PUD_MASK);
973 set_pud(pudp, new_pud); 971 set_pud(pudp, new_pud);
974 return 1; 972 return 1;
975} 973}
976 974
977int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot) 975int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot)
978{ 976{
979 pgprot_t sect_prot = __pgprot(PMD_TYPE_SECT | 977 pmd_t new_pmd = pfn_pmd(__phys_to_pfn(phys), mk_pmd_sect_prot(prot));
980 pgprot_val(mk_sect_prot(prot)));
981 pmd_t new_pmd = pfn_pmd(__phys_to_pfn(phys), sect_prot);
982 978
983 /* Only allow permission changes for now */ 979 /* Only allow permission changes for now */
984 if (!pgattr_change_is_safe(READ_ONCE(pmd_val(*pmdp)), 980 if (!pgattr_change_is_safe(READ_ONCE(pmd_val(*pmdp)),
985 pmd_val(new_pmd))) 981 pmd_val(new_pmd)))
986 return 0; 982 return 0;
987 983
988 BUG_ON(phys & ~PMD_MASK); 984 VM_BUG_ON(phys & ~PMD_MASK);
989 set_pmd(pmdp, new_pmd); 985 set_pmd(pmdp, new_pmd);
990 return 1; 986 return 1;
991} 987}
diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
index 47b057bfa803..fcdcf6cd7677 100644
--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -151,17 +151,48 @@ int set_memory_valid(unsigned long addr, int numpages, int enable)
151 __pgprot(PTE_VALID)); 151 __pgprot(PTE_VALID));
152} 152}
153 153
154#ifdef CONFIG_DEBUG_PAGEALLOC 154int set_direct_map_invalid_noflush(struct page *page)
155{
156 struct page_change_data data = {
157 .set_mask = __pgprot(0),
158 .clear_mask = __pgprot(PTE_VALID),
159 };
160
161 if (!rodata_full)
162 return 0;
163
164 return apply_to_page_range(&init_mm,
165 (unsigned long)page_address(page),
166 PAGE_SIZE, change_page_range, &data);
167}
168
169int set_direct_map_default_noflush(struct page *page)
170{
171 struct page_change_data data = {
172 .set_mask = __pgprot(PTE_VALID | PTE_WRITE),
173 .clear_mask = __pgprot(PTE_RDONLY),
174 };
175
176 if (!rodata_full)
177 return 0;
178
179 return apply_to_page_range(&init_mm,
180 (unsigned long)page_address(page),
181 PAGE_SIZE, change_page_range, &data);
182}
183
155void __kernel_map_pages(struct page *page, int numpages, int enable) 184void __kernel_map_pages(struct page *page, int numpages, int enable)
156{ 185{
186 if (!debug_pagealloc_enabled() && !rodata_full)
187 return;
188
157 set_memory_valid((unsigned long)page_address(page), numpages, enable); 189 set_memory_valid((unsigned long)page_address(page), numpages, enable);
158} 190}
159#ifdef CONFIG_HIBERNATION 191
160/* 192/*
161 * When built with CONFIG_DEBUG_PAGEALLOC and CONFIG_HIBERNATION, this function 193 * This function is used to determine if a linear map page has been marked as
162 * is used to determine if a linear map page has been marked as not-valid by 194 * not-valid. Walk the page table and check the PTE_VALID bit. This is based
163 * CONFIG_DEBUG_PAGEALLOC. Walk the page table and check the PTE_VALID bit. 195 * on kern_addr_valid(), which almost does what we need.
164 * This is based on kern_addr_valid(), which almost does what we need.
165 * 196 *
166 * Because this is only called on the kernel linear map, p?d_sect() implies 197 * Because this is only called on the kernel linear map, p?d_sect() implies
167 * p?d_present(). When debug_pagealloc is enabled, sections mappings are 198 * p?d_present(). When debug_pagealloc is enabled, sections mappings are
@@ -175,6 +206,9 @@ bool kernel_page_present(struct page *page)
175 pte_t *ptep; 206 pte_t *ptep;
176 unsigned long addr = (unsigned long)page_address(page); 207 unsigned long addr = (unsigned long)page_address(page);
177 208
209 if (!debug_pagealloc_enabled() && !rodata_full)
210 return true;
211
178 pgdp = pgd_offset_k(addr); 212 pgdp = pgd_offset_k(addr);
179 if (pgd_none(READ_ONCE(*pgdp))) 213 if (pgd_none(READ_ONCE(*pgdp)))
180 return false; 214 return false;
@@ -196,5 +230,3 @@ bool kernel_page_present(struct page *page)
196 ptep = pte_offset_kernel(pmdp, addr); 230 ptep = pte_offset_kernel(pmdp, addr);
197 return pte_valid(READ_ONCE(*ptep)); 231 return pte_valid(READ_ONCE(*ptep));
198} 232}
199#endif /* CONFIG_HIBERNATION */
200#endif /* CONFIG_DEBUG_PAGEALLOC */
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 87c568807925..f5b437f8a22b 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -970,7 +970,7 @@ void *bpf_jit_alloc_exec(unsigned long size)
970{ 970{
971 return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START, 971 return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START,
972 BPF_JIT_REGION_END, GFP_KERNEL, 972 BPF_JIT_REGION_END, GFP_KERNEL,
973 PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, 973 PAGE_KERNEL, 0, NUMA_NO_NODE,
974 __builtin_return_address(0)); 974 __builtin_return_address(0));
975} 975}
976 976
diff --git a/arch/csky/kernel/signal.c b/arch/csky/kernel/signal.c
index 04a43cfd4e09..d47a3381aad8 100644
--- a/arch/csky/kernel/signal.c
+++ b/arch/csky/kernel/signal.c
@@ -39,6 +39,11 @@ static int save_fpu_state(struct sigcontext __user *sc)
39#endif 39#endif
40 40
41struct rt_sigframe { 41struct rt_sigframe {
42 /*
43 * pad[3] is compatible with the same struct defined in
44 * gcc/libgcc/config/csky/linux-unwind.h
45 */
46 int pad[3];
42 struct siginfo info; 47 struct siginfo info;
43 struct ucontext uc; 48 struct ucontext uc;
44}; 49};
diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h
index 206530d0751b..50440f3ddc43 100644
--- a/arch/ia64/include/asm/atomic.h
+++ b/arch/ia64/include/asm/atomic.h
@@ -124,10 +124,10 @@ ATOMIC_FETCH_OP(xor, ^)
124#undef ATOMIC_OP 124#undef ATOMIC_OP
125 125
126#define ATOMIC64_OP(op, c_op) \ 126#define ATOMIC64_OP(op, c_op) \
127static __inline__ long \ 127static __inline__ s64 \
128ia64_atomic64_##op (__s64 i, atomic64_t *v) \ 128ia64_atomic64_##op (s64 i, atomic64_t *v) \
129{ \ 129{ \
130 __s64 old, new; \ 130 s64 old, new; \
131 CMPXCHG_BUGCHECK_DECL \ 131 CMPXCHG_BUGCHECK_DECL \
132 \ 132 \
133 do { \ 133 do { \
@@ -139,10 +139,10 @@ ia64_atomic64_##op (__s64 i, atomic64_t *v) \
139} 139}
140 140
141#define ATOMIC64_FETCH_OP(op, c_op) \ 141#define ATOMIC64_FETCH_OP(op, c_op) \
142static __inline__ long \ 142static __inline__ s64 \
143ia64_atomic64_fetch_##op (__s64 i, atomic64_t *v) \ 143ia64_atomic64_fetch_##op (s64 i, atomic64_t *v) \
144{ \ 144{ \
145 __s64 old, new; \ 145 s64 old, new; \
146 CMPXCHG_BUGCHECK_DECL \ 146 CMPXCHG_BUGCHECK_DECL \
147 \ 147 \
148 do { \ 148 do { \
@@ -162,7 +162,7 @@ ATOMIC64_OPS(sub, -)
162 162
163#define atomic64_add_return(i,v) \ 163#define atomic64_add_return(i,v) \
164({ \ 164({ \
165 long __ia64_aar_i = (i); \ 165 s64 __ia64_aar_i = (i); \
166 __ia64_atomic_const(i) \ 166 __ia64_atomic_const(i) \
167 ? ia64_fetch_and_add(__ia64_aar_i, &(v)->counter) \ 167 ? ia64_fetch_and_add(__ia64_aar_i, &(v)->counter) \
168 : ia64_atomic64_add(__ia64_aar_i, v); \ 168 : ia64_atomic64_add(__ia64_aar_i, v); \
@@ -170,7 +170,7 @@ ATOMIC64_OPS(sub, -)
170 170
171#define atomic64_sub_return(i,v) \ 171#define atomic64_sub_return(i,v) \
172({ \ 172({ \
173 long __ia64_asr_i = (i); \ 173 s64 __ia64_asr_i = (i); \
174 __ia64_atomic_const(i) \ 174 __ia64_atomic_const(i) \
175 ? ia64_fetch_and_add(-__ia64_asr_i, &(v)->counter) \ 175 ? ia64_fetch_and_add(-__ia64_asr_i, &(v)->counter) \
176 : ia64_atomic64_sub(__ia64_asr_i, v); \ 176 : ia64_atomic64_sub(__ia64_asr_i, v); \
@@ -178,7 +178,7 @@ ATOMIC64_OPS(sub, -)
178 178
179#define atomic64_fetch_add(i,v) \ 179#define atomic64_fetch_add(i,v) \
180({ \ 180({ \
181 long __ia64_aar_i = (i); \ 181 s64 __ia64_aar_i = (i); \
182 __ia64_atomic_const(i) \ 182 __ia64_atomic_const(i) \
183 ? ia64_fetchadd(__ia64_aar_i, &(v)->counter, acq) \ 183 ? ia64_fetchadd(__ia64_aar_i, &(v)->counter, acq) \
184 : ia64_atomic64_fetch_add(__ia64_aar_i, v); \ 184 : ia64_atomic64_fetch_add(__ia64_aar_i, v); \
@@ -186,7 +186,7 @@ ATOMIC64_OPS(sub, -)
186 186
187#define atomic64_fetch_sub(i,v) \ 187#define atomic64_fetch_sub(i,v) \
188({ \ 188({ \
189 long __ia64_asr_i = (i); \ 189 s64 __ia64_asr_i = (i); \
190 __ia64_atomic_const(i) \ 190 __ia64_atomic_const(i) \
191 ? ia64_fetchadd(-__ia64_asr_i, &(v)->counter, acq) \ 191 ? ia64_fetchadd(-__ia64_asr_i, &(v)->counter, acq) \
192 : ia64_atomic64_fetch_sub(__ia64_asr_i, v); \ 192 : ia64_atomic64_fetch_sub(__ia64_asr_i, v); \
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 58a6337c0690..7c52bd2695a2 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -6390,11 +6390,7 @@ pfm_install_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl)
6390 } 6390 }
6391 6391
6392 /* save the current system wide pmu states */ 6392 /* save the current system wide pmu states */
6393 ret = on_each_cpu(pfm_alt_save_pmu_state, NULL, 1); 6393 on_each_cpu(pfm_alt_save_pmu_state, NULL, 1);
6394 if (ret) {
6395 DPRINT(("on_each_cpu() failed: %d\n", ret));
6396 goto cleanup_reserve;
6397 }
6398 6394
6399 /* officially change to the alternate interrupt handler */ 6395 /* officially change to the alternate interrupt handler */
6400 pfm_alt_intr_handler = hdl; 6396 pfm_alt_intr_handler = hdl;
@@ -6421,7 +6417,6 @@ int
6421pfm_remove_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl) 6417pfm_remove_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl)
6422{ 6418{
6423 int i; 6419 int i;
6424 int ret;
6425 6420
6426 if (hdl == NULL) return -EINVAL; 6421 if (hdl == NULL) return -EINVAL;
6427 6422
@@ -6435,10 +6430,7 @@ pfm_remove_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl)
6435 6430
6436 pfm_alt_intr_handler = NULL; 6431 pfm_alt_intr_handler = NULL;
6437 6432
6438 ret = on_each_cpu(pfm_alt_restore_pmu_state, NULL, 1); 6433 on_each_cpu(pfm_alt_restore_pmu_state, NULL, 1);
6439 if (ret) {
6440 DPRINT(("on_each_cpu() failed: %d\n", ret));
6441 }
6442 6434
6443 for_each_online_cpu(i) { 6435 for_each_online_cpu(i) {
6444 pfm_unreserve_session(NULL, 1, i); 6436 pfm_unreserve_session(NULL, 1, i);
diff --git a/arch/ia64/kernel/uncached.c b/arch/ia64/kernel/uncached.c
index edcdfc149311..16c6d377c502 100644
--- a/arch/ia64/kernel/uncached.c
+++ b/arch/ia64/kernel/uncached.c
@@ -121,8 +121,8 @@ static int uncached_add_chunk(struct uncached_pool *uc_pool, int nid)
121 status = ia64_pal_prefetch_visibility(PAL_VISIBILITY_PHYSICAL); 121 status = ia64_pal_prefetch_visibility(PAL_VISIBILITY_PHYSICAL);
122 if (status == PAL_VISIBILITY_OK_REMOTE_NEEDED) { 122 if (status == PAL_VISIBILITY_OK_REMOTE_NEEDED) {
123 atomic_set(&uc_pool->status, 0); 123 atomic_set(&uc_pool->status, 0);
124 status = smp_call_function(uncached_ipi_visibility, uc_pool, 1); 124 smp_call_function(uncached_ipi_visibility, uc_pool, 1);
125 if (status || atomic_read(&uc_pool->status)) 125 if (atomic_read(&uc_pool->status))
126 goto failed; 126 goto failed;
127 } else if (status != PAL_VISIBILITY_OK) 127 } else if (status != PAL_VISIBILITY_OK)
128 goto failed; 128 goto failed;
@@ -143,8 +143,8 @@ static int uncached_add_chunk(struct uncached_pool *uc_pool, int nid)
143 if (status != PAL_STATUS_SUCCESS) 143 if (status != PAL_STATUS_SUCCESS)
144 goto failed; 144 goto failed;
145 atomic_set(&uc_pool->status, 0); 145 atomic_set(&uc_pool->status, 0);
146 status = smp_call_function(uncached_ipi_mc_drain, uc_pool, 1); 146 smp_call_function(uncached_ipi_mc_drain, uc_pool, 1);
147 if (status || atomic_read(&uc_pool->status)) 147 if (atomic_read(&uc_pool->status))
148 goto failed; 148 goto failed;
149 149
150 /* 150 /*
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 218e037ef901..00f5c98a5e05 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -3,10 +3,13 @@ config M68K
3 bool 3 bool
4 default y 4 default y
5 select ARCH_32BIT_OFF_T 5 select ARCH_32BIT_OFF_T
6 select ARCH_HAS_DMA_MMAP_PGPROT if MMU && !COLDFIRE
7 select ARCH_HAS_DMA_PREP_COHERENT
6 select ARCH_HAS_SYNC_DMA_FOR_DEVICE if HAS_DMA 8 select ARCH_HAS_SYNC_DMA_FOR_DEVICE if HAS_DMA
7 select ARCH_MIGHT_HAVE_PC_PARPORT if ISA 9 select ARCH_MIGHT_HAVE_PC_PARPORT if ISA
8 select ARCH_NO_COHERENT_DMA_MMAP if !MMU 10 select ARCH_NO_COHERENT_DMA_MMAP if !MMU
9 select ARCH_NO_PREEMPT if !COLDFIRE 11 select ARCH_NO_PREEMPT if !COLDFIRE
12 select DMA_DIRECT_REMAP if HAS_DMA && MMU && !COLDFIRE
10 select HAVE_IDE 13 select HAVE_IDE
11 select HAVE_AOUT if MMU 14 select HAVE_AOUT if MMU
12 select HAVE_DEBUG_BUGVERBOSE 15 select HAVE_DEBUG_BUGVERBOSE
diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig
index fea392cfcf1b..04e0f211afb3 100644
--- a/arch/m68k/configs/amiga_defconfig
+++ b/arch/m68k/configs/amiga_defconfig
@@ -71,9 +71,6 @@ CONFIG_INET_AH=m
71CONFIG_INET_ESP=m 71CONFIG_INET_ESP=m
72CONFIG_INET_ESP_OFFLOAD=m 72CONFIG_INET_ESP_OFFLOAD=m
73CONFIG_INET_IPCOMP=m 73CONFIG_INET_IPCOMP=m
74CONFIG_INET_XFRM_MODE_TRANSPORT=m
75CONFIG_INET_XFRM_MODE_TUNNEL=m
76CONFIG_INET_XFRM_MODE_BEET=m
77CONFIG_INET_DIAG=m 74CONFIG_INET_DIAG=m
78CONFIG_INET_UDP_DIAG=m 75CONFIG_INET_UDP_DIAG=m
79CONFIG_INET_RAW_DIAG=m 76CONFIG_INET_RAW_DIAG=m
@@ -205,7 +202,6 @@ CONFIG_IP_SET_HASH_NETNET=m
205CONFIG_IP_SET_HASH_NETPORT=m 202CONFIG_IP_SET_HASH_NETPORT=m
206CONFIG_IP_SET_HASH_NETIFACE=m 203CONFIG_IP_SET_HASH_NETIFACE=m
207CONFIG_IP_SET_LIST_SET=m 204CONFIG_IP_SET_LIST_SET=m
208CONFIG_NFT_CHAIN_ROUTE_IPV4=m
209CONFIG_NFT_DUP_IPV4=m 205CONFIG_NFT_DUP_IPV4=m
210CONFIG_NFT_FIB_IPV4=m 206CONFIG_NFT_FIB_IPV4=m
211CONFIG_NF_TABLES_ARP=y 207CONFIG_NF_TABLES_ARP=y
@@ -231,7 +227,6 @@ CONFIG_IP_NF_RAW=m
231CONFIG_IP_NF_ARPTABLES=m 227CONFIG_IP_NF_ARPTABLES=m
232CONFIG_IP_NF_ARPFILTER=m 228CONFIG_IP_NF_ARPFILTER=m
233CONFIG_IP_NF_ARP_MANGLE=m 229CONFIG_IP_NF_ARP_MANGLE=m
234CONFIG_NFT_CHAIN_ROUTE_IPV6=m
235CONFIG_NFT_DUP_IPV6=m 230CONFIG_NFT_DUP_IPV6=m
236CONFIG_NFT_FIB_IPV6=m 231CONFIG_NFT_FIB_IPV6=m
237CONFIG_NF_FLOW_TABLE_IPV6=m 232CONFIG_NF_FLOW_TABLE_IPV6=m
@@ -308,7 +303,6 @@ CONFIG_AF_KCM=m
308# CONFIG_WIRELESS is not set 303# CONFIG_WIRELESS is not set
309CONFIG_PSAMPLE=m 304CONFIG_PSAMPLE=m
310CONFIG_NET_IFE=m 305CONFIG_NET_IFE=m
311# CONFIG_UEVENT_HELPER is not set
312CONFIG_DEVTMPFS=y 306CONFIG_DEVTMPFS=y
313CONFIG_DEVTMPFS_MOUNT=y 307CONFIG_DEVTMPFS_MOUNT=y
314CONFIG_TEST_ASYNC_DRIVER_PROBE=m 308CONFIG_TEST_ASYNC_DRIVER_PROBE=m
@@ -436,6 +430,8 @@ CONFIG_FB_AMIGA_OCS=y
436CONFIG_FB_AMIGA_ECS=y 430CONFIG_FB_AMIGA_ECS=y
437CONFIG_FB_AMIGA_AGA=y 431CONFIG_FB_AMIGA_AGA=y
438CONFIG_FB_FM2=y 432CONFIG_FB_FM2=y
433# CONFIG_LCD_CLASS_DEVICE is not set
434# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
439CONFIG_FRAMEBUFFER_CONSOLE=y 435CONFIG_FRAMEBUFFER_CONSOLE=y
440CONFIG_LOGO=y 436CONFIG_LOGO=y
441CONFIG_SOUND=m 437CONFIG_SOUND=m
@@ -553,13 +549,14 @@ CONFIG_NLS_MAC_TURKISH=m
553CONFIG_DLM=m 549CONFIG_DLM=m
554CONFIG_ENCRYPTED_KEYS=m 550CONFIG_ENCRYPTED_KEYS=m
555CONFIG_HARDENED_USERCOPY=y 551CONFIG_HARDENED_USERCOPY=y
556CONFIG_CRYPTO_RSA=m
557CONFIG_CRYPTO_DH=m
558CONFIG_CRYPTO_ECDH=m
559CONFIG_CRYPTO_MANAGER=y 552CONFIG_CRYPTO_MANAGER=y
560CONFIG_CRYPTO_USER=m 553CONFIG_CRYPTO_USER=m
561CONFIG_CRYPTO_CRYPTD=m 554CONFIG_CRYPTO_CRYPTD=m
562CONFIG_CRYPTO_TEST=m 555CONFIG_CRYPTO_TEST=m
556CONFIG_CRYPTO_RSA=m
557CONFIG_CRYPTO_DH=m
558CONFIG_CRYPTO_ECDH=m
559CONFIG_CRYPTO_ECRDSA=m
563CONFIG_CRYPTO_CHACHA20POLY1305=m 560CONFIG_CRYPTO_CHACHA20POLY1305=m
564CONFIG_CRYPTO_AEGIS128=m 561CONFIG_CRYPTO_AEGIS128=m
565CONFIG_CRYPTO_AEGIS128L=m 562CONFIG_CRYPTO_AEGIS128L=m
@@ -583,7 +580,6 @@ CONFIG_CRYPTO_RMD256=m
583CONFIG_CRYPTO_RMD320=m 580CONFIG_CRYPTO_RMD320=m
584CONFIG_CRYPTO_SHA3=m 581CONFIG_CRYPTO_SHA3=m
585CONFIG_CRYPTO_SM3=m 582CONFIG_CRYPTO_SM3=m
586CONFIG_CRYPTO_STREEBOG=m
587CONFIG_CRYPTO_TGR192=m 583CONFIG_CRYPTO_TGR192=m
588CONFIG_CRYPTO_WP512=m 584CONFIG_CRYPTO_WP512=m
589CONFIG_CRYPTO_AES_TI=m 585CONFIG_CRYPTO_AES_TI=m
@@ -626,6 +622,7 @@ CONFIG_ATOMIC64_SELFTEST=m
626CONFIG_ASYNC_RAID6_TEST=m 622CONFIG_ASYNC_RAID6_TEST=m
627CONFIG_TEST_HEXDUMP=m 623CONFIG_TEST_HEXDUMP=m
628CONFIG_TEST_STRING_HELPERS=m 624CONFIG_TEST_STRING_HELPERS=m
625CONFIG_TEST_STRSCPY=m
629CONFIG_TEST_KSTRTOX=m 626CONFIG_TEST_KSTRTOX=m
630CONFIG_TEST_PRINTF=m 627CONFIG_TEST_PRINTF=m
631CONFIG_TEST_BITMAP=m 628CONFIG_TEST_BITMAP=m
diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig
index 2474d267460e..c6abbb535878 100644
--- a/arch/m68k/configs/apollo_defconfig
+++ b/arch/m68k/configs/apollo_defconfig
@@ -67,9 +67,6 @@ CONFIG_INET_AH=m
67CONFIG_INET_ESP=m 67CONFIG_INET_ESP=m
68CONFIG_INET_ESP_OFFLOAD=m 68CONFIG_INET_ESP_OFFLOAD=m
69CONFIG_INET_IPCOMP=m 69CONFIG_INET_IPCOMP=m
70CONFIG_INET_XFRM_MODE_TRANSPORT=m
71CONFIG_INET_XFRM_MODE_TUNNEL=m
72CONFIG_INET_XFRM_MODE_BEET=m
73CONFIG_INET_DIAG=m 70CONFIG_INET_DIAG=m
74CONFIG_INET_UDP_DIAG=m 71CONFIG_INET_UDP_DIAG=m
75CONFIG_INET_RAW_DIAG=m 72CONFIG_INET_RAW_DIAG=m
@@ -201,7 +198,6 @@ CONFIG_IP_SET_HASH_NETNET=m
201CONFIG_IP_SET_HASH_NETPORT=m 198CONFIG_IP_SET_HASH_NETPORT=m
202CONFIG_IP_SET_HASH_NETIFACE=m 199CONFIG_IP_SET_HASH_NETIFACE=m
203CONFIG_IP_SET_LIST_SET=m 200CONFIG_IP_SET_LIST_SET=m
204CONFIG_NFT_CHAIN_ROUTE_IPV4=m
205CONFIG_NFT_DUP_IPV4=m 201CONFIG_NFT_DUP_IPV4=m
206CONFIG_NFT_FIB_IPV4=m 202CONFIG_NFT_FIB_IPV4=m
207CONFIG_NF_TABLES_ARP=y 203CONFIG_NF_TABLES_ARP=y
@@ -227,7 +223,6 @@ CONFIG_IP_NF_RAW=m
227CONFIG_IP_NF_ARPTABLES=m 223CONFIG_IP_NF_ARPTABLES=m
228CONFIG_IP_NF_ARPFILTER=m 224CONFIG_IP_NF_ARPFILTER=m
229CONFIG_IP_NF_ARP_MANGLE=m 225CONFIG_IP_NF_ARP_MANGLE=m
230CONFIG_NFT_CHAIN_ROUTE_IPV6=m
231CONFIG_NFT_DUP_IPV6=m 226CONFIG_NFT_DUP_IPV6=m
232CONFIG_NFT_FIB_IPV6=m 227CONFIG_NFT_FIB_IPV6=m
233CONFIG_NF_FLOW_TABLE_IPV6=m 228CONFIG_NF_FLOW_TABLE_IPV6=m
@@ -304,7 +299,6 @@ CONFIG_AF_KCM=m
304# CONFIG_WIRELESS is not set 299# CONFIG_WIRELESS is not set
305CONFIG_PSAMPLE=m 300CONFIG_PSAMPLE=m
306CONFIG_NET_IFE=m 301CONFIG_NET_IFE=m
307# CONFIG_UEVENT_HELPER is not set
308CONFIG_DEVTMPFS=y 302CONFIG_DEVTMPFS=y
309CONFIG_DEVTMPFS_MOUNT=y 303CONFIG_DEVTMPFS_MOUNT=y
310CONFIG_TEST_ASYNC_DRIVER_PROBE=m 304CONFIG_TEST_ASYNC_DRIVER_PROBE=m
@@ -397,6 +391,8 @@ CONFIG_PPS_CLIENT_LDISC=m
397CONFIG_PTP_1588_CLOCK=m 391CONFIG_PTP_1588_CLOCK=m
398# CONFIG_HWMON is not set 392# CONFIG_HWMON is not set
399CONFIG_FB=y 393CONFIG_FB=y
394# CONFIG_LCD_CLASS_DEVICE is not set
395# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
400CONFIG_FRAMEBUFFER_CONSOLE=y 396CONFIG_FRAMEBUFFER_CONSOLE=y
401CONFIG_LOGO=y 397CONFIG_LOGO=y
402# CONFIG_LOGO_LINUX_VGA16 is not set 398# CONFIG_LOGO_LINUX_VGA16 is not set
@@ -513,13 +509,14 @@ CONFIG_NLS_MAC_TURKISH=m
513CONFIG_DLM=m 509CONFIG_DLM=m
514CONFIG_ENCRYPTED_KEYS=m 510CONFIG_ENCRYPTED_KEYS=m
515CONFIG_HARDENED_USERCOPY=y 511CONFIG_HARDENED_USERCOPY=y
516CONFIG_CRYPTO_RSA=m
517CONFIG_CRYPTO_DH=m
518CONFIG_CRYPTO_ECDH=m
519CONFIG_CRYPTO_MANAGER=y 512CONFIG_CRYPTO_MANAGER=y
520CONFIG_CRYPTO_USER=m 513CONFIG_CRYPTO_USER=m
521CONFIG_CRYPTO_CRYPTD=m 514CONFIG_CRYPTO_CRYPTD=m
522CONFIG_CRYPTO_TEST=m 515CONFIG_CRYPTO_TEST=m
516CONFIG_CRYPTO_RSA=m
517CONFIG_CRYPTO_DH=m
518CONFIG_CRYPTO_ECDH=m
519CONFIG_CRYPTO_ECRDSA=m
523CONFIG_CRYPTO_CHACHA20POLY1305=m 520CONFIG_CRYPTO_CHACHA20POLY1305=m
524CONFIG_CRYPTO_AEGIS128=m 521CONFIG_CRYPTO_AEGIS128=m
525CONFIG_CRYPTO_AEGIS128L=m 522CONFIG_CRYPTO_AEGIS128L=m
@@ -543,7 +540,6 @@ CONFIG_CRYPTO_RMD256=m
543CONFIG_CRYPTO_RMD320=m 540CONFIG_CRYPTO_RMD320=m
544CONFIG_CRYPTO_SHA3=m 541CONFIG_CRYPTO_SHA3=m
545CONFIG_CRYPTO_SM3=m 542CONFIG_CRYPTO_SM3=m
546CONFIG_CRYPTO_STREEBOG=m
547CONFIG_CRYPTO_TGR192=m 543CONFIG_CRYPTO_TGR192=m
548CONFIG_CRYPTO_WP512=m 544CONFIG_CRYPTO_WP512=m
549CONFIG_CRYPTO_AES_TI=m 545CONFIG_CRYPTO_AES_TI=m
@@ -586,6 +582,7 @@ CONFIG_ATOMIC64_SELFTEST=m
586CONFIG_ASYNC_RAID6_TEST=m 582CONFIG_ASYNC_RAID6_TEST=m
587CONFIG_TEST_HEXDUMP=m 583CONFIG_TEST_HEXDUMP=m
588CONFIG_TEST_STRING_HELPERS=m 584CONFIG_TEST_STRING_HELPERS=m
585CONFIG_TEST_STRSCPY=m
589CONFIG_TEST_KSTRTOX=m 586CONFIG_TEST_KSTRTOX=m
590CONFIG_TEST_PRINTF=m 587CONFIG_TEST_PRINTF=m
591CONFIG_TEST_BITMAP=m 588CONFIG_TEST_BITMAP=m
diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig
index 0fc7d2992fe0..06ae65bad177 100644
--- a/arch/m68k/configs/atari_defconfig
+++ b/arch/m68k/configs/atari_defconfig
@@ -74,9 +74,6 @@ CONFIG_INET_AH=m
74CONFIG_INET_ESP=m 74CONFIG_INET_ESP=m
75CONFIG_INET_ESP_OFFLOAD=m 75CONFIG_INET_ESP_OFFLOAD=m
76CONFIG_INET_IPCOMP=m 76CONFIG_INET_IPCOMP=m
77CONFIG_INET_XFRM_MODE_TRANSPORT=m
78CONFIG_INET_XFRM_MODE_TUNNEL=m
79CONFIG_INET_XFRM_MODE_BEET=m
80CONFIG_INET_DIAG=m 77CONFIG_INET_DIAG=m
81CONFIG_INET_UDP_DIAG=m 78CONFIG_INET_UDP_DIAG=m
82CONFIG_INET_RAW_DIAG=m 79CONFIG_INET_RAW_DIAG=m
@@ -208,7 +205,6 @@ CONFIG_IP_SET_HASH_NETNET=m
208CONFIG_IP_SET_HASH_NETPORT=m 205CONFIG_IP_SET_HASH_NETPORT=m
209CONFIG_IP_SET_HASH_NETIFACE=m 206CONFIG_IP_SET_HASH_NETIFACE=m
210CONFIG_IP_SET_LIST_SET=m 207CONFIG_IP_SET_LIST_SET=m
211CONFIG_NFT_CHAIN_ROUTE_IPV4=m
212CONFIG_NFT_DUP_IPV4=m 208CONFIG_NFT_DUP_IPV4=m
213CONFIG_NFT_FIB_IPV4=m 209CONFIG_NFT_FIB_IPV4=m
214CONFIG_NF_TABLES_ARP=y 210CONFIG_NF_TABLES_ARP=y
@@ -234,7 +230,6 @@ CONFIG_IP_NF_RAW=m
234CONFIG_IP_NF_ARPTABLES=m 230CONFIG_IP_NF_ARPTABLES=m
235CONFIG_IP_NF_ARPFILTER=m 231CONFIG_IP_NF_ARPFILTER=m
236CONFIG_IP_NF_ARP_MANGLE=m 232CONFIG_IP_NF_ARP_MANGLE=m
237CONFIG_NFT_CHAIN_ROUTE_IPV6=m
238CONFIG_NFT_DUP_IPV6=m 233CONFIG_NFT_DUP_IPV6=m
239CONFIG_NFT_FIB_IPV6=m 234CONFIG_NFT_FIB_IPV6=m
240CONFIG_NF_FLOW_TABLE_IPV6=m 235CONFIG_NF_FLOW_TABLE_IPV6=m
@@ -311,7 +306,6 @@ CONFIG_AF_KCM=m
311# CONFIG_WIRELESS is not set 306# CONFIG_WIRELESS is not set
312CONFIG_PSAMPLE=m 307CONFIG_PSAMPLE=m
313CONFIG_NET_IFE=m 308CONFIG_NET_IFE=m
314# CONFIG_UEVENT_HELPER is not set
315CONFIG_DEVTMPFS=y 309CONFIG_DEVTMPFS=y
316CONFIG_DEVTMPFS_MOUNT=y 310CONFIG_DEVTMPFS_MOUNT=y
317CONFIG_TEST_ASYNC_DRIVER_PROBE=m 311CONFIG_TEST_ASYNC_DRIVER_PROBE=m
@@ -421,6 +415,8 @@ CONFIG_PTP_1588_CLOCK=m
421# CONFIG_HWMON is not set 415# CONFIG_HWMON is not set
422CONFIG_FB=y 416CONFIG_FB=y
423CONFIG_FB_ATARI=y 417CONFIG_FB_ATARI=y
418# CONFIG_LCD_CLASS_DEVICE is not set
419# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
424CONFIG_FRAMEBUFFER_CONSOLE=y 420CONFIG_FRAMEBUFFER_CONSOLE=y
425CONFIG_LOGO=y 421CONFIG_LOGO=y
426CONFIG_SOUND=m 422CONFIG_SOUND=m
@@ -535,13 +531,14 @@ CONFIG_NLS_MAC_TURKISH=m
535CONFIG_DLM=m 531CONFIG_DLM=m
536CONFIG_ENCRYPTED_KEYS=m 532CONFIG_ENCRYPTED_KEYS=m
537CONFIG_HARDENED_USERCOPY=y 533CONFIG_HARDENED_USERCOPY=y
538CONFIG_CRYPTO_RSA=m
539CONFIG_CRYPTO_DH=m
540CONFIG_CRYPTO_ECDH=m
541CONFIG_CRYPTO_MANAGER=y 534CONFIG_CRYPTO_MANAGER=y
542CONFIG_CRYPTO_USER=m 535CONFIG_CRYPTO_USER=m
543CONFIG_CRYPTO_CRYPTD=m 536CONFIG_CRYPTO_CRYPTD=m
544CONFIG_CRYPTO_TEST=m 537CONFIG_CRYPTO_TEST=m
538CONFIG_CRYPTO_RSA=m
539CONFIG_CRYPTO_DH=m
540CONFIG_CRYPTO_ECDH=m
541CONFIG_CRYPTO_ECRDSA=m
545CONFIG_CRYPTO_CHACHA20POLY1305=m 542CONFIG_CRYPTO_CHACHA20POLY1305=m
546CONFIG_CRYPTO_AEGIS128=m 543CONFIG_CRYPTO_AEGIS128=m
547CONFIG_CRYPTO_AEGIS128L=m 544CONFIG_CRYPTO_AEGIS128L=m
@@ -565,7 +562,6 @@ CONFIG_CRYPTO_RMD256=m
565CONFIG_CRYPTO_RMD320=m 562CONFIG_CRYPTO_RMD320=m
566CONFIG_CRYPTO_SHA3=m 563CONFIG_CRYPTO_SHA3=m
567CONFIG_CRYPTO_SM3=m 564CONFIG_CRYPTO_SM3=m
568CONFIG_CRYPTO_STREEBOG=m
569CONFIG_CRYPTO_TGR192=m 565CONFIG_CRYPTO_TGR192=m
570CONFIG_CRYPTO_WP512=m 566CONFIG_CRYPTO_WP512=m
571CONFIG_CRYPTO_AES_TI=m 567CONFIG_CRYPTO_AES_TI=m
@@ -608,6 +604,7 @@ CONFIG_ATOMIC64_SELFTEST=m
608CONFIG_ASYNC_RAID6_TEST=m 604CONFIG_ASYNC_RAID6_TEST=m
609CONFIG_TEST_HEXDUMP=m 605CONFIG_TEST_HEXDUMP=m
610CONFIG_TEST_STRING_HELPERS=m 606CONFIG_TEST_STRING_HELPERS=m
607CONFIG_TEST_STRSCPY=m
611CONFIG_TEST_KSTRTOX=m 608CONFIG_TEST_KSTRTOX=m
612CONFIG_TEST_PRINTF=m 609CONFIG_TEST_PRINTF=m
613CONFIG_TEST_BITMAP=m 610CONFIG_TEST_BITMAP=m
diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig
index 699df9fdf866..5616b94053b6 100644
--- a/arch/m68k/configs/bvme6000_defconfig
+++ b/arch/m68k/configs/bvme6000_defconfig
@@ -64,9 +64,6 @@ CONFIG_INET_AH=m
64CONFIG_INET_ESP=m 64CONFIG_INET_ESP=m
65CONFIG_INET_ESP_OFFLOAD=m 65CONFIG_INET_ESP_OFFLOAD=m
66CONFIG_INET_IPCOMP=m 66CONFIG_INET_IPCOMP=m
67CONFIG_INET_XFRM_MODE_TRANSPORT=m
68CONFIG_INET_XFRM_MODE_TUNNEL=m
69CONFIG_INET_XFRM_MODE_BEET=m
70CONFIG_INET_DIAG=m 67CONFIG_INET_DIAG=m
71CONFIG_INET_UDP_DIAG=m 68CONFIG_INET_UDP_DIAG=m
72CONFIG_INET_RAW_DIAG=m 69CONFIG_INET_RAW_DIAG=m
@@ -198,7 +195,6 @@ CONFIG_IP_SET_HASH_NETNET=m
198CONFIG_IP_SET_HASH_NETPORT=m 195CONFIG_IP_SET_HASH_NETPORT=m
199CONFIG_IP_SET_HASH_NETIFACE=m 196CONFIG_IP_SET_HASH_NETIFACE=m
200CONFIG_IP_SET_LIST_SET=m 197CONFIG_IP_SET_LIST_SET=m
201CONFIG_NFT_CHAIN_ROUTE_IPV4=m
202CONFIG_NFT_DUP_IPV4=m 198CONFIG_NFT_DUP_IPV4=m
203CONFIG_NFT_FIB_IPV4=m 199CONFIG_NFT_FIB_IPV4=m
204CONFIG_NF_TABLES_ARP=y 200CONFIG_NF_TABLES_ARP=y
@@ -224,7 +220,6 @@ CONFIG_IP_NF_RAW=m
224CONFIG_IP_NF_ARPTABLES=m 220CONFIG_IP_NF_ARPTABLES=m
225CONFIG_IP_NF_ARPFILTER=m 221CONFIG_IP_NF_ARPFILTER=m
226CONFIG_IP_NF_ARP_MANGLE=m 222CONFIG_IP_NF_ARP_MANGLE=m
227CONFIG_NFT_CHAIN_ROUTE_IPV6=m
228CONFIG_NFT_DUP_IPV6=m 223CONFIG_NFT_DUP_IPV6=m
229CONFIG_NFT_FIB_IPV6=m 224CONFIG_NFT_FIB_IPV6=m
230CONFIG_NF_FLOW_TABLE_IPV6=m 225CONFIG_NF_FLOW_TABLE_IPV6=m
@@ -301,7 +296,6 @@ CONFIG_AF_KCM=m
301# CONFIG_WIRELESS is not set 296# CONFIG_WIRELESS is not set
302CONFIG_PSAMPLE=m 297CONFIG_PSAMPLE=m
303CONFIG_NET_IFE=m 298CONFIG_NET_IFE=m
304# CONFIG_UEVENT_HELPER is not set
305CONFIG_DEVTMPFS=y 299CONFIG_DEVTMPFS=y
306CONFIG_DEVTMPFS_MOUNT=y 300CONFIG_DEVTMPFS_MOUNT=y
307CONFIG_TEST_ASYNC_DRIVER_PROBE=m 301CONFIG_TEST_ASYNC_DRIVER_PROBE=m
@@ -394,6 +388,8 @@ CONFIG_NTP_PPS=y
394CONFIG_PPS_CLIENT_LDISC=m 388CONFIG_PPS_CLIENT_LDISC=m
395CONFIG_PTP_1588_CLOCK=m 389CONFIG_PTP_1588_CLOCK=m
396# CONFIG_HWMON is not set 390# CONFIG_HWMON is not set
391# CONFIG_LCD_CLASS_DEVICE is not set
392# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
397CONFIG_HID=m 393CONFIG_HID=m
398CONFIG_HIDRAW=y 394CONFIG_HIDRAW=y
399CONFIG_UHID=m 395CONFIG_UHID=m
@@ -506,13 +502,14 @@ CONFIG_NLS_MAC_TURKISH=m
506CONFIG_DLM=m 502CONFIG_DLM=m
507CONFIG_ENCRYPTED_KEYS=m 503CONFIG_ENCRYPTED_KEYS=m
508CONFIG_HARDENED_USERCOPY=y 504CONFIG_HARDENED_USERCOPY=y
509CONFIG_CRYPTO_RSA=m
510CONFIG_CRYPTO_DH=m
511CONFIG_CRYPTO_ECDH=m
512CONFIG_CRYPTO_MANAGER=y 505CONFIG_CRYPTO_MANAGER=y
513CONFIG_CRYPTO_USER=m 506CONFIG_CRYPTO_USER=m
514CONFIG_CRYPTO_CRYPTD=m 507CONFIG_CRYPTO_CRYPTD=m
515CONFIG_CRYPTO_TEST=m 508CONFIG_CRYPTO_TEST=m
509CONFIG_CRYPTO_RSA=m
510CONFIG_CRYPTO_DH=m
511CONFIG_CRYPTO_ECDH=m
512CONFIG_CRYPTO_ECRDSA=m
516CONFIG_CRYPTO_CHACHA20POLY1305=m 513CONFIG_CRYPTO_CHACHA20POLY1305=m
517CONFIG_CRYPTO_AEGIS128=m 514CONFIG_CRYPTO_AEGIS128=m
518CONFIG_CRYPTO_AEGIS128L=m 515CONFIG_CRYPTO_AEGIS128L=m
@@ -536,7 +533,6 @@ CONFIG_CRYPTO_RMD256=m
536CONFIG_CRYPTO_RMD320=m 533CONFIG_CRYPTO_RMD320=m
537CONFIG_CRYPTO_SHA3=m 534CONFIG_CRYPTO_SHA3=m
538CONFIG_CRYPTO_SM3=m 535CONFIG_CRYPTO_SM3=m
539CONFIG_CRYPTO_STREEBOG=m
540CONFIG_CRYPTO_TGR192=m 536CONFIG_CRYPTO_TGR192=m
541CONFIG_CRYPTO_WP512=m 537CONFIG_CRYPTO_WP512=m
542CONFIG_CRYPTO_AES_TI=m 538CONFIG_CRYPTO_AES_TI=m
@@ -579,6 +575,7 @@ CONFIG_ATOMIC64_SELFTEST=m
579CONFIG_ASYNC_RAID6_TEST=m 575CONFIG_ASYNC_RAID6_TEST=m
580CONFIG_TEST_HEXDUMP=m 576CONFIG_TEST_HEXDUMP=m
581CONFIG_TEST_STRING_HELPERS=m 577CONFIG_TEST_STRING_HELPERS=m
578CONFIG_TEST_STRSCPY=m
582CONFIG_TEST_KSTRTOX=m 579CONFIG_TEST_KSTRTOX=m
583CONFIG_TEST_PRINTF=m 580CONFIG_TEST_PRINTF=m
584CONFIG_TEST_BITMAP=m 581CONFIG_TEST_BITMAP=m
diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig
index b50802255324..1106521f3b56 100644
--- a/arch/m68k/configs/hp300_defconfig
+++ b/arch/m68k/configs/hp300_defconfig
@@ -66,9 +66,6 @@ CONFIG_INET_AH=m
66CONFIG_INET_ESP=m 66CONFIG_INET_ESP=m
67CONFIG_INET_ESP_OFFLOAD=m 67CONFIG_INET_ESP_OFFLOAD=m
68CONFIG_INET_IPCOMP=m 68CONFIG_INET_IPCOMP=m
69CONFIG_INET_XFRM_MODE_TRANSPORT=m
70CONFIG_INET_XFRM_MODE_TUNNEL=m
71CONFIG_INET_XFRM_MODE_BEET=m
72CONFIG_INET_DIAG=m 69CONFIG_INET_DIAG=m
73CONFIG_INET_UDP_DIAG=m 70CONFIG_INET_UDP_DIAG=m
74CONFIG_INET_RAW_DIAG=m 71CONFIG_INET_RAW_DIAG=m
@@ -200,7 +197,6 @@ CONFIG_IP_SET_HASH_NETNET=m
200CONFIG_IP_SET_HASH_NETPORT=m 197CONFIG_IP_SET_HASH_NETPORT=m
201CONFIG_IP_SET_HASH_NETIFACE=m 198CONFIG_IP_SET_HASH_NETIFACE=m
202CONFIG_IP_SET_LIST_SET=m 199CONFIG_IP_SET_LIST_SET=m
203CONFIG_NFT_CHAIN_ROUTE_IPV4=m
204CONFIG_NFT_DUP_IPV4=m 200CONFIG_NFT_DUP_IPV4=m
205CONFIG_NFT_FIB_IPV4=m 201CONFIG_NFT_FIB_IPV4=m
206CONFIG_NF_TABLES_ARP=y 202CONFIG_NF_TABLES_ARP=y
@@ -226,7 +222,6 @@ CONFIG_IP_NF_RAW=m
226CONFIG_IP_NF_ARPTABLES=m 222CONFIG_IP_NF_ARPTABLES=m
227CONFIG_IP_NF_ARPFILTER=m 223CONFIG_IP_NF_ARPFILTER=m
228CONFIG_IP_NF_ARP_MANGLE=m 224CONFIG_IP_NF_ARP_MANGLE=m
229CONFIG_NFT_CHAIN_ROUTE_IPV6=m
230CONFIG_NFT_DUP_IPV6=m 225CONFIG_NFT_DUP_IPV6=m
231CONFIG_NFT_FIB_IPV6=m 226CONFIG_NFT_FIB_IPV6=m
232CONFIG_NF_FLOW_TABLE_IPV6=m 227CONFIG_NF_FLOW_TABLE_IPV6=m
@@ -303,7 +298,6 @@ CONFIG_AF_KCM=m
303# CONFIG_WIRELESS is not set 298# CONFIG_WIRELESS is not set
304CONFIG_PSAMPLE=m 299CONFIG_PSAMPLE=m
305CONFIG_NET_IFE=m 300CONFIG_NET_IFE=m
306# CONFIG_UEVENT_HELPER is not set
307CONFIG_DEVTMPFS=y 301CONFIG_DEVTMPFS=y
308CONFIG_DEVTMPFS_MOUNT=y 302CONFIG_DEVTMPFS_MOUNT=y
309CONFIG_TEST_ASYNC_DRIVER_PROBE=m 303CONFIG_TEST_ASYNC_DRIVER_PROBE=m
@@ -399,6 +393,8 @@ CONFIG_PPS_CLIENT_LDISC=m
399CONFIG_PTP_1588_CLOCK=m 393CONFIG_PTP_1588_CLOCK=m
400# CONFIG_HWMON is not set 394# CONFIG_HWMON is not set
401CONFIG_FB=y 395CONFIG_FB=y
396# CONFIG_LCD_CLASS_DEVICE is not set
397# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
402CONFIG_FRAMEBUFFER_CONSOLE=y 398CONFIG_FRAMEBUFFER_CONSOLE=y
403CONFIG_LOGO=y 399CONFIG_LOGO=y
404# CONFIG_LOGO_LINUX_MONO is not set 400# CONFIG_LOGO_LINUX_MONO is not set
@@ -515,13 +511,14 @@ CONFIG_NLS_MAC_TURKISH=m
515CONFIG_DLM=m 511CONFIG_DLM=m
516CONFIG_ENCRYPTED_KEYS=m 512CONFIG_ENCRYPTED_KEYS=m
517CONFIG_HARDENED_USERCOPY=y 513CONFIG_HARDENED_USERCOPY=y
518CONFIG_CRYPTO_RSA=m
519CONFIG_CRYPTO_DH=m
520CONFIG_CRYPTO_ECDH=m
521CONFIG_CRYPTO_MANAGER=y 514CONFIG_CRYPTO_MANAGER=y
522CONFIG_CRYPTO_USER=m 515CONFIG_CRYPTO_USER=m
523CONFIG_CRYPTO_CRYPTD=m 516CONFIG_CRYPTO_CRYPTD=m
524CONFIG_CRYPTO_TEST=m 517CONFIG_CRYPTO_TEST=m
518CONFIG_CRYPTO_RSA=m
519CONFIG_CRYPTO_DH=m
520CONFIG_CRYPTO_ECDH=m
521CONFIG_CRYPTO_ECRDSA=m
525CONFIG_CRYPTO_CHACHA20POLY1305=m 522CONFIG_CRYPTO_CHACHA20POLY1305=m
526CONFIG_CRYPTO_AEGIS128=m 523CONFIG_CRYPTO_AEGIS128=m
527CONFIG_CRYPTO_AEGIS128L=m 524CONFIG_CRYPTO_AEGIS128L=m
@@ -545,7 +542,6 @@ CONFIG_CRYPTO_RMD256=m
545CONFIG_CRYPTO_RMD320=m 542CONFIG_CRYPTO_RMD320=m
546CONFIG_CRYPTO_SHA3=m 543CONFIG_CRYPTO_SHA3=m
547CONFIG_CRYPTO_SM3=m 544CONFIG_CRYPTO_SM3=m
548CONFIG_CRYPTO_STREEBOG=m
549CONFIG_CRYPTO_TGR192=m 545CONFIG_CRYPTO_TGR192=m
550CONFIG_CRYPTO_WP512=m 546CONFIG_CRYPTO_WP512=m
551CONFIG_CRYPTO_AES_TI=m 547CONFIG_CRYPTO_AES_TI=m
@@ -588,6 +584,7 @@ CONFIG_ATOMIC64_SELFTEST=m
588CONFIG_ASYNC_RAID6_TEST=m 584CONFIG_ASYNC_RAID6_TEST=m
589CONFIG_TEST_HEXDUMP=m 585CONFIG_TEST_HEXDUMP=m
590CONFIG_TEST_STRING_HELPERS=m 586CONFIG_TEST_STRING_HELPERS=m
587CONFIG_TEST_STRSCPY=m
591CONFIG_TEST_KSTRTOX=m 588CONFIG_TEST_KSTRTOX=m
592CONFIG_TEST_PRINTF=m 589CONFIG_TEST_PRINTF=m
593CONFIG_TEST_BITMAP=m 590CONFIG_TEST_BITMAP=m
diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig
index 04e7d70f6030..226c6c063cd4 100644
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -65,9 +65,6 @@ CONFIG_INET_AH=m
65CONFIG_INET_ESP=m 65CONFIG_INET_ESP=m
66CONFIG_INET_ESP_OFFLOAD=m 66CONFIG_INET_ESP_OFFLOAD=m
67CONFIG_INET_IPCOMP=m 67CONFIG_INET_IPCOMP=m
68CONFIG_INET_XFRM_MODE_TRANSPORT=m
69CONFIG_INET_XFRM_MODE_TUNNEL=m
70CONFIG_INET_XFRM_MODE_BEET=m
71CONFIG_INET_DIAG=m 68CONFIG_INET_DIAG=m
72CONFIG_INET_UDP_DIAG=m 69CONFIG_INET_UDP_DIAG=m
73CONFIG_INET_RAW_DIAG=m 70CONFIG_INET_RAW_DIAG=m
@@ -199,7 +196,6 @@ CONFIG_IP_SET_HASH_NETNET=m
199CONFIG_IP_SET_HASH_NETPORT=m 196CONFIG_IP_SET_HASH_NETPORT=m
200CONFIG_IP_SET_HASH_NETIFACE=m 197CONFIG_IP_SET_HASH_NETIFACE=m
201CONFIG_IP_SET_LIST_SET=m 198CONFIG_IP_SET_LIST_SET=m
202CONFIG_NFT_CHAIN_ROUTE_IPV4=m
203CONFIG_NFT_DUP_IPV4=m 199CONFIG_NFT_DUP_IPV4=m
204CONFIG_NFT_FIB_IPV4=m 200CONFIG_NFT_FIB_IPV4=m
205CONFIG_NF_TABLES_ARP=y 201CONFIG_NF_TABLES_ARP=y
@@ -225,7 +221,6 @@ CONFIG_IP_NF_RAW=m
225CONFIG_IP_NF_ARPTABLES=m 221CONFIG_IP_NF_ARPTABLES=m
226CONFIG_IP_NF_ARPFILTER=m 222CONFIG_IP_NF_ARPFILTER=m
227CONFIG_IP_NF_ARP_MANGLE=m 223CONFIG_IP_NF_ARP_MANGLE=m
228CONFIG_NFT_CHAIN_ROUTE_IPV6=m
229CONFIG_NFT_DUP_IPV6=m 224CONFIG_NFT_DUP_IPV6=m
230CONFIG_NFT_FIB_IPV6=m 225CONFIG_NFT_FIB_IPV6=m
231CONFIG_NF_FLOW_TABLE_IPV6=m 226CONFIG_NF_FLOW_TABLE_IPV6=m
@@ -305,7 +300,6 @@ CONFIG_AF_KCM=m
305# CONFIG_WIRELESS is not set 300# CONFIG_WIRELESS is not set
306CONFIG_PSAMPLE=m 301CONFIG_PSAMPLE=m
307CONFIG_NET_IFE=m 302CONFIG_NET_IFE=m
308# CONFIG_UEVENT_HELPER is not set
309CONFIG_DEVTMPFS=y 303CONFIG_DEVTMPFS=y
310CONFIG_DEVTMPFS_MOUNT=y 304CONFIG_DEVTMPFS_MOUNT=y
311CONFIG_TEST_ASYNC_DRIVER_PROBE=m 305CONFIG_TEST_ASYNC_DRIVER_PROBE=m
@@ -423,6 +417,8 @@ CONFIG_PTP_1588_CLOCK=m
423CONFIG_FB=y 417CONFIG_FB=y
424CONFIG_FB_VALKYRIE=y 418CONFIG_FB_VALKYRIE=y
425CONFIG_FB_MAC=y 419CONFIG_FB_MAC=y
420# CONFIG_LCD_CLASS_DEVICE is not set
421# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
426CONFIG_FRAMEBUFFER_CONSOLE=y 422CONFIG_FRAMEBUFFER_CONSOLE=y
427CONFIG_LOGO=y 423CONFIG_LOGO=y
428CONFIG_HID=m 424CONFIG_HID=m
@@ -537,13 +533,14 @@ CONFIG_NLS_MAC_TURKISH=m
537CONFIG_DLM=m 533CONFIG_DLM=m
538CONFIG_ENCRYPTED_KEYS=m 534CONFIG_ENCRYPTED_KEYS=m
539CONFIG_HARDENED_USERCOPY=y 535CONFIG_HARDENED_USERCOPY=y
540CONFIG_CRYPTO_RSA=m
541CONFIG_CRYPTO_DH=m
542CONFIG_CRYPTO_ECDH=m
543CONFIG_CRYPTO_MANAGER=y 536CONFIG_CRYPTO_MANAGER=y
544CONFIG_CRYPTO_USER=m 537CONFIG_CRYPTO_USER=m
545CONFIG_CRYPTO_CRYPTD=m 538CONFIG_CRYPTO_CRYPTD=m
546CONFIG_CRYPTO_TEST=m 539CONFIG_CRYPTO_TEST=m
540CONFIG_CRYPTO_RSA=m
541CONFIG_CRYPTO_DH=m
542CONFIG_CRYPTO_ECDH=m
543CONFIG_CRYPTO_ECRDSA=m
547CONFIG_CRYPTO_CHACHA20POLY1305=m 544CONFIG_CRYPTO_CHACHA20POLY1305=m
548CONFIG_CRYPTO_AEGIS128=m 545CONFIG_CRYPTO_AEGIS128=m
549CONFIG_CRYPTO_AEGIS128L=m 546CONFIG_CRYPTO_AEGIS128L=m
@@ -567,7 +564,6 @@ CONFIG_CRYPTO_RMD256=m
567CONFIG_CRYPTO_RMD320=m 564CONFIG_CRYPTO_RMD320=m
568CONFIG_CRYPTO_SHA3=m 565CONFIG_CRYPTO_SHA3=m
569CONFIG_CRYPTO_SM3=m 566CONFIG_CRYPTO_SM3=m
570CONFIG_CRYPTO_STREEBOG=m
571CONFIG_CRYPTO_TGR192=m 567CONFIG_CRYPTO_TGR192=m
572CONFIG_CRYPTO_WP512=m 568CONFIG_CRYPTO_WP512=m
573CONFIG_CRYPTO_AES_TI=m 569CONFIG_CRYPTO_AES_TI=m
@@ -610,6 +606,7 @@ CONFIG_ATOMIC64_SELFTEST=m
610CONFIG_ASYNC_RAID6_TEST=m 606CONFIG_ASYNC_RAID6_TEST=m
611CONFIG_TEST_HEXDUMP=m 607CONFIG_TEST_HEXDUMP=m
612CONFIG_TEST_STRING_HELPERS=m 608CONFIG_TEST_STRING_HELPERS=m
609CONFIG_TEST_STRSCPY=m
613CONFIG_TEST_KSTRTOX=m 610CONFIG_TEST_KSTRTOX=m
614CONFIG_TEST_PRINTF=m 611CONFIG_TEST_PRINTF=m
615CONFIG_TEST_BITMAP=m 612CONFIG_TEST_BITMAP=m
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
index 5e1cc4c17852..39f603417928 100644
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -85,9 +85,6 @@ CONFIG_INET_AH=m
85CONFIG_INET_ESP=m 85CONFIG_INET_ESP=m
86CONFIG_INET_ESP_OFFLOAD=m 86CONFIG_INET_ESP_OFFLOAD=m
87CONFIG_INET_IPCOMP=m 87CONFIG_INET_IPCOMP=m
88CONFIG_INET_XFRM_MODE_TRANSPORT=m
89CONFIG_INET_XFRM_MODE_TUNNEL=m
90CONFIG_INET_XFRM_MODE_BEET=m
91CONFIG_INET_DIAG=m 88CONFIG_INET_DIAG=m
92CONFIG_INET_UDP_DIAG=m 89CONFIG_INET_UDP_DIAG=m
93CONFIG_INET_RAW_DIAG=m 90CONFIG_INET_RAW_DIAG=m
@@ -219,7 +216,6 @@ CONFIG_IP_SET_HASH_NETNET=m
219CONFIG_IP_SET_HASH_NETPORT=m 216CONFIG_IP_SET_HASH_NETPORT=m
220CONFIG_IP_SET_HASH_NETIFACE=m 217CONFIG_IP_SET_HASH_NETIFACE=m
221CONFIG_IP_SET_LIST_SET=m 218CONFIG_IP_SET_LIST_SET=m
222CONFIG_NFT_CHAIN_ROUTE_IPV4=m
223CONFIG_NFT_DUP_IPV4=m 219CONFIG_NFT_DUP_IPV4=m
224CONFIG_NFT_FIB_IPV4=m 220CONFIG_NFT_FIB_IPV4=m
225CONFIG_NF_TABLES_ARP=y 221CONFIG_NF_TABLES_ARP=y
@@ -245,7 +241,6 @@ CONFIG_IP_NF_RAW=m
245CONFIG_IP_NF_ARPTABLES=m 241CONFIG_IP_NF_ARPTABLES=m
246CONFIG_IP_NF_ARPFILTER=m 242CONFIG_IP_NF_ARPFILTER=m
247CONFIG_IP_NF_ARP_MANGLE=m 243CONFIG_IP_NF_ARP_MANGLE=m
248CONFIG_NFT_CHAIN_ROUTE_IPV6=m
249CONFIG_NFT_DUP_IPV6=m 244CONFIG_NFT_DUP_IPV6=m
250CONFIG_NFT_FIB_IPV6=m 245CONFIG_NFT_FIB_IPV6=m
251CONFIG_NF_FLOW_TABLE_IPV6=m 246CONFIG_NF_FLOW_TABLE_IPV6=m
@@ -325,7 +320,6 @@ CONFIG_AF_KCM=m
325# CONFIG_WIRELESS is not set 320# CONFIG_WIRELESS is not set
326CONFIG_PSAMPLE=m 321CONFIG_PSAMPLE=m
327CONFIG_NET_IFE=m 322CONFIG_NET_IFE=m
328# CONFIG_UEVENT_HELPER is not set
329CONFIG_DEVTMPFS=y 323CONFIG_DEVTMPFS=y
330CONFIG_DEVTMPFS_MOUNT=y 324CONFIG_DEVTMPFS_MOUNT=y
331CONFIG_TEST_ASYNC_DRIVER_PROBE=m 325CONFIG_TEST_ASYNC_DRIVER_PROBE=m
@@ -499,6 +493,8 @@ CONFIG_FB_FM2=y
499CONFIG_FB_ATARI=y 493CONFIG_FB_ATARI=y
500CONFIG_FB_VALKYRIE=y 494CONFIG_FB_VALKYRIE=y
501CONFIG_FB_MAC=y 495CONFIG_FB_MAC=y
496# CONFIG_LCD_CLASS_DEVICE is not set
497# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
502CONFIG_FRAMEBUFFER_CONSOLE=y 498CONFIG_FRAMEBUFFER_CONSOLE=y
503CONFIG_LOGO=y 499CONFIG_LOGO=y
504CONFIG_SOUND=m 500CONFIG_SOUND=m
@@ -619,13 +615,14 @@ CONFIG_NLS_MAC_TURKISH=m
619CONFIG_DLM=m 615CONFIG_DLM=m
620CONFIG_ENCRYPTED_KEYS=m 616CONFIG_ENCRYPTED_KEYS=m
621CONFIG_HARDENED_USERCOPY=y 617CONFIG_HARDENED_USERCOPY=y
622CONFIG_CRYPTO_RSA=m
623CONFIG_CRYPTO_DH=m
624CONFIG_CRYPTO_ECDH=m
625CONFIG_CRYPTO_MANAGER=y 618CONFIG_CRYPTO_MANAGER=y
626CONFIG_CRYPTO_USER=m 619CONFIG_CRYPTO_USER=m
627CONFIG_CRYPTO_CRYPTD=m 620CONFIG_CRYPTO_CRYPTD=m
628CONFIG_CRYPTO_TEST=m 621CONFIG_CRYPTO_TEST=m
622CONFIG_CRYPTO_RSA=m
623CONFIG_CRYPTO_DH=m
624CONFIG_CRYPTO_ECDH=m
625CONFIG_CRYPTO_ECRDSA=m
629CONFIG_CRYPTO_CHACHA20POLY1305=m 626CONFIG_CRYPTO_CHACHA20POLY1305=m
630CONFIG_CRYPTO_AEGIS128=m 627CONFIG_CRYPTO_AEGIS128=m
631CONFIG_CRYPTO_AEGIS128L=m 628CONFIG_CRYPTO_AEGIS128L=m
@@ -649,7 +646,6 @@ CONFIG_CRYPTO_RMD256=m
649CONFIG_CRYPTO_RMD320=m 646CONFIG_CRYPTO_RMD320=m
650CONFIG_CRYPTO_SHA3=m 647CONFIG_CRYPTO_SHA3=m
651CONFIG_CRYPTO_SM3=m 648CONFIG_CRYPTO_SM3=m
652CONFIG_CRYPTO_STREEBOG=m
653CONFIG_CRYPTO_TGR192=m 649CONFIG_CRYPTO_TGR192=m
654CONFIG_CRYPTO_WP512=m 650CONFIG_CRYPTO_WP512=m
655CONFIG_CRYPTO_AES_TI=m 651CONFIG_CRYPTO_AES_TI=m
@@ -692,6 +688,7 @@ CONFIG_ATOMIC64_SELFTEST=m
692CONFIG_ASYNC_RAID6_TEST=m 688CONFIG_ASYNC_RAID6_TEST=m
693CONFIG_TEST_HEXDUMP=m 689CONFIG_TEST_HEXDUMP=m
694CONFIG_TEST_STRING_HELPERS=m 690CONFIG_TEST_STRING_HELPERS=m
691CONFIG_TEST_STRSCPY=m
695CONFIG_TEST_KSTRTOX=m 692CONFIG_TEST_KSTRTOX=m
696CONFIG_TEST_PRINTF=m 693CONFIG_TEST_PRINTF=m
697CONFIG_TEST_BITMAP=m 694CONFIG_TEST_BITMAP=m
diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig
index 170ac8792c2d..175a607f576c 100644
--- a/arch/m68k/configs/mvme147_defconfig
+++ b/arch/m68k/configs/mvme147_defconfig
@@ -63,9 +63,6 @@ CONFIG_INET_AH=m
63CONFIG_INET_ESP=m 63CONFIG_INET_ESP=m
64CONFIG_INET_ESP_OFFLOAD=m 64CONFIG_INET_ESP_OFFLOAD=m
65CONFIG_INET_IPCOMP=m 65CONFIG_INET_IPCOMP=m
66CONFIG_INET_XFRM_MODE_TRANSPORT=m
67CONFIG_INET_XFRM_MODE_TUNNEL=m
68CONFIG_INET_XFRM_MODE_BEET=m
69CONFIG_INET_DIAG=m 66CONFIG_INET_DIAG=m
70CONFIG_INET_UDP_DIAG=m 67CONFIG_INET_UDP_DIAG=m
71CONFIG_INET_RAW_DIAG=m 68CONFIG_INET_RAW_DIAG=m
@@ -197,7 +194,6 @@ CONFIG_IP_SET_HASH_NETNET=m
197CONFIG_IP_SET_HASH_NETPORT=m 194CONFIG_IP_SET_HASH_NETPORT=m
198CONFIG_IP_SET_HASH_NETIFACE=m 195CONFIG_IP_SET_HASH_NETIFACE=m
199CONFIG_IP_SET_LIST_SET=m 196CONFIG_IP_SET_LIST_SET=m
200CONFIG_NFT_CHAIN_ROUTE_IPV4=m
201CONFIG_NFT_DUP_IPV4=m 197CONFIG_NFT_DUP_IPV4=m
202CONFIG_NFT_FIB_IPV4=m 198CONFIG_NFT_FIB_IPV4=m
203CONFIG_NF_TABLES_ARP=y 199CONFIG_NF_TABLES_ARP=y
@@ -223,7 +219,6 @@ CONFIG_IP_NF_RAW=m
223CONFIG_IP_NF_ARPTABLES=m 219CONFIG_IP_NF_ARPTABLES=m
224CONFIG_IP_NF_ARPFILTER=m 220CONFIG_IP_NF_ARPFILTER=m
225CONFIG_IP_NF_ARP_MANGLE=m 221CONFIG_IP_NF_ARP_MANGLE=m
226CONFIG_NFT_CHAIN_ROUTE_IPV6=m
227CONFIG_NFT_DUP_IPV6=m 222CONFIG_NFT_DUP_IPV6=m
228CONFIG_NFT_FIB_IPV6=m 223CONFIG_NFT_FIB_IPV6=m
229CONFIG_NF_FLOW_TABLE_IPV6=m 224CONFIG_NF_FLOW_TABLE_IPV6=m
@@ -300,7 +295,6 @@ CONFIG_AF_KCM=m
300# CONFIG_WIRELESS is not set 295# CONFIG_WIRELESS is not set
301CONFIG_PSAMPLE=m 296CONFIG_PSAMPLE=m
302CONFIG_NET_IFE=m 297CONFIG_NET_IFE=m
303# CONFIG_UEVENT_HELPER is not set
304CONFIG_DEVTMPFS=y 298CONFIG_DEVTMPFS=y
305CONFIG_DEVTMPFS_MOUNT=y 299CONFIG_DEVTMPFS_MOUNT=y
306CONFIG_TEST_ASYNC_DRIVER_PROBE=m 300CONFIG_TEST_ASYNC_DRIVER_PROBE=m
@@ -393,6 +387,8 @@ CONFIG_NTP_PPS=y
393CONFIG_PPS_CLIENT_LDISC=m 387CONFIG_PPS_CLIENT_LDISC=m
394CONFIG_PTP_1588_CLOCK=m 388CONFIG_PTP_1588_CLOCK=m
395# CONFIG_HWMON is not set 389# CONFIG_HWMON is not set
390# CONFIG_LCD_CLASS_DEVICE is not set
391# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
396CONFIG_HID=m 392CONFIG_HID=m
397CONFIG_HIDRAW=y 393CONFIG_HIDRAW=y
398CONFIG_UHID=m 394CONFIG_UHID=m
@@ -505,13 +501,14 @@ CONFIG_NLS_MAC_TURKISH=m
505CONFIG_DLM=m 501CONFIG_DLM=m
506CONFIG_ENCRYPTED_KEYS=m 502CONFIG_ENCRYPTED_KEYS=m
507CONFIG_HARDENED_USERCOPY=y 503CONFIG_HARDENED_USERCOPY=y
508CONFIG_CRYPTO_RSA=m
509CONFIG_CRYPTO_DH=m
510CONFIG_CRYPTO_ECDH=m
511CONFIG_CRYPTO_MANAGER=y 504CONFIG_CRYPTO_MANAGER=y
512CONFIG_CRYPTO_USER=m 505CONFIG_CRYPTO_USER=m
513CONFIG_CRYPTO_CRYPTD=m 506CONFIG_CRYPTO_CRYPTD=m
514CONFIG_CRYPTO_TEST=m 507CONFIG_CRYPTO_TEST=m
508CONFIG_CRYPTO_RSA=m
509CONFIG_CRYPTO_DH=m
510CONFIG_CRYPTO_ECDH=m
511CONFIG_CRYPTO_ECRDSA=m
515CONFIG_CRYPTO_CHACHA20POLY1305=m 512CONFIG_CRYPTO_CHACHA20POLY1305=m
516CONFIG_CRYPTO_AEGIS128=m 513CONFIG_CRYPTO_AEGIS128=m
517CONFIG_CRYPTO_AEGIS128L=m 514CONFIG_CRYPTO_AEGIS128L=m
@@ -535,7 +532,6 @@ CONFIG_CRYPTO_RMD256=m
535CONFIG_CRYPTO_RMD320=m 532CONFIG_CRYPTO_RMD320=m
536CONFIG_CRYPTO_SHA3=m 533CONFIG_CRYPTO_SHA3=m
537CONFIG_CRYPTO_SM3=m 534CONFIG_CRYPTO_SM3=m
538CONFIG_CRYPTO_STREEBOG=m
539CONFIG_CRYPTO_TGR192=m 535CONFIG_CRYPTO_TGR192=m
540CONFIG_CRYPTO_WP512=m 536CONFIG_CRYPTO_WP512=m
541CONFIG_CRYPTO_AES_TI=m 537CONFIG_CRYPTO_AES_TI=m
@@ -578,6 +574,7 @@ CONFIG_ATOMIC64_SELFTEST=m
578CONFIG_ASYNC_RAID6_TEST=m 574CONFIG_ASYNC_RAID6_TEST=m
579CONFIG_TEST_HEXDUMP=m 575CONFIG_TEST_HEXDUMP=m
580CONFIG_TEST_STRING_HELPERS=m 576CONFIG_TEST_STRING_HELPERS=m
577CONFIG_TEST_STRSCPY=m
581CONFIG_TEST_KSTRTOX=m 578CONFIG_TEST_KSTRTOX=m
582CONFIG_TEST_PRINTF=m 579CONFIG_TEST_PRINTF=m
583CONFIG_TEST_BITMAP=m 580CONFIG_TEST_BITMAP=m
diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig
index d865592a423e..f41c34d3cdd0 100644
--- a/arch/m68k/configs/mvme16x_defconfig
+++ b/arch/m68k/configs/mvme16x_defconfig
@@ -64,9 +64,6 @@ CONFIG_INET_AH=m
64CONFIG_INET_ESP=m 64CONFIG_INET_ESP=m
65CONFIG_INET_ESP_OFFLOAD=m 65CONFIG_INET_ESP_OFFLOAD=m
66CONFIG_INET_IPCOMP=m 66CONFIG_INET_IPCOMP=m
67CONFIG_INET_XFRM_MODE_TRANSPORT=m
68CONFIG_INET_XFRM_MODE_TUNNEL=m
69CONFIG_INET_XFRM_MODE_BEET=m
70CONFIG_INET_DIAG=m 67CONFIG_INET_DIAG=m
71CONFIG_INET_UDP_DIAG=m 68CONFIG_INET_UDP_DIAG=m
72CONFIG_INET_RAW_DIAG=m 69CONFIG_INET_RAW_DIAG=m
@@ -198,7 +195,6 @@ CONFIG_IP_SET_HASH_NETNET=m
198CONFIG_IP_SET_HASH_NETPORT=m 195CONFIG_IP_SET_HASH_NETPORT=m
199CONFIG_IP_SET_HASH_NETIFACE=m 196CONFIG_IP_SET_HASH_NETIFACE=m
200CONFIG_IP_SET_LIST_SET=m 197CONFIG_IP_SET_LIST_SET=m
201CONFIG_NFT_CHAIN_ROUTE_IPV4=m
202CONFIG_NFT_DUP_IPV4=m 198CONFIG_NFT_DUP_IPV4=m
203CONFIG_NFT_FIB_IPV4=m 199CONFIG_NFT_FIB_IPV4=m
204CONFIG_NF_TABLES_ARP=y 200CONFIG_NF_TABLES_ARP=y
@@ -224,7 +220,6 @@ CONFIG_IP_NF_RAW=m
224CONFIG_IP_NF_ARPTABLES=m 220CONFIG_IP_NF_ARPTABLES=m
225CONFIG_IP_NF_ARPFILTER=m 221CONFIG_IP_NF_ARPFILTER=m
226CONFIG_IP_NF_ARP_MANGLE=m 222CONFIG_IP_NF_ARP_MANGLE=m
227CONFIG_NFT_CHAIN_ROUTE_IPV6=m
228CONFIG_NFT_DUP_IPV6=m 223CONFIG_NFT_DUP_IPV6=m
229CONFIG_NFT_FIB_IPV6=m 224CONFIG_NFT_FIB_IPV6=m
230CONFIG_NF_FLOW_TABLE_IPV6=m 225CONFIG_NF_FLOW_TABLE_IPV6=m
@@ -301,7 +296,6 @@ CONFIG_AF_KCM=m
301# CONFIG_WIRELESS is not set 296# CONFIG_WIRELESS is not set
302CONFIG_PSAMPLE=m 297CONFIG_PSAMPLE=m
303CONFIG_NET_IFE=m 298CONFIG_NET_IFE=m
304# CONFIG_UEVENT_HELPER is not set
305CONFIG_DEVTMPFS=y 299CONFIG_DEVTMPFS=y
306CONFIG_DEVTMPFS_MOUNT=y 300CONFIG_DEVTMPFS_MOUNT=y
307CONFIG_TEST_ASYNC_DRIVER_PROBE=m 301CONFIG_TEST_ASYNC_DRIVER_PROBE=m
@@ -394,6 +388,8 @@ CONFIG_NTP_PPS=y
394CONFIG_PPS_CLIENT_LDISC=m 388CONFIG_PPS_CLIENT_LDISC=m
395CONFIG_PTP_1588_CLOCK=m 389CONFIG_PTP_1588_CLOCK=m
396# CONFIG_HWMON is not set 390# CONFIG_HWMON is not set
391# CONFIG_LCD_CLASS_DEVICE is not set
392# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
397CONFIG_HID=m 393CONFIG_HID=m
398CONFIG_HIDRAW=y 394CONFIG_HIDRAW=y
399CONFIG_UHID=m 395CONFIG_UHID=m
@@ -506,13 +502,14 @@ CONFIG_NLS_MAC_TURKISH=m
506CONFIG_DLM=m 502CONFIG_DLM=m
507CONFIG_ENCRYPTED_KEYS=m 503CONFIG_ENCRYPTED_KEYS=m
508CONFIG_HARDENED_USERCOPY=y 504CONFIG_HARDENED_USERCOPY=y
509CONFIG_CRYPTO_RSA=m
510CONFIG_CRYPTO_DH=m
511CONFIG_CRYPTO_ECDH=m
512CONFIG_CRYPTO_MANAGER=y 505CONFIG_CRYPTO_MANAGER=y
513CONFIG_CRYPTO_USER=m 506CONFIG_CRYPTO_USER=m
514CONFIG_CRYPTO_CRYPTD=m 507CONFIG_CRYPTO_CRYPTD=m
515CONFIG_CRYPTO_TEST=m 508CONFIG_CRYPTO_TEST=m
509CONFIG_CRYPTO_RSA=m
510CONFIG_CRYPTO_DH=m
511CONFIG_CRYPTO_ECDH=m
512CONFIG_CRYPTO_ECRDSA=m
516CONFIG_CRYPTO_CHACHA20POLY1305=m 513CONFIG_CRYPTO_CHACHA20POLY1305=m
517CONFIG_CRYPTO_AEGIS128=m 514CONFIG_CRYPTO_AEGIS128=m
518CONFIG_CRYPTO_AEGIS128L=m 515CONFIG_CRYPTO_AEGIS128L=m
@@ -536,7 +533,6 @@ CONFIG_CRYPTO_RMD256=m
536CONFIG_CRYPTO_RMD320=m 533CONFIG_CRYPTO_RMD320=m
537CONFIG_CRYPTO_SHA3=m 534CONFIG_CRYPTO_SHA3=m
538CONFIG_CRYPTO_SM3=m 535CONFIG_CRYPTO_SM3=m
539CONFIG_CRYPTO_STREEBOG=m
540CONFIG_CRYPTO_TGR192=m 536CONFIG_CRYPTO_TGR192=m
541CONFIG_CRYPTO_WP512=m 537CONFIG_CRYPTO_WP512=m
542CONFIG_CRYPTO_AES_TI=m 538CONFIG_CRYPTO_AES_TI=m
@@ -579,6 +575,7 @@ CONFIG_ATOMIC64_SELFTEST=m
579CONFIG_ASYNC_RAID6_TEST=m 575CONFIG_ASYNC_RAID6_TEST=m
580CONFIG_TEST_HEXDUMP=m 576CONFIG_TEST_HEXDUMP=m
581CONFIG_TEST_STRING_HELPERS=m 577CONFIG_TEST_STRING_HELPERS=m
578CONFIG_TEST_STRSCPY=m
582CONFIG_TEST_KSTRTOX=m 579CONFIG_TEST_KSTRTOX=m
583CONFIG_TEST_PRINTF=m 580CONFIG_TEST_PRINTF=m
584CONFIG_TEST_BITMAP=m 581CONFIG_TEST_BITMAP=m
diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig
index 034a9de90484..c9d2cb0a1cf4 100644
--- a/arch/m68k/configs/q40_defconfig
+++ b/arch/m68k/configs/q40_defconfig
@@ -65,9 +65,6 @@ CONFIG_INET_AH=m
65CONFIG_INET_ESP=m 65CONFIG_INET_ESP=m
66CONFIG_INET_ESP_OFFLOAD=m 66CONFIG_INET_ESP_OFFLOAD=m
67CONFIG_INET_IPCOMP=m 67CONFIG_INET_IPCOMP=m
68CONFIG_INET_XFRM_MODE_TRANSPORT=m
69CONFIG_INET_XFRM_MODE_TUNNEL=m
70CONFIG_INET_XFRM_MODE_BEET=m
71CONFIG_INET_DIAG=m 68CONFIG_INET_DIAG=m
72CONFIG_INET_UDP_DIAG=m 69CONFIG_INET_UDP_DIAG=m
73CONFIG_INET_RAW_DIAG=m 70CONFIG_INET_RAW_DIAG=m
@@ -199,7 +196,6 @@ CONFIG_IP_SET_HASH_NETNET=m
199CONFIG_IP_SET_HASH_NETPORT=m 196CONFIG_IP_SET_HASH_NETPORT=m
200CONFIG_IP_SET_HASH_NETIFACE=m 197CONFIG_IP_SET_HASH_NETIFACE=m
201CONFIG_IP_SET_LIST_SET=m 198CONFIG_IP_SET_LIST_SET=m
202CONFIG_NFT_CHAIN_ROUTE_IPV4=m
203CONFIG_NFT_DUP_IPV4=m 199CONFIG_NFT_DUP_IPV4=m
204CONFIG_NFT_FIB_IPV4=m 200CONFIG_NFT_FIB_IPV4=m
205CONFIG_NF_TABLES_ARP=y 201CONFIG_NF_TABLES_ARP=y
@@ -225,7 +221,6 @@ CONFIG_IP_NF_RAW=m
225CONFIG_IP_NF_ARPTABLES=m 221CONFIG_IP_NF_ARPTABLES=m
226CONFIG_IP_NF_ARPFILTER=m 222CONFIG_IP_NF_ARPFILTER=m
227CONFIG_IP_NF_ARP_MANGLE=m 223CONFIG_IP_NF_ARP_MANGLE=m
228CONFIG_NFT_CHAIN_ROUTE_IPV6=m
229CONFIG_NFT_DUP_IPV6=m 224CONFIG_NFT_DUP_IPV6=m
230CONFIG_NFT_FIB_IPV6=m 225CONFIG_NFT_FIB_IPV6=m
231CONFIG_NF_FLOW_TABLE_IPV6=m 226CONFIG_NF_FLOW_TABLE_IPV6=m
@@ -302,7 +297,6 @@ CONFIG_AF_KCM=m
302# CONFIG_WIRELESS is not set 297# CONFIG_WIRELESS is not set
303CONFIG_PSAMPLE=m 298CONFIG_PSAMPLE=m
304CONFIG_NET_IFE=m 299CONFIG_NET_IFE=m
305# CONFIG_UEVENT_HELPER is not set
306CONFIG_DEVTMPFS=y 300CONFIG_DEVTMPFS=y
307CONFIG_DEVTMPFS_MOUNT=y 301CONFIG_DEVTMPFS_MOUNT=y
308CONFIG_TEST_ASYNC_DRIVER_PROBE=m 302CONFIG_TEST_ASYNC_DRIVER_PROBE=m
@@ -408,6 +402,8 @@ CONFIG_PPS_CLIENT_PARPORT=m
408CONFIG_PTP_1588_CLOCK=m 402CONFIG_PTP_1588_CLOCK=m
409# CONFIG_HWMON is not set 403# CONFIG_HWMON is not set
410CONFIG_FB=y 404CONFIG_FB=y
405# CONFIG_LCD_CLASS_DEVICE is not set
406# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
411CONFIG_FRAMEBUFFER_CONSOLE=y 407CONFIG_FRAMEBUFFER_CONSOLE=y
412CONFIG_LOGO=y 408CONFIG_LOGO=y
413CONFIG_SOUND=m 409CONFIG_SOUND=m
@@ -524,13 +520,14 @@ CONFIG_NLS_MAC_TURKISH=m
524CONFIG_DLM=m 520CONFIG_DLM=m
525CONFIG_ENCRYPTED_KEYS=m 521CONFIG_ENCRYPTED_KEYS=m
526CONFIG_HARDENED_USERCOPY=y 522CONFIG_HARDENED_USERCOPY=y
527CONFIG_CRYPTO_RSA=m
528CONFIG_CRYPTO_DH=m
529CONFIG_CRYPTO_ECDH=m
530CONFIG_CRYPTO_MANAGER=y 523CONFIG_CRYPTO_MANAGER=y
531CONFIG_CRYPTO_USER=m 524CONFIG_CRYPTO_USER=m
532CONFIG_CRYPTO_CRYPTD=m 525CONFIG_CRYPTO_CRYPTD=m
533CONFIG_CRYPTO_TEST=m 526CONFIG_CRYPTO_TEST=m
527CONFIG_CRYPTO_RSA=m
528CONFIG_CRYPTO_DH=m
529CONFIG_CRYPTO_ECDH=m
530CONFIG_CRYPTO_ECRDSA=m
534CONFIG_CRYPTO_CHACHA20POLY1305=m 531CONFIG_CRYPTO_CHACHA20POLY1305=m
535CONFIG_CRYPTO_AEGIS128=m 532CONFIG_CRYPTO_AEGIS128=m
536CONFIG_CRYPTO_AEGIS128L=m 533CONFIG_CRYPTO_AEGIS128L=m
@@ -554,7 +551,6 @@ CONFIG_CRYPTO_RMD256=m
554CONFIG_CRYPTO_RMD320=m 551CONFIG_CRYPTO_RMD320=m
555CONFIG_CRYPTO_SHA3=m 552CONFIG_CRYPTO_SHA3=m
556CONFIG_CRYPTO_SM3=m 553CONFIG_CRYPTO_SM3=m
557CONFIG_CRYPTO_STREEBOG=m
558CONFIG_CRYPTO_TGR192=m 554CONFIG_CRYPTO_TGR192=m
559CONFIG_CRYPTO_WP512=m 555CONFIG_CRYPTO_WP512=m
560CONFIG_CRYPTO_AES_TI=m 556CONFIG_CRYPTO_AES_TI=m
@@ -597,6 +593,7 @@ CONFIG_ATOMIC64_SELFTEST=m
597CONFIG_ASYNC_RAID6_TEST=m 593CONFIG_ASYNC_RAID6_TEST=m
598CONFIG_TEST_HEXDUMP=m 594CONFIG_TEST_HEXDUMP=m
599CONFIG_TEST_STRING_HELPERS=m 595CONFIG_TEST_STRING_HELPERS=m
596CONFIG_TEST_STRSCPY=m
600CONFIG_TEST_KSTRTOX=m 597CONFIG_TEST_KSTRTOX=m
601CONFIG_TEST_PRINTF=m 598CONFIG_TEST_PRINTF=m
602CONFIG_TEST_BITMAP=m 599CONFIG_TEST_BITMAP=m
diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig
index 49be0f9fcd8d..79a64fdd6bf0 100644
--- a/arch/m68k/configs/sun3_defconfig
+++ b/arch/m68k/configs/sun3_defconfig
@@ -61,9 +61,6 @@ CONFIG_INET_AH=m
61CONFIG_INET_ESP=m 61CONFIG_INET_ESP=m
62CONFIG_INET_ESP_OFFLOAD=m 62CONFIG_INET_ESP_OFFLOAD=m
63CONFIG_INET_IPCOMP=m 63CONFIG_INET_IPCOMP=m
64CONFIG_INET_XFRM_MODE_TRANSPORT=m
65CONFIG_INET_XFRM_MODE_TUNNEL=m
66CONFIG_INET_XFRM_MODE_BEET=m
67CONFIG_INET_DIAG=m 64CONFIG_INET_DIAG=m
68CONFIG_INET_UDP_DIAG=m 65CONFIG_INET_UDP_DIAG=m
69CONFIG_INET_RAW_DIAG=m 66CONFIG_INET_RAW_DIAG=m
@@ -195,7 +192,6 @@ CONFIG_IP_SET_HASH_NETNET=m
195CONFIG_IP_SET_HASH_NETPORT=m 192CONFIG_IP_SET_HASH_NETPORT=m
196CONFIG_IP_SET_HASH_NETIFACE=m 193CONFIG_IP_SET_HASH_NETIFACE=m
197CONFIG_IP_SET_LIST_SET=m 194CONFIG_IP_SET_LIST_SET=m
198CONFIG_NFT_CHAIN_ROUTE_IPV4=m
199CONFIG_NFT_DUP_IPV4=m 195CONFIG_NFT_DUP_IPV4=m
200CONFIG_NFT_FIB_IPV4=m 196CONFIG_NFT_FIB_IPV4=m
201CONFIG_NF_TABLES_ARP=y 197CONFIG_NF_TABLES_ARP=y
@@ -221,7 +217,6 @@ CONFIG_IP_NF_RAW=m
221CONFIG_IP_NF_ARPTABLES=m 217CONFIG_IP_NF_ARPTABLES=m
222CONFIG_IP_NF_ARPFILTER=m 218CONFIG_IP_NF_ARPFILTER=m
223CONFIG_IP_NF_ARP_MANGLE=m 219CONFIG_IP_NF_ARP_MANGLE=m
224CONFIG_NFT_CHAIN_ROUTE_IPV6=m
225CONFIG_NFT_DUP_IPV6=m 220CONFIG_NFT_DUP_IPV6=m
226CONFIG_NFT_FIB_IPV6=m 221CONFIG_NFT_FIB_IPV6=m
227CONFIG_NF_FLOW_TABLE_IPV6=m 222CONFIG_NF_FLOW_TABLE_IPV6=m
@@ -298,7 +293,6 @@ CONFIG_AF_KCM=m
298# CONFIG_WIRELESS is not set 293# CONFIG_WIRELESS is not set
299CONFIG_PSAMPLE=m 294CONFIG_PSAMPLE=m
300CONFIG_NET_IFE=m 295CONFIG_NET_IFE=m
301# CONFIG_UEVENT_HELPER is not set
302CONFIG_DEVTMPFS=y 296CONFIG_DEVTMPFS=y
303CONFIG_DEVTMPFS_MOUNT=y 297CONFIG_DEVTMPFS_MOUNT=y
304CONFIG_TEST_ASYNC_DRIVER_PROBE=m 298CONFIG_TEST_ASYNC_DRIVER_PROBE=m
@@ -394,6 +388,8 @@ CONFIG_PPS_CLIENT_LDISC=m
394CONFIG_PTP_1588_CLOCK=m 388CONFIG_PTP_1588_CLOCK=m
395# CONFIG_HWMON is not set 389# CONFIG_HWMON is not set
396CONFIG_FB=y 390CONFIG_FB=y
391# CONFIG_LCD_CLASS_DEVICE is not set
392# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
397CONFIG_FRAMEBUFFER_CONSOLE=y 393CONFIG_FRAMEBUFFER_CONSOLE=y
398CONFIG_LOGO=y 394CONFIG_LOGO=y
399CONFIG_HID=m 395CONFIG_HID=m
@@ -508,13 +504,14 @@ CONFIG_NLS_MAC_TURKISH=m
508CONFIG_DLM=m 504CONFIG_DLM=m
509CONFIG_ENCRYPTED_KEYS=m 505CONFIG_ENCRYPTED_KEYS=m
510CONFIG_HARDENED_USERCOPY=y 506CONFIG_HARDENED_USERCOPY=y
511CONFIG_CRYPTO_RSA=m
512CONFIG_CRYPTO_DH=m
513CONFIG_CRYPTO_ECDH=m
514CONFIG_CRYPTO_MANAGER=y 507CONFIG_CRYPTO_MANAGER=y
515CONFIG_CRYPTO_USER=m 508CONFIG_CRYPTO_USER=m
516CONFIG_CRYPTO_CRYPTD=m 509CONFIG_CRYPTO_CRYPTD=m
517CONFIG_CRYPTO_TEST=m 510CONFIG_CRYPTO_TEST=m
511CONFIG_CRYPTO_RSA=m
512CONFIG_CRYPTO_DH=m
513CONFIG_CRYPTO_ECDH=m
514CONFIG_CRYPTO_ECRDSA=m
518CONFIG_CRYPTO_CHACHA20POLY1305=m 515CONFIG_CRYPTO_CHACHA20POLY1305=m
519CONFIG_CRYPTO_AEGIS128=m 516CONFIG_CRYPTO_AEGIS128=m
520CONFIG_CRYPTO_AEGIS128L=m 517CONFIG_CRYPTO_AEGIS128L=m
@@ -538,7 +535,6 @@ CONFIG_CRYPTO_RMD256=m
538CONFIG_CRYPTO_RMD320=m 535CONFIG_CRYPTO_RMD320=m
539CONFIG_CRYPTO_SHA3=m 536CONFIG_CRYPTO_SHA3=m
540CONFIG_CRYPTO_SM3=m 537CONFIG_CRYPTO_SM3=m
541CONFIG_CRYPTO_STREEBOG=m
542CONFIG_CRYPTO_TGR192=m 538CONFIG_CRYPTO_TGR192=m
543CONFIG_CRYPTO_WP512=m 539CONFIG_CRYPTO_WP512=m
544CONFIG_CRYPTO_AES_TI=m 540CONFIG_CRYPTO_AES_TI=m
@@ -581,6 +577,7 @@ CONFIG_ATOMIC64_SELFTEST=m
581CONFIG_ASYNC_RAID6_TEST=m 577CONFIG_ASYNC_RAID6_TEST=m
582CONFIG_TEST_HEXDUMP=m 578CONFIG_TEST_HEXDUMP=m
583CONFIG_TEST_STRING_HELPERS=m 579CONFIG_TEST_STRING_HELPERS=m
580CONFIG_TEST_STRSCPY=m
584CONFIG_TEST_KSTRTOX=m 581CONFIG_TEST_KSTRTOX=m
585CONFIG_TEST_PRINTF=m 582CONFIG_TEST_PRINTF=m
586CONFIG_TEST_BITMAP=m 583CONFIG_TEST_BITMAP=m
diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig
index a71acf4a6004..e3402a5d165b 100644
--- a/arch/m68k/configs/sun3x_defconfig
+++ b/arch/m68k/configs/sun3x_defconfig
@@ -61,9 +61,6 @@ CONFIG_INET_AH=m
61CONFIG_INET_ESP=m 61CONFIG_INET_ESP=m
62CONFIG_INET_ESP_OFFLOAD=m 62CONFIG_INET_ESP_OFFLOAD=m
63CONFIG_INET_IPCOMP=m 63CONFIG_INET_IPCOMP=m
64CONFIG_INET_XFRM_MODE_TRANSPORT=m
65CONFIG_INET_XFRM_MODE_TUNNEL=m
66CONFIG_INET_XFRM_MODE_BEET=m
67CONFIG_INET_DIAG=m 64CONFIG_INET_DIAG=m
68CONFIG_INET_UDP_DIAG=m 65CONFIG_INET_UDP_DIAG=m
69CONFIG_INET_RAW_DIAG=m 66CONFIG_INET_RAW_DIAG=m
@@ -195,7 +192,6 @@ CONFIG_IP_SET_HASH_NETNET=m
195CONFIG_IP_SET_HASH_NETPORT=m 192CONFIG_IP_SET_HASH_NETPORT=m
196CONFIG_IP_SET_HASH_NETIFACE=m 193CONFIG_IP_SET_HASH_NETIFACE=m
197CONFIG_IP_SET_LIST_SET=m 194CONFIG_IP_SET_LIST_SET=m
198CONFIG_NFT_CHAIN_ROUTE_IPV4=m
199CONFIG_NFT_DUP_IPV4=m 195CONFIG_NFT_DUP_IPV4=m
200CONFIG_NFT_FIB_IPV4=m 196CONFIG_NFT_FIB_IPV4=m
201CONFIG_NF_TABLES_ARP=y 197CONFIG_NF_TABLES_ARP=y
@@ -221,7 +217,6 @@ CONFIG_IP_NF_RAW=m
221CONFIG_IP_NF_ARPTABLES=m 217CONFIG_IP_NF_ARPTABLES=m
222CONFIG_IP_NF_ARPFILTER=m 218CONFIG_IP_NF_ARPFILTER=m
223CONFIG_IP_NF_ARP_MANGLE=m 219CONFIG_IP_NF_ARP_MANGLE=m
224CONFIG_NFT_CHAIN_ROUTE_IPV6=m
225CONFIG_NFT_DUP_IPV6=m 220CONFIG_NFT_DUP_IPV6=m
226CONFIG_NFT_FIB_IPV6=m 221CONFIG_NFT_FIB_IPV6=m
227CONFIG_NF_FLOW_TABLE_IPV6=m 222CONFIG_NF_FLOW_TABLE_IPV6=m
@@ -298,7 +293,6 @@ CONFIG_AF_KCM=m
298# CONFIG_WIRELESS is not set 293# CONFIG_WIRELESS is not set
299CONFIG_PSAMPLE=m 294CONFIG_PSAMPLE=m
300CONFIG_NET_IFE=m 295CONFIG_NET_IFE=m
301# CONFIG_UEVENT_HELPER is not set
302CONFIG_DEVTMPFS=y 296CONFIG_DEVTMPFS=y
303CONFIG_DEVTMPFS_MOUNT=y 297CONFIG_DEVTMPFS_MOUNT=y
304CONFIG_TEST_ASYNC_DRIVER_PROBE=m 298CONFIG_TEST_ASYNC_DRIVER_PROBE=m
@@ -393,6 +387,8 @@ CONFIG_PPS_CLIENT_LDISC=m
393CONFIG_PTP_1588_CLOCK=m 387CONFIG_PTP_1588_CLOCK=m
394# CONFIG_HWMON is not set 388# CONFIG_HWMON is not set
395CONFIG_FB=y 389CONFIG_FB=y
390# CONFIG_LCD_CLASS_DEVICE is not set
391# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
396CONFIG_FRAMEBUFFER_CONSOLE=y 392CONFIG_FRAMEBUFFER_CONSOLE=y
397CONFIG_LOGO=y 393CONFIG_LOGO=y
398CONFIG_HID=m 394CONFIG_HID=m
@@ -507,13 +503,14 @@ CONFIG_NLS_MAC_TURKISH=m
507CONFIG_DLM=m 503CONFIG_DLM=m
508CONFIG_ENCRYPTED_KEYS=m 504CONFIG_ENCRYPTED_KEYS=m
509CONFIG_HARDENED_USERCOPY=y 505CONFIG_HARDENED_USERCOPY=y
510CONFIG_CRYPTO_RSA=m
511CONFIG_CRYPTO_DH=m
512CONFIG_CRYPTO_ECDH=m
513CONFIG_CRYPTO_MANAGER=y 506CONFIG_CRYPTO_MANAGER=y
514CONFIG_CRYPTO_USER=m 507CONFIG_CRYPTO_USER=m
515CONFIG_CRYPTO_CRYPTD=m 508CONFIG_CRYPTO_CRYPTD=m
516CONFIG_CRYPTO_TEST=m 509CONFIG_CRYPTO_TEST=m
510CONFIG_CRYPTO_RSA=m
511CONFIG_CRYPTO_DH=m
512CONFIG_CRYPTO_ECDH=m
513CONFIG_CRYPTO_ECRDSA=m
517CONFIG_CRYPTO_CHACHA20POLY1305=m 514CONFIG_CRYPTO_CHACHA20POLY1305=m
518CONFIG_CRYPTO_AEGIS128=m 515CONFIG_CRYPTO_AEGIS128=m
519CONFIG_CRYPTO_AEGIS128L=m 516CONFIG_CRYPTO_AEGIS128L=m
@@ -537,7 +534,6 @@ CONFIG_CRYPTO_RMD256=m
537CONFIG_CRYPTO_RMD320=m 534CONFIG_CRYPTO_RMD320=m
538CONFIG_CRYPTO_SHA3=m 535CONFIG_CRYPTO_SHA3=m
539CONFIG_CRYPTO_SM3=m 536CONFIG_CRYPTO_SM3=m
540CONFIG_CRYPTO_STREEBOG=m
541CONFIG_CRYPTO_TGR192=m 537CONFIG_CRYPTO_TGR192=m
542CONFIG_CRYPTO_WP512=m 538CONFIG_CRYPTO_WP512=m
543CONFIG_CRYPTO_AES_TI=m 539CONFIG_CRYPTO_AES_TI=m
@@ -580,6 +576,7 @@ CONFIG_ATOMIC64_SELFTEST=m
580CONFIG_ASYNC_RAID6_TEST=m 576CONFIG_ASYNC_RAID6_TEST=m
581CONFIG_TEST_HEXDUMP=m 577CONFIG_TEST_HEXDUMP=m
582CONFIG_TEST_STRING_HELPERS=m 578CONFIG_TEST_STRING_HELPERS=m
579CONFIG_TEST_STRSCPY=m
583CONFIG_TEST_KSTRTOX=m 580CONFIG_TEST_KSTRTOX=m
584CONFIG_TEST_PRINTF=m 581CONFIG_TEST_PRINTF=m
585CONFIG_TEST_BITMAP=m 582CONFIG_TEST_BITMAP=m
diff --git a/arch/m68k/kernel/dma.c b/arch/m68k/kernel/dma.c
index b4aa853051bd..30cd59caf037 100644
--- a/arch/m68k/kernel/dma.c
+++ b/arch/m68k/kernel/dma.c
@@ -18,57 +18,22 @@
18#include <asm/pgalloc.h> 18#include <asm/pgalloc.h>
19 19
20#if defined(CONFIG_MMU) && !defined(CONFIG_COLDFIRE) 20#if defined(CONFIG_MMU) && !defined(CONFIG_COLDFIRE)
21 21void arch_dma_prep_coherent(struct page *page, size_t size)
22void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
23 gfp_t flag, unsigned long attrs)
24{ 22{
25 struct page *page, **map; 23 cache_push(page_to_phys(page), size);
26 pgprot_t pgprot;
27 void *addr;
28 int i, order;
29
30 pr_debug("dma_alloc_coherent: %d,%x\n", size, flag);
31
32 size = PAGE_ALIGN(size);
33 order = get_order(size);
34
35 page = alloc_pages(flag | __GFP_ZERO, order);
36 if (!page)
37 return NULL;
38
39 *handle = page_to_phys(page);
40 map = kmalloc(sizeof(struct page *) << order, flag & ~__GFP_DMA);
41 if (!map) {
42 __free_pages(page, order);
43 return NULL;
44 }
45 split_page(page, order);
46
47 order = 1 << order;
48 size >>= PAGE_SHIFT;
49 map[0] = page;
50 for (i = 1; i < size; i++)
51 map[i] = page + i;
52 for (; i < order; i++)
53 __free_page(page + i);
54 pgprot = __pgprot(_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_DIRTY);
55 if (CPU_IS_040_OR_060)
56 pgprot_val(pgprot) |= _PAGE_GLOBAL040 | _PAGE_NOCACHE_S;
57 else
58 pgprot_val(pgprot) |= _PAGE_NOCACHE030;
59 addr = vmap(map, size, VM_MAP, pgprot);
60 kfree(map);
61
62 return addr;
63} 24}
64 25
65void arch_dma_free(struct device *dev, size_t size, void *addr, 26pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot,
66 dma_addr_t handle, unsigned long attrs) 27 unsigned long attrs)
67{ 28{
68 pr_debug("dma_free_coherent: %p, %x\n", addr, handle); 29 if (CPU_IS_040_OR_060) {
69 vfree(addr); 30 pgprot_val(prot) &= ~_PAGE_CACHE040;
31 pgprot_val(prot) |= _PAGE_GLOBAL040 | _PAGE_NOCACHE_S;
32 } else {
33 pgprot_val(prot) |= _PAGE_NOCACHE030;
34 }
35 return prot;
70} 36}
71
72#else 37#else
73 38
74#include <asm/cacheflush.h> 39#include <asm/cacheflush.h>
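
This hunk replaces m68k's open-coded coherent allocator (alloc_pages plus vmap with a
non-cached pgprot) with the two hooks the generic DMA remapping code calls on this
platform: a cache push before a buffer is handed out (arch_dma_prep_coherent) and a
pgprot fixup for the mapping (arch_dma_mmap_pgprot). Driver-side usage is unchanged;
a consumer still goes through the regular API, roughly as in this illustrative sketch,
which is not part of the patch:

        /* needs <linux/dma-mapping.h>; dev and size are whatever the driver has */
        static int example_alloc_dma_buffer(struct device *dev, size_t size)
        {
                dma_addr_t dma_handle;
                void *cpu_addr;

                cpu_addr = dma_alloc_coherent(dev, size, &dma_handle, GFP_KERNEL);
                if (!cpu_addr)
                        return -ENOMEM;
                /* ... program the device with dma_handle, touch cpu_addr from the CPU ... */
                dma_free_coherent(dev, size, cpu_addr, dma_handle);
                return 0;
        }
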
diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index 8f4486c4415b..eceff9b75b22 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -17,6 +17,7 @@ archscripts: scripts_basic
17 $(Q)$(MAKE) $(build)=arch/mips/boot/tools relocs 17 $(Q)$(MAKE) $(build)=arch/mips/boot/tools relocs
18 18
19KBUILD_DEFCONFIG := 32r2el_defconfig 19KBUILD_DEFCONFIG := 32r2el_defconfig
20KBUILD_DTBS := dtbs
20 21
21# 22#
22# Select the object file format to substitute into the linker script. 23# Select the object file format to substitute into the linker script.
@@ -384,7 +385,7 @@ quiet_cmd_64 = OBJCOPY $@
384vmlinux.64: vmlinux 385vmlinux.64: vmlinux
385 $(call cmd,64) 386 $(call cmd,64)
386 387
387all: $(all-y) 388all: $(all-y) $(KBUILD_DTBS)
388 389
389# boot 390# boot
390$(boot-y): $(vmlinux-32) FORCE 391$(boot-y): $(vmlinux-32) FORCE
diff --git a/arch/mips/boot/compressed/Makefile b/arch/mips/boot/compressed/Makefile
index 3c453a1f1ff1..172801ed35b8 100644
--- a/arch/mips/boot/compressed/Makefile
+++ b/arch/mips/boot/compressed/Makefile
@@ -78,6 +78,8 @@ OBJCOPYFLAGS_piggy.o := --add-section=.image=$(obj)/vmlinux.bin.z \
78$(obj)/piggy.o: $(obj)/dummy.o $(obj)/vmlinux.bin.z FORCE 78$(obj)/piggy.o: $(obj)/dummy.o $(obj)/vmlinux.bin.z FORCE
79 $(call if_changed,objcopy) 79 $(call if_changed,objcopy)
80 80
81HOSTCFLAGS_calc_vmlinuz_load_addr.o += $(LINUXINCLUDE)
82
81# Calculate the load address of the compressed kernel image 83# Calculate the load address of the compressed kernel image
82hostprogs-y := calc_vmlinuz_load_addr 84hostprogs-y := calc_vmlinuz_load_addr
83 85
diff --git a/arch/mips/boot/compressed/calc_vmlinuz_load_addr.c b/arch/mips/boot/compressed/calc_vmlinuz_load_addr.c
index 240f1d12df75..080b926d2623 100644
--- a/arch/mips/boot/compressed/calc_vmlinuz_load_addr.c
+++ b/arch/mips/boot/compressed/calc_vmlinuz_load_addr.c
@@ -9,7 +9,7 @@
9#include <stdint.h> 9#include <stdint.h>
10#include <stdio.h> 10#include <stdio.h>
11#include <stdlib.h> 11#include <stdlib.h>
12#include "../../../../include/linux/sizes.h" 12#include <linux/sizes.h>
13 13
14int main(int argc, char *argv[]) 14int main(int argc, char *argv[])
15{ 15{
diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h
index 94096299fc56..9a82dd11c0e9 100644
--- a/arch/mips/include/asm/atomic.h
+++ b/arch/mips/include/asm/atomic.h
@@ -254,10 +254,10 @@ static __inline__ int atomic_sub_if_positive(int i, atomic_t * v)
254#define atomic64_set(v, i) WRITE_ONCE((v)->counter, (i)) 254#define atomic64_set(v, i) WRITE_ONCE((v)->counter, (i))
255 255
256#define ATOMIC64_OP(op, c_op, asm_op) \ 256#define ATOMIC64_OP(op, c_op, asm_op) \
257static __inline__ void atomic64_##op(long i, atomic64_t * v) \ 257static __inline__ void atomic64_##op(s64 i, atomic64_t * v) \
258{ \ 258{ \
259 if (kernel_uses_llsc) { \ 259 if (kernel_uses_llsc) { \
260 long temp; \ 260 s64 temp; \
261 \ 261 \
262 loongson_llsc_mb(); \ 262 loongson_llsc_mb(); \
263 __asm__ __volatile__( \ 263 __asm__ __volatile__( \
@@ -280,12 +280,12 @@ static __inline__ void atomic64_##op(long i, atomic64_t * v) \
280} 280}
281 281
282#define ATOMIC64_OP_RETURN(op, c_op, asm_op) \ 282#define ATOMIC64_OP_RETURN(op, c_op, asm_op) \
283static __inline__ long atomic64_##op##_return_relaxed(long i, atomic64_t * v) \ 283static __inline__ s64 atomic64_##op##_return_relaxed(s64 i, atomic64_t * v) \
284{ \ 284{ \
285 long result; \ 285 s64 result; \
286 \ 286 \
287 if (kernel_uses_llsc) { \ 287 if (kernel_uses_llsc) { \
288 long temp; \ 288 s64 temp; \
289 \ 289 \
290 loongson_llsc_mb(); \ 290 loongson_llsc_mb(); \
291 __asm__ __volatile__( \ 291 __asm__ __volatile__( \
@@ -314,12 +314,12 @@ static __inline__ long atomic64_##op##_return_relaxed(long i, atomic64_t * v) \
314} 314}
315 315
316#define ATOMIC64_FETCH_OP(op, c_op, asm_op) \ 316#define ATOMIC64_FETCH_OP(op, c_op, asm_op) \
317static __inline__ long atomic64_fetch_##op##_relaxed(long i, atomic64_t * v) \ 317static __inline__ s64 atomic64_fetch_##op##_relaxed(s64 i, atomic64_t * v) \
318{ \ 318{ \
319 long result; \ 319 s64 result; \
320 \ 320 \
321 if (kernel_uses_llsc) { \ 321 if (kernel_uses_llsc) { \
322 long temp; \ 322 s64 temp; \
323 \ 323 \
324 loongson_llsc_mb(); \ 324 loongson_llsc_mb(); \
325 __asm__ __volatile__( \ 325 __asm__ __volatile__( \
@@ -386,14 +386,14 @@ ATOMIC64_OPS(xor, ^=, xor)
386 * Atomically test @v and subtract @i if @v is greater or equal than @i. 386 * Atomically test @v and subtract @i if @v is greater or equal than @i.
387 * The function returns the old value of @v minus @i. 387 * The function returns the old value of @v minus @i.
388 */ 388 */
389static __inline__ long atomic64_sub_if_positive(long i, atomic64_t * v) 389static __inline__ s64 atomic64_sub_if_positive(s64 i, atomic64_t * v)
390{ 390{
391 long result; 391 s64 result;
392 392
393 smp_mb__before_llsc(); 393 smp_mb__before_llsc();
394 394
395 if (kernel_uses_llsc) { 395 if (kernel_uses_llsc) {
396 long temp; 396 s64 temp;
397 397
398 __asm__ __volatile__( 398 __asm__ __volatile__(
399 " .set push \n" 399 " .set push \n"
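
The hunks above swap "long" for "s64" throughout the MIPS atomic64 helpers (the
powerpc and riscv atomic headers get the same treatment later in this diff), keeping
the prototypes aligned with the s64-based atomic64 API independent of the width of
"long". As a minimal illustration of that style, here are plain READ_ONCE/WRITE_ONCE
accessors written against s64; the "example_" names are made up and are not the
kernel's:

        static inline s64 example_atomic64_read(const atomic64_t *v)
        {
                return READ_ONCE(v->counter);
        }

        static inline void example_atomic64_set(atomic64_t *v, s64 i)
        {
                WRITE_ONCE(v->counter, i);
        }
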
diff --git a/arch/mips/include/asm/mach-ath79/ar933x_uart.h b/arch/mips/include/asm/mach-ath79/ar933x_uart.h
index b8f8af7dc47c..cacf3545e018 100644
--- a/arch/mips/include/asm/mach-ath79/ar933x_uart.h
+++ b/arch/mips/include/asm/mach-ath79/ar933x_uart.h
@@ -24,8 +24,8 @@
24#define AR933X_UART_CS_PARITY_S 0 24#define AR933X_UART_CS_PARITY_S 0
25#define AR933X_UART_CS_PARITY_M 0x3 25#define AR933X_UART_CS_PARITY_M 0x3
26#define AR933X_UART_CS_PARITY_NONE 0 26#define AR933X_UART_CS_PARITY_NONE 0
27#define AR933X_UART_CS_PARITY_ODD 1 27#define AR933X_UART_CS_PARITY_ODD 2
28#define AR933X_UART_CS_PARITY_EVEN 2 28#define AR933X_UART_CS_PARITY_EVEN 3
29#define AR933X_UART_CS_IF_MODE_S 2 29#define AR933X_UART_CS_IF_MODE_S 2
30#define AR933X_UART_CS_IF_MODE_M 0x3 30#define AR933X_UART_CS_IF_MODE_M 0x3
31#define AR933X_UART_CS_IF_MODE_NONE 0 31#define AR933X_UART_CS_IF_MODE_NONE 0
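
The parity values change here (odd 1 -> 2, even 2 -> 3) while the shift and mask stay
the same, so any caller that composes the CS field through these macros picks up the
fix automatically. A minimal sketch of such a caller, with a made-up helper name and
only the macros defined in the header above:

        static unsigned int ar933x_uart_cs_set_even_parity(unsigned int cs)
        {
                cs &= ~(AR933X_UART_CS_PARITY_M << AR933X_UART_CS_PARITY_S);
                cs |= (AR933X_UART_CS_PARITY_EVEN & AR933X_UART_CS_PARITY_M)
                      << AR933X_UART_CS_PARITY_S;
                return cs;
        }
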
diff --git a/arch/mips/include/asm/mips-gic.h b/arch/mips/include/asm/mips-gic.h
index 75a1cdee1331..084cac1c5ea2 100644
--- a/arch/mips/include/asm/mips-gic.h
+++ b/arch/mips/include/asm/mips-gic.h
@@ -311,6 +311,36 @@ static inline bool mips_gic_present(void)
311} 311}
312 312
313/** 313/**
314 * mips_gic_vx_map_reg() - Return GIC_Vx_<intr>_MAP register offset
315 * @intr: A GIC local interrupt
316 *
317 * Determine the index of the GIC_VL_<intr>_MAP or GIC_VO_<intr>_MAP register
318 * within the block of GIC map registers. This is almost the same as the order
319 * of interrupts in the pending & mask registers, as used by enum
320 * mips_gic_local_interrupt, but moves the FDC interrupt & thus offsets the
321 * interrupts after it...
322 *
323 * Return: The map register index corresponding to @intr.
324 *
325 * The return value is suitable for use with the (read|write)_gic_v[lo]_map
326 * accessor functions.
327 */
328static inline unsigned int
329mips_gic_vx_map_reg(enum mips_gic_local_interrupt intr)
330{
331 /* WD, Compare & Timer are 1:1 */
332 if (intr <= GIC_LOCAL_INT_TIMER)
333 return intr;
334
335 /* FDC moves to after Timer... */
336 if (intr == GIC_LOCAL_INT_FDC)
337 return GIC_LOCAL_INT_TIMER + 1;
338
339 /* As a result everything else is offset by 1 */
340 return intr + 1;
341}
342
343/**
314 * gic_get_c0_compare_int() - Return cp0 count/compare interrupt virq 344 * gic_get_c0_compare_int() - Return cp0 count/compare interrupt virq
315 * 345 *
316 * Determine the virq number to use for the coprocessor 0 count/compare 346 * Determine the virq number to use for the coprocessor 0 count/compare
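
Per its kerneldoc, mips_gic_vx_map_reg() returns an index meant to be fed straight to
the read_gic_v[lo]_map/write_gic_v[lo]_map accessors: WD, Compare and Timer map 1:1,
FDC lands right after Timer, and everything else is shifted up by one. A hypothetical
usage sketch, where map_val stands in for whatever routing value the real caller
computes:

        unsigned int idx = mips_gic_vx_map_reg(GIC_LOCAL_INT_TIMER);

        write_gic_vl_map(idx, map_val);
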
diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c
index 50ee7213b432..d79f2b432318 100644
--- a/arch/mips/mm/mmap.c
+++ b/arch/mips/mm/mmap.c
@@ -203,7 +203,7 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
203 203
204bool __virt_addr_valid(const volatile void *kaddr) 204bool __virt_addr_valid(const volatile void *kaddr)
205{ 205{
206 unsigned long vaddr = (unsigned long)vaddr; 206 unsigned long vaddr = (unsigned long)kaddr;
207 207
208 if ((vaddr < PAGE_OFFSET) || (vaddr >= MAP_BASE)) 208 if ((vaddr < PAGE_OFFSET) || (vaddr >= MAP_BASE))
209 return false; 209 return false;
diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index 65b6e85447b1..144ceb0fba88 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -391,6 +391,7 @@ static struct work_registers build_get_work_registers(u32 **p)
391static void build_restore_work_registers(u32 **p) 391static void build_restore_work_registers(u32 **p)
392{ 392{
393 if (scratch_reg >= 0) { 393 if (scratch_reg >= 0) {
394 uasm_i_ehb(p);
394 UASM_i_MFC0(p, 1, c0_kscratch(), scratch_reg); 395 UASM_i_MFC0(p, 1, c0_kscratch(), scratch_reg);
395 return; 396 return;
396 } 397 }
@@ -668,10 +669,12 @@ static void build_restore_pagemask(u32 **p, struct uasm_reloc **r,
668 uasm_i_mtc0(p, 0, C0_PAGEMASK); 669 uasm_i_mtc0(p, 0, C0_PAGEMASK);
669 uasm_il_b(p, r, lid); 670 uasm_il_b(p, r, lid);
670 } 671 }
671 if (scratch_reg >= 0) 672 if (scratch_reg >= 0) {
673 uasm_i_ehb(p);
672 UASM_i_MFC0(p, 1, c0_kscratch(), scratch_reg); 674 UASM_i_MFC0(p, 1, c0_kscratch(), scratch_reg);
673 else 675 } else {
674 UASM_i_LW(p, 1, scratchpad_offset(0), 0); 676 UASM_i_LW(p, 1, scratchpad_offset(0), 0);
677 }
675 } else { 678 } else {
676 /* Reset default page size */ 679 /* Reset default page size */
677 if (PM_DEFAULT_MASK >> 16) { 680 if (PM_DEFAULT_MASK >> 16) {
@@ -938,10 +941,12 @@ build_get_pgd_vmalloc64(u32 **p, struct uasm_label **l, struct uasm_reloc **r,
938 uasm_i_jr(p, ptr); 941 uasm_i_jr(p, ptr);
939 942
940 if (mode == refill_scratch) { 943 if (mode == refill_scratch) {
941 if (scratch_reg >= 0) 944 if (scratch_reg >= 0) {
945 uasm_i_ehb(p);
942 UASM_i_MFC0(p, 1, c0_kscratch(), scratch_reg); 946 UASM_i_MFC0(p, 1, c0_kscratch(), scratch_reg);
943 else 947 } else {
944 UASM_i_LW(p, 1, scratchpad_offset(0), 0); 948 UASM_i_LW(p, 1, scratchpad_offset(0), 0);
949 }
945 } else { 950 } else {
946 uasm_i_nop(p); 951 uasm_i_nop(p);
947 } 952 }
@@ -1258,6 +1263,7 @@ build_fast_tlb_refill_handler (u32 **p, struct uasm_label **l,
1258 UASM_i_MTC0(p, odd, C0_ENTRYLO1); /* load it */ 1263 UASM_i_MTC0(p, odd, C0_ENTRYLO1); /* load it */
1259 1264
1260 if (c0_scratch_reg >= 0) { 1265 if (c0_scratch_reg >= 0) {
1266 uasm_i_ehb(p);
1261 UASM_i_MFC0(p, scratch, c0_kscratch(), c0_scratch_reg); 1267 UASM_i_MFC0(p, scratch, c0_kscratch(), c0_scratch_reg);
1262 build_tlb_write_entry(p, l, r, tlb_random); 1268 build_tlb_write_entry(p, l, r, tlb_random);
1263 uasm_l_leave(l, *p); 1269 uasm_l_leave(l, *p);
@@ -1603,15 +1609,17 @@ static void build_setup_pgd(void)
1603 uasm_i_dinsm(&p, a0, 0, 29, 64 - 29); 1609 uasm_i_dinsm(&p, a0, 0, 29, 64 - 29);
1604 uasm_l_tlbl_goaround1(&l, p); 1610 uasm_l_tlbl_goaround1(&l, p);
1605 UASM_i_SLL(&p, a0, a0, 11); 1611 UASM_i_SLL(&p, a0, a0, 11);
1606 uasm_i_jr(&p, 31);
1607 UASM_i_MTC0(&p, a0, C0_CONTEXT); 1612 UASM_i_MTC0(&p, a0, C0_CONTEXT);
1613 uasm_i_jr(&p, 31);
1614 uasm_i_ehb(&p);
1608 } else { 1615 } else {
1609 /* PGD in c0_KScratch */ 1616 /* PGD in c0_KScratch */
1610 uasm_i_jr(&p, 31);
1611 if (cpu_has_ldpte) 1617 if (cpu_has_ldpte)
1612 UASM_i_MTC0(&p, a0, C0_PWBASE); 1618 UASM_i_MTC0(&p, a0, C0_PWBASE);
1613 else 1619 else
1614 UASM_i_MTC0(&p, a0, c0_kscratch(), pgd_reg); 1620 UASM_i_MTC0(&p, a0, c0_kscratch(), pgd_reg);
1621 uasm_i_jr(&p, 31);
1622 uasm_i_ehb(&p);
1615 } 1623 }
1616#else 1624#else
1617#ifdef CONFIG_SMP 1625#ifdef CONFIG_SMP
@@ -1625,13 +1633,16 @@ static void build_setup_pgd(void)
1625 UASM_i_LA_mostly(&p, a2, pgdc); 1633 UASM_i_LA_mostly(&p, a2, pgdc);
1626 UASM_i_SW(&p, a0, uasm_rel_lo(pgdc), a2); 1634 UASM_i_SW(&p, a0, uasm_rel_lo(pgdc), a2);
1627#endif /* SMP */ 1635#endif /* SMP */
1628 uasm_i_jr(&p, 31);
1629 1636
1630 /* if pgd_reg is allocated, save PGD also to scratch register */ 1637 /* if pgd_reg is allocated, save PGD also to scratch register */
1631 if (pgd_reg != -1) 1638 if (pgd_reg != -1) {
1632 UASM_i_MTC0(&p, a0, c0_kscratch(), pgd_reg); 1639 UASM_i_MTC0(&p, a0, c0_kscratch(), pgd_reg);
1633 else 1640 uasm_i_jr(&p, 31);
1641 uasm_i_ehb(&p);
1642 } else {
1643 uasm_i_jr(&p, 31);
1634 uasm_i_nop(&p); 1644 uasm_i_nop(&p);
1645 }
1635#endif 1646#endif
1636 if (p >= (u32 *)tlbmiss_handler_setup_pgd_end) 1647 if (p >= (u32 *)tlbmiss_handler_setup_pgd_end)
1637 panic("tlbmiss_handler_setup_pgd space exceeded"); 1648 panic("tlbmiss_handler_setup_pgd space exceeded");
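
The common thread in these tlbex.c hunks is an execution hazard barrier: uasm_i_ehb()
is emitted so that a CP0 write (typically MTC0 into a KScratch register) has taken
effect before the code depends on it, either by reading the register back with MFC0
or by returning to the caller. A condensed sketch of the ordering established in
build_setup_pgd(), using the same locals that appear in the diff:

        UASM_i_MTC0(&p, a0, c0_kscratch(), pgd_reg); /* store PGD pointer in KScratch */
        uasm_i_jr(&p, 31);                           /* return to the caller ...       */
        uasm_i_ehb(&p);                              /* ... with the barrier in the delay slot */
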
diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c
index f241ded9239b..1f0f29a289d3 100644
--- a/arch/parisc/kernel/module.c
+++ b/arch/parisc/kernel/module.c
@@ -786,6 +786,10 @@ int apply_relocate_add(Elf_Shdr *sechdrs,
786 /* 32-bit PC relative address */ 786 /* 32-bit PC relative address */
787 *loc = val - dot - 8 + addend; 787 *loc = val - dot - 8 + addend;
788 break; 788 break;
789 case R_PARISC_PCREL64:
790 /* 64-bit PC relative address */
791 *loc64 = val - dot - 8 + addend;
792 break;
789 case R_PARISC_DIR64: 793 case R_PARISC_DIR64:
790 /* 64-bit effective address */ 794 /* 64-bit effective address */
791 *loc64 = val + addend; 795 *loc64 = val + addend;
diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index 52eafaf74054..31c231ea56b7 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -297,24 +297,24 @@ static __inline__ int atomic_dec_if_positive(atomic_t *v)
297 297
298#define ATOMIC64_INIT(i) { (i) } 298#define ATOMIC64_INIT(i) { (i) }
299 299
300static __inline__ long atomic64_read(const atomic64_t *v) 300static __inline__ s64 atomic64_read(const atomic64_t *v)
301{ 301{
302 long t; 302 s64 t;
303 303
304 __asm__ __volatile__("ld%U1%X1 %0,%1" : "=r"(t) : "m"(v->counter)); 304 __asm__ __volatile__("ld%U1%X1 %0,%1" : "=r"(t) : "m"(v->counter));
305 305
306 return t; 306 return t;
307} 307}
308 308
309static __inline__ void atomic64_set(atomic64_t *v, long i) 309static __inline__ void atomic64_set(atomic64_t *v, s64 i)
310{ 310{
311 __asm__ __volatile__("std%U0%X0 %1,%0" : "=m"(v->counter) : "r"(i)); 311 __asm__ __volatile__("std%U0%X0 %1,%0" : "=m"(v->counter) : "r"(i));
312} 312}
313 313
314#define ATOMIC64_OP(op, asm_op) \ 314#define ATOMIC64_OP(op, asm_op) \
315static __inline__ void atomic64_##op(long a, atomic64_t *v) \ 315static __inline__ void atomic64_##op(s64 a, atomic64_t *v) \
316{ \ 316{ \
317 long t; \ 317 s64 t; \
318 \ 318 \
319 __asm__ __volatile__( \ 319 __asm__ __volatile__( \
320"1: ldarx %0,0,%3 # atomic64_" #op "\n" \ 320"1: ldarx %0,0,%3 # atomic64_" #op "\n" \
@@ -327,10 +327,10 @@ static __inline__ void atomic64_##op(long a, atomic64_t *v) \
327} 327}
328 328
329#define ATOMIC64_OP_RETURN_RELAXED(op, asm_op) \ 329#define ATOMIC64_OP_RETURN_RELAXED(op, asm_op) \
330static inline long \ 330static inline s64 \
331atomic64_##op##_return_relaxed(long a, atomic64_t *v) \ 331atomic64_##op##_return_relaxed(s64 a, atomic64_t *v) \
332{ \ 332{ \
333 long t; \ 333 s64 t; \
334 \ 334 \
335 __asm__ __volatile__( \ 335 __asm__ __volatile__( \
336"1: ldarx %0,0,%3 # atomic64_" #op "_return_relaxed\n" \ 336"1: ldarx %0,0,%3 # atomic64_" #op "_return_relaxed\n" \
@@ -345,10 +345,10 @@ atomic64_##op##_return_relaxed(long a, atomic64_t *v) \
345} 345}
346 346
347#define ATOMIC64_FETCH_OP_RELAXED(op, asm_op) \ 347#define ATOMIC64_FETCH_OP_RELAXED(op, asm_op) \
348static inline long \ 348static inline s64 \
349atomic64_fetch_##op##_relaxed(long a, atomic64_t *v) \ 349atomic64_fetch_##op##_relaxed(s64 a, atomic64_t *v) \
350{ \ 350{ \
351 long res, t; \ 351 s64 res, t; \
352 \ 352 \
353 __asm__ __volatile__( \ 353 __asm__ __volatile__( \
354"1: ldarx %0,0,%4 # atomic64_fetch_" #op "_relaxed\n" \ 354"1: ldarx %0,0,%4 # atomic64_fetch_" #op "_relaxed\n" \
@@ -396,7 +396,7 @@ ATOMIC64_OPS(xor, xor)
396 396
397static __inline__ void atomic64_inc(atomic64_t *v) 397static __inline__ void atomic64_inc(atomic64_t *v)
398{ 398{
399 long t; 399 s64 t;
400 400
401 __asm__ __volatile__( 401 __asm__ __volatile__(
402"1: ldarx %0,0,%2 # atomic64_inc\n\ 402"1: ldarx %0,0,%2 # atomic64_inc\n\
@@ -409,9 +409,9 @@ static __inline__ void atomic64_inc(atomic64_t *v)
409} 409}
410#define atomic64_inc atomic64_inc 410#define atomic64_inc atomic64_inc
411 411
412static __inline__ long atomic64_inc_return_relaxed(atomic64_t *v) 412static __inline__ s64 atomic64_inc_return_relaxed(atomic64_t *v)
413{ 413{
414 long t; 414 s64 t;
415 415
416 __asm__ __volatile__( 416 __asm__ __volatile__(
417"1: ldarx %0,0,%2 # atomic64_inc_return_relaxed\n" 417"1: ldarx %0,0,%2 # atomic64_inc_return_relaxed\n"
@@ -427,7 +427,7 @@ static __inline__ long atomic64_inc_return_relaxed(atomic64_t *v)
427 427
428static __inline__ void atomic64_dec(atomic64_t *v) 428static __inline__ void atomic64_dec(atomic64_t *v)
429{ 429{
430 long t; 430 s64 t;
431 431
432 __asm__ __volatile__( 432 __asm__ __volatile__(
433"1: ldarx %0,0,%2 # atomic64_dec\n\ 433"1: ldarx %0,0,%2 # atomic64_dec\n\
@@ -440,9 +440,9 @@ static __inline__ void atomic64_dec(atomic64_t *v)
440} 440}
441#define atomic64_dec atomic64_dec 441#define atomic64_dec atomic64_dec
442 442
443static __inline__ long atomic64_dec_return_relaxed(atomic64_t *v) 443static __inline__ s64 atomic64_dec_return_relaxed(atomic64_t *v)
444{ 444{
445 long t; 445 s64 t;
446 446
447 __asm__ __volatile__( 447 __asm__ __volatile__(
448"1: ldarx %0,0,%2 # atomic64_dec_return_relaxed\n" 448"1: ldarx %0,0,%2 # atomic64_dec_return_relaxed\n"
@@ -463,9 +463,9 @@ static __inline__ long atomic64_dec_return_relaxed(atomic64_t *v)
463 * Atomically test *v and decrement if it is greater than 0. 463 * Atomically test *v and decrement if it is greater than 0.
464 * The function returns the old value of *v minus 1. 464 * The function returns the old value of *v minus 1.
465 */ 465 */
466static __inline__ long atomic64_dec_if_positive(atomic64_t *v) 466static __inline__ s64 atomic64_dec_if_positive(atomic64_t *v)
467{ 467{
468 long t; 468 s64 t;
469 469
470 __asm__ __volatile__( 470 __asm__ __volatile__(
471 PPC_ATOMIC_ENTRY_BARRIER 471 PPC_ATOMIC_ENTRY_BARRIER
@@ -502,9 +502,9 @@ static __inline__ long atomic64_dec_if_positive(atomic64_t *v)
502 * Atomically adds @a to @v, so long as it was not @u. 502 * Atomically adds @a to @v, so long as it was not @u.
503 * Returns the old value of @v. 503 * Returns the old value of @v.
504 */ 504 */
505static __inline__ long atomic64_fetch_add_unless(atomic64_t *v, long a, long u) 505static __inline__ s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
506{ 506{
507 long t; 507 s64 t;
508 508
509 __asm__ __volatile__ ( 509 __asm__ __volatile__ (
510 PPC_ATOMIC_ENTRY_BARRIER 510 PPC_ATOMIC_ENTRY_BARRIER
@@ -534,7 +534,7 @@ static __inline__ long atomic64_fetch_add_unless(atomic64_t *v, long a, long u)
534 */ 534 */
535static __inline__ int atomic64_inc_not_zero(atomic64_t *v) 535static __inline__ int atomic64_inc_not_zero(atomic64_t *v)
536{ 536{
537 long t1, t2; 537 s64 t1, t2;
538 538
539 __asm__ __volatile__ ( 539 __asm__ __volatile__ (
540 PPC_ATOMIC_ENTRY_BARRIER 540 PPC_ATOMIC_ENTRY_BARRIER
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index ef573fe9873e..a9993e7a443b 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -346,8 +346,6 @@ static inline unsigned long __pack_fe01(unsigned int fpmode)
346 346
347#define spin_cpu_relax() barrier() 347#define spin_cpu_relax() barrier()
348 348
349#define spin_cpu_yield() spin_cpu_relax()
350
351#define spin_end() HMT_medium() 349#define spin_end() HMT_medium()
352 350
353#define spin_until_cond(cond) \ 351#define spin_until_cond(cond) \
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 6b86055e5251..73ba246ca11d 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -315,7 +315,7 @@ TRAMP_REAL_BEGIN(machine_check_common_early)
315 mfspr r11,SPRN_DSISR /* Save DSISR */ 315 mfspr r11,SPRN_DSISR /* Save DSISR */
316 std r11,_DSISR(r1) 316 std r11,_DSISR(r1)
317 std r9,_CCR(r1) /* Save CR in stackframe */ 317 std r9,_CCR(r1) /* Save CR in stackframe */
318 kuap_save_amr_and_lock r9, r10, cr1 318 /* We don't touch AMR here, we never go to virtual mode */
319 /* Save r9 through r13 from EXMC save area to stack frame. */ 319 /* Save r9 through r13 from EXMC save area to stack frame. */
320 EXCEPTION_PROLOG_COMMON_2(PACA_EXMC) 320 EXCEPTION_PROLOG_COMMON_2(PACA_EXMC)
321 mfmsr r11 /* get MSR value */ 321 mfmsr r11 /* get MSR value */
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 684b0b315c32..8c92febf5f44 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -2521,7 +2521,6 @@ void ptrace_disable(struct task_struct *child)
2521{ 2521{
2522 /* make sure the single step bit is not set. */ 2522 /* make sure the single step bit is not set. */
2523 user_disable_single_step(child); 2523 user_disable_single_step(child);
2524 clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
2525} 2524}
2526 2525
2527#ifdef CONFIG_PPC_ADV_DEBUG_REGS 2526#ifdef CONFIG_PPC_ADV_DEBUG_REGS
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index b824f4c69622..0ab4c72515c4 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -990,8 +990,7 @@ int rtas_ibm_suspend_me(u64 handle)
990 /* Call function on all CPUs. One of us will make the 990 /* Call function on all CPUs. One of us will make the
991 * rtas call 991 * rtas call
992 */ 992 */
993 if (on_each_cpu(rtas_percpu_suspend_me, &data, 0)) 993 on_each_cpu(rtas_percpu_suspend_me, &data, 0);
994 atomic_set(&data.error, -EINVAL);
995 994
996 wait_for_completion(&done); 995 wait_for_completion(&done);
997 996
diff --git a/arch/powerpc/mm/book3s64/mmu_context.c b/arch/powerpc/mm/book3s64/mmu_context.c
index bb70391401f7..794404d50a85 100644
--- a/arch/powerpc/mm/book3s64/mmu_context.c
+++ b/arch/powerpc/mm/book3s64/mmu_context.c
@@ -50,20 +50,52 @@ EXPORT_SYMBOL_GPL(hash__alloc_context_id);
50 50
51void slb_setup_new_exec(void); 51void slb_setup_new_exec(void);
52 52
53static int realloc_context_ids(mm_context_t *ctx)
54{
55 int i, id;
56
57 /*
58 * id 0 (aka. ctx->id) is special, we always allocate a new one, even if
59 * there wasn't one allocated previously (which happens in the exec
60 * case where ctx is newly allocated).
61 *
62 * We have to be a bit careful here. We must keep the existing ids in
63 * the array, so that we can test if they're non-zero to decide if we
64 * need to allocate a new one. However in case of error we must free the
65 * ids we've allocated but *not* any of the existing ones (or risk a
66 * UAF). That's why we decrement i at the start of the error handling
67 * loop, to skip the id that we just tested but couldn't reallocate.
68 */
69 for (i = 0; i < ARRAY_SIZE(ctx->extended_id); i++) {
70 if (i == 0 || ctx->extended_id[i]) {
71 id = hash__alloc_context_id();
72 if (id < 0)
73 goto error;
74
75 ctx->extended_id[i] = id;
76 }
77 }
78
79 /* The caller expects us to return id */
80 return ctx->id;
81
82error:
83 for (i--; i >= 0; i--) {
84 if (ctx->extended_id[i])
85 ida_free(&mmu_context_ida, ctx->extended_id[i]);
86 }
87
88 return id;
89}
90
53static int hash__init_new_context(struct mm_struct *mm) 91static int hash__init_new_context(struct mm_struct *mm)
54{ 92{
55 int index; 93 int index;
56 94
57 index = hash__alloc_context_id();
58 if (index < 0)
59 return index;
60
61 mm->context.hash_context = kmalloc(sizeof(struct hash_mm_context), 95 mm->context.hash_context = kmalloc(sizeof(struct hash_mm_context),
62 GFP_KERNEL); 96 GFP_KERNEL);
63 if (!mm->context.hash_context) { 97 if (!mm->context.hash_context)
64 ida_free(&mmu_context_ida, index);
65 return -ENOMEM; 98 return -ENOMEM;
66 }
67 99
68 /* 100 /*
69 * The old code would re-promote on fork, we don't do that when using 101 * The old code would re-promote on fork, we don't do that when using
@@ -91,13 +123,20 @@ static int hash__init_new_context(struct mm_struct *mm)
91 mm->context.hash_context->spt = kmalloc(sizeof(struct subpage_prot_table), 123 mm->context.hash_context->spt = kmalloc(sizeof(struct subpage_prot_table),
92 GFP_KERNEL); 124 GFP_KERNEL);
93 if (!mm->context.hash_context->spt) { 125 if (!mm->context.hash_context->spt) {
94 ida_free(&mmu_context_ida, index);
95 kfree(mm->context.hash_context); 126 kfree(mm->context.hash_context);
96 return -ENOMEM; 127 return -ENOMEM;
97 } 128 }
98 } 129 }
99#endif 130#endif
131 }
100 132
133 index = realloc_context_ids(&mm->context);
134 if (index < 0) {
135#ifdef CONFIG_PPC_SUBPAGE_PROT
136 kfree(mm->context.hash_context->spt);
137#endif
138 kfree(mm->context.hash_context);
139 return index;
101 } 140 }
102 141
103 pkey_mm_init(mm); 142 pkey_mm_init(mm);
diff --git a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi
index 3c06ee4b2b29..40983491b95f 100644
--- a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi
+++ b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi
@@ -163,6 +163,7 @@
163 interrupt-parent = <&plic0>; 163 interrupt-parent = <&plic0>;
164 interrupts = <4>; 164 interrupts = <4>;
165 clocks = <&prci PRCI_CLK_TLCLK>; 165 clocks = <&prci PRCI_CLK_TLCLK>;
166 status = "disabled";
166 }; 167 };
167 uart1: serial@10011000 { 168 uart1: serial@10011000 {
168 compatible = "sifive,fu540-c000-uart", "sifive,uart0"; 169 compatible = "sifive,fu540-c000-uart", "sifive,uart0";
@@ -170,6 +171,7 @@
170 interrupt-parent = <&plic0>; 171 interrupt-parent = <&plic0>;
171 interrupts = <5>; 172 interrupts = <5>;
172 clocks = <&prci PRCI_CLK_TLCLK>; 173 clocks = <&prci PRCI_CLK_TLCLK>;
174 status = "disabled";
173 }; 175 };
174 i2c0: i2c@10030000 { 176 i2c0: i2c@10030000 {
175 compatible = "sifive,fu540-c000-i2c", "sifive,i2c0"; 177 compatible = "sifive,fu540-c000-i2c", "sifive,i2c0";
@@ -181,6 +183,7 @@
181 reg-io-width = <1>; 183 reg-io-width = <1>;
182 #address-cells = <1>; 184 #address-cells = <1>;
183 #size-cells = <0>; 185 #size-cells = <0>;
186 status = "disabled";
184 }; 187 };
185 qspi0: spi@10040000 { 188 qspi0: spi@10040000 {
186 compatible = "sifive,fu540-c000-spi", "sifive,spi0"; 189 compatible = "sifive,fu540-c000-spi", "sifive,spi0";
@@ -191,6 +194,7 @@
191 clocks = <&prci PRCI_CLK_TLCLK>; 194 clocks = <&prci PRCI_CLK_TLCLK>;
192 #address-cells = <1>; 195 #address-cells = <1>;
193 #size-cells = <0>; 196 #size-cells = <0>;
197 status = "disabled";
194 }; 198 };
195 qspi1: spi@10041000 { 199 qspi1: spi@10041000 {
196 compatible = "sifive,fu540-c000-spi", "sifive,spi0"; 200 compatible = "sifive,fu540-c000-spi", "sifive,spi0";
@@ -201,6 +205,7 @@
201 clocks = <&prci PRCI_CLK_TLCLK>; 205 clocks = <&prci PRCI_CLK_TLCLK>;
202 #address-cells = <1>; 206 #address-cells = <1>;
203 #size-cells = <0>; 207 #size-cells = <0>;
208 status = "disabled";
204 }; 209 };
205 qspi2: spi@10050000 { 210 qspi2: spi@10050000 {
206 compatible = "sifive,fu540-c000-spi", "sifive,spi0"; 211 compatible = "sifive,fu540-c000-spi", "sifive,spi0";
@@ -210,6 +215,7 @@
210 clocks = <&prci PRCI_CLK_TLCLK>; 215 clocks = <&prci PRCI_CLK_TLCLK>;
211 #address-cells = <1>; 216 #address-cells = <1>;
212 #size-cells = <0>; 217 #size-cells = <0>;
218 status = "disabled";
213 }; 219 };
214 }; 220 };
215}; 221};
diff --git a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts
index 4da88707e28f..0b55c53c08c7 100644
--- a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts
+++ b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts
@@ -42,7 +42,20 @@
42 }; 42 };
43}; 43};
44 44
45&uart0 {
46 status = "okay";
47};
48
49&uart1 {
50 status = "okay";
51};
52
53&i2c0 {
54 status = "okay";
55};
56
45&qspi0 { 57&qspi0 {
58 status = "okay";
46 flash@0 { 59 flash@0 {
47 compatible = "issi,is25wp256", "jedec,spi-nor"; 60 compatible = "issi,is25wp256", "jedec,spi-nor";
48 reg = <0>; 61 reg = <0>;
diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig
index 4f02967e55de..04944fb4fa7a 100644
--- a/arch/riscv/configs/defconfig
+++ b/arch/riscv/configs/defconfig
@@ -69,6 +69,7 @@ CONFIG_VIRTIO_MMIO=y
69CONFIG_CLK_SIFIVE=y 69CONFIG_CLK_SIFIVE=y
70CONFIG_CLK_SIFIVE_FU540_PRCI=y 70CONFIG_CLK_SIFIVE_FU540_PRCI=y
71CONFIG_SIFIVE_PLIC=y 71CONFIG_SIFIVE_PLIC=y
72CONFIG_SPI_SIFIVE=y
72CONFIG_EXT4_FS=y 73CONFIG_EXT4_FS=y
73CONFIG_EXT4_FS_POSIX_ACL=y 74CONFIG_EXT4_FS_POSIX_ACL=y
74CONFIG_AUTOFS4_FS=y 75CONFIG_AUTOFS4_FS=y
@@ -84,4 +85,8 @@ CONFIG_ROOT_NFS=y
84CONFIG_CRYPTO_USER_API_HASH=y 85CONFIG_CRYPTO_USER_API_HASH=y
85CONFIG_CRYPTO_DEV_VIRTIO=y 86CONFIG_CRYPTO_DEV_VIRTIO=y
86CONFIG_PRINTK_TIME=y 87CONFIG_PRINTK_TIME=y
88CONFIG_SPI=y
89CONFIG_MMC_SPI=y
90CONFIG_MMC=y
91CONFIG_DEVTMPFS_MOUNT=y
87# CONFIG_RCU_TRACE is not set 92# CONFIG_RCU_TRACE is not set
diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h
index 9038aeb900a6..96f95c9ebd97 100644
--- a/arch/riscv/include/asm/atomic.h
+++ b/arch/riscv/include/asm/atomic.h
@@ -38,11 +38,11 @@ static __always_inline void atomic_set(atomic_t *v, int i)
38 38
39#ifndef CONFIG_GENERIC_ATOMIC64 39#ifndef CONFIG_GENERIC_ATOMIC64
40#define ATOMIC64_INIT(i) { (i) } 40#define ATOMIC64_INIT(i) { (i) }
41static __always_inline long atomic64_read(const atomic64_t *v) 41static __always_inline s64 atomic64_read(const atomic64_t *v)
42{ 42{
43 return READ_ONCE(v->counter); 43 return READ_ONCE(v->counter);
44} 44}
45static __always_inline void atomic64_set(atomic64_t *v, long i) 45static __always_inline void atomic64_set(atomic64_t *v, s64 i)
46{ 46{
47 WRITE_ONCE(v->counter, i); 47 WRITE_ONCE(v->counter, i);
48} 48}
@@ -66,11 +66,11 @@ void atomic##prefix##_##op(c_type i, atomic##prefix##_t *v) \
66 66
67#ifdef CONFIG_GENERIC_ATOMIC64 67#ifdef CONFIG_GENERIC_ATOMIC64
68#define ATOMIC_OPS(op, asm_op, I) \ 68#define ATOMIC_OPS(op, asm_op, I) \
69 ATOMIC_OP (op, asm_op, I, w, int, ) 69 ATOMIC_OP (op, asm_op, I, w, int, )
70#else 70#else
71#define ATOMIC_OPS(op, asm_op, I) \ 71#define ATOMIC_OPS(op, asm_op, I) \
72 ATOMIC_OP (op, asm_op, I, w, int, ) \ 72 ATOMIC_OP (op, asm_op, I, w, int, ) \
73 ATOMIC_OP (op, asm_op, I, d, long, 64) 73 ATOMIC_OP (op, asm_op, I, d, s64, 64)
74#endif 74#endif
75 75
76ATOMIC_OPS(add, add, i) 76ATOMIC_OPS(add, add, i)
@@ -127,14 +127,14 @@ c_type atomic##prefix##_##op##_return(c_type i, atomic##prefix##_t *v) \
127 127
128#ifdef CONFIG_GENERIC_ATOMIC64 128#ifdef CONFIG_GENERIC_ATOMIC64
129#define ATOMIC_OPS(op, asm_op, c_op, I) \ 129#define ATOMIC_OPS(op, asm_op, c_op, I) \
130 ATOMIC_FETCH_OP( op, asm_op, I, w, int, ) \ 130 ATOMIC_FETCH_OP( op, asm_op, I, w, int, ) \
131 ATOMIC_OP_RETURN(op, asm_op, c_op, I, w, int, ) 131 ATOMIC_OP_RETURN(op, asm_op, c_op, I, w, int, )
132#else 132#else
133#define ATOMIC_OPS(op, asm_op, c_op, I) \ 133#define ATOMIC_OPS(op, asm_op, c_op, I) \
134 ATOMIC_FETCH_OP( op, asm_op, I, w, int, ) \ 134 ATOMIC_FETCH_OP( op, asm_op, I, w, int, ) \
135 ATOMIC_OP_RETURN(op, asm_op, c_op, I, w, int, ) \ 135 ATOMIC_OP_RETURN(op, asm_op, c_op, I, w, int, ) \
136 ATOMIC_FETCH_OP( op, asm_op, I, d, long, 64) \ 136 ATOMIC_FETCH_OP( op, asm_op, I, d, s64, 64) \
137 ATOMIC_OP_RETURN(op, asm_op, c_op, I, d, long, 64) 137 ATOMIC_OP_RETURN(op, asm_op, c_op, I, d, s64, 64)
138#endif 138#endif
139 139
140ATOMIC_OPS(add, add, +, i) 140ATOMIC_OPS(add, add, +, i)
@@ -166,11 +166,11 @@ ATOMIC_OPS(sub, add, +, -i)
166 166
167#ifdef CONFIG_GENERIC_ATOMIC64 167#ifdef CONFIG_GENERIC_ATOMIC64
168#define ATOMIC_OPS(op, asm_op, I) \ 168#define ATOMIC_OPS(op, asm_op, I) \
169 ATOMIC_FETCH_OP(op, asm_op, I, w, int, ) 169 ATOMIC_FETCH_OP(op, asm_op, I, w, int, )
170#else 170#else
171#define ATOMIC_OPS(op, asm_op, I) \ 171#define ATOMIC_OPS(op, asm_op, I) \
172 ATOMIC_FETCH_OP(op, asm_op, I, w, int, ) \ 172 ATOMIC_FETCH_OP(op, asm_op, I, w, int, ) \
173 ATOMIC_FETCH_OP(op, asm_op, I, d, long, 64) 173 ATOMIC_FETCH_OP(op, asm_op, I, d, s64, 64)
174#endif 174#endif
175 175
176ATOMIC_OPS(and, and, i) 176ATOMIC_OPS(and, and, i)
@@ -219,9 +219,10 @@ static __always_inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
219#define atomic_fetch_add_unless atomic_fetch_add_unless 219#define atomic_fetch_add_unless atomic_fetch_add_unless
220 220
221#ifndef CONFIG_GENERIC_ATOMIC64 221#ifndef CONFIG_GENERIC_ATOMIC64
222static __always_inline long atomic64_fetch_add_unless(atomic64_t *v, long a, long u) 222static __always_inline s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
223{ 223{
224 long prev, rc; 224 s64 prev;
225 long rc;
225 226
226 __asm__ __volatile__ ( 227 __asm__ __volatile__ (
227 "0: lr.d %[p], %[c]\n" 228 "0: lr.d %[p], %[c]\n"
@@ -290,11 +291,11 @@ c_t atomic##prefix##_cmpxchg(atomic##prefix##_t *v, c_t o, c_t n) \
290 291
291#ifdef CONFIG_GENERIC_ATOMIC64 292#ifdef CONFIG_GENERIC_ATOMIC64
292#define ATOMIC_OPS() \ 293#define ATOMIC_OPS() \
293 ATOMIC_OP( int, , 4) 294 ATOMIC_OP(int, , 4)
294#else 295#else
295#define ATOMIC_OPS() \ 296#define ATOMIC_OPS() \
296 ATOMIC_OP( int, , 4) \ 297 ATOMIC_OP(int, , 4) \
297 ATOMIC_OP(long, 64, 8) 298 ATOMIC_OP(s64, 64, 8)
298#endif 299#endif
299 300
300ATOMIC_OPS() 301ATOMIC_OPS()
@@ -332,9 +333,10 @@ static __always_inline int atomic_sub_if_positive(atomic_t *v, int offset)
332#define atomic_dec_if_positive(v) atomic_sub_if_positive(v, 1) 333#define atomic_dec_if_positive(v) atomic_sub_if_positive(v, 1)
333 334
334#ifndef CONFIG_GENERIC_ATOMIC64 335#ifndef CONFIG_GENERIC_ATOMIC64
335static __always_inline long atomic64_sub_if_positive(atomic64_t *v, int offset) 336static __always_inline s64 atomic64_sub_if_positive(atomic64_t *v, s64 offset)
336{ 337{
337 long prev, rc; 338 s64 prev;
339 long rc;
338 340
339 __asm__ __volatile__ ( 341 __asm__ __volatile__ (
340 "0: lr.d %[p], %[c]\n" 342 "0: lr.d %[p], %[c]\n"
diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
index 3e2708c626a8..f960c3f4ce47 100644
--- a/arch/riscv/mm/fault.c
+++ b/arch/riscv/mm/fault.c
@@ -272,9 +272,6 @@ vmalloc_fault:
272 * entries, but in RISC-V, SFENCE.VMA specifies an 272 * entries, but in RISC-V, SFENCE.VMA specifies an
273 * ordering constraint, not a cache flush; it is 273 * ordering constraint, not a cache flush; it is
274 * necessary even after writing invalid entries. 274 * necessary even after writing invalid entries.
275 * Relying on flush_tlb_fix_spurious_fault would
276 * suffice, but the extra traps reduce
277 * performance. So, eagerly SFENCE.VMA.
278 */ 275 */
279 local_flush_tlb_page(addr); 276 local_flush_tlb_page(addr);
280 277
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 109243fdb6ec..fdb4246265a5 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -1,4 +1,7 @@
1# SPDX-License-Identifier: GPL-2.0 1# SPDX-License-Identifier: GPL-2.0
2config ARCH_HAS_MEM_ENCRYPT
3 def_bool y
4
2config MMU 5config MMU
3 def_bool y 6 def_bool y
4 7
@@ -30,7 +33,7 @@ config GENERIC_BUG_RELATIVE_POINTERS
30 def_bool y 33 def_bool y
31 34
32config GENERIC_LOCKBREAK 35config GENERIC_LOCKBREAK
33 def_bool y if SMP && PREEMPT 36 def_bool y if PREEMPT
34 37
35config PGSTE 38config PGSTE
36 def_bool y if KVM 39 def_bool y if KVM
@@ -113,7 +116,6 @@ config S390
113 select DYNAMIC_FTRACE if FUNCTION_TRACER 116 select DYNAMIC_FTRACE if FUNCTION_TRACER
114 select GENERIC_CLOCKEVENTS 117 select GENERIC_CLOCKEVENTS
115 select GENERIC_CPU_AUTOPROBE 118 select GENERIC_CPU_AUTOPROBE
116 select GENERIC_CPU_DEVICES if !SMP
117 select GENERIC_CPU_VULNERABILITIES 119 select GENERIC_CPU_VULNERABILITIES
118 select GENERIC_FIND_FIRST_BIT 120 select GENERIC_FIND_FIRST_BIT
119 select GENERIC_SMP_IDLE_THREAD 121 select GENERIC_SMP_IDLE_THREAD
@@ -187,6 +189,8 @@ config S390
187 select VIRT_CPU_ACCOUNTING 189 select VIRT_CPU_ACCOUNTING
188 select ARCH_HAS_SCALED_CPUTIME 190 select ARCH_HAS_SCALED_CPUTIME
189 select HAVE_NMI 191 select HAVE_NMI
192 select SWIOTLB
193 select GENERIC_ALLOCATOR
190 194
191 195
192config SCHED_OMIT_FRAME_POINTER 196config SCHED_OMIT_FRAME_POINTER
@@ -399,27 +403,10 @@ config SYSVIPC_COMPAT
399 403
400config SMP 404config SMP
401 def_bool y 405 def_bool y
402 prompt "Symmetric multi-processing support"
403 ---help---
404 This enables support for systems with more than one CPU. If you have
405 a system with only one CPU, like most personal computers, say N. If
406 you have a system with more than one CPU, say Y.
407
408 If you say N here, the kernel will run on uni- and multiprocessor
409 machines, but will use only one CPU of a multiprocessor machine. If
410 you say Y here, the kernel will run on many, but not all,
411 uniprocessor machines. On a uniprocessor machine, the kernel
412 will run faster if you say N here.
413
414 See also the SMP-HOWTO available at
415 <http://www.tldp.org/docs.html#howto>.
416
417 Even if you don't know what to do here, say Y.
418 406
419config NR_CPUS 407config NR_CPUS
420 int "Maximum number of CPUs (2-512)" 408 int "Maximum number of CPUs (2-512)"
421 range 2 512 409 range 2 512
422 depends on SMP
423 default "64" 410 default "64"
424 help 411 help
425 This allows you to specify the maximum number of CPUs which this 412 This allows you to specify the maximum number of CPUs which this
@@ -431,12 +418,6 @@ config NR_CPUS
431 418
432config HOTPLUG_CPU 419config HOTPLUG_CPU
433 def_bool y 420 def_bool y
434 prompt "Support for hot-pluggable CPUs"
435 depends on SMP
436 help
437 Say Y here to be able to turn CPUs off and on. CPUs
438 can be controlled through /sys/devices/system/cpu/cpu#.
439 Say N if you want to disable CPU hotplug.
440 421
441# Some NUMA nodes have memory ranges that span 422# Some NUMA nodes have memory ranges that span
442# other nodes. Even though a pfn is valid and 423# other nodes. Even though a pfn is valid and
@@ -448,7 +429,7 @@ config NODES_SPAN_OTHER_NODES
448 429
449config NUMA 430config NUMA
450 bool "NUMA support" 431 bool "NUMA support"
451 depends on SMP && SCHED_TOPOLOGY 432 depends on SCHED_TOPOLOGY
452 default n 433 default n
453 help 434 help
454 Enable NUMA support 435 Enable NUMA support
@@ -523,7 +504,6 @@ config SCHED_DRAWER
523config SCHED_TOPOLOGY 504config SCHED_TOPOLOGY
524 def_bool y 505 def_bool y
525 prompt "Topology scheduler support" 506 prompt "Topology scheduler support"
526 depends on SMP
527 select SCHED_SMT 507 select SCHED_SMT
528 select SCHED_MC 508 select SCHED_MC
529 select SCHED_BOOK 509 select SCHED_BOOK
@@ -763,7 +743,7 @@ config PCI_NR_FUNCTIONS
763 This allows you to specify the maximum number of PCI functions which 743 This allows you to specify the maximum number of PCI functions which
764 this kernel will support. 744 this kernel will support.
765 745
766endif # PCI 746endif # PCI
767 747
768config HAS_IOMEM 748config HAS_IOMEM
769 def_bool PCI 749 def_bool PCI
@@ -829,16 +809,15 @@ menu "Dump support"
829 809
830config CRASH_DUMP 810config CRASH_DUMP
831 bool "kernel crash dumps" 811 bool "kernel crash dumps"
832 depends on SMP
833 select KEXEC 812 select KEXEC
834 help 813 help
835 Generate crash dump after being started by kexec. 814 Generate crash dump after being started by kexec.
836 Crash dump kernels are loaded in the main kernel with kexec-tools 815 Crash dump kernels are loaded in the main kernel with kexec-tools
837 into a specially reserved region and then later executed after 816 into a specially reserved region and then later executed after
838 a crash by kdump/kexec. 817 a crash by kdump/kexec.
839 Refer to <file:Documentation/s390/zfcpdump.txt> for more details on this. 818 Refer to <file:Documentation/s390/zfcpdump.rst> for more details on this.
840 This option also enables s390 zfcpdump. 819 This option also enables s390 zfcpdump.
841 See also <file:Documentation/s390/zfcpdump.txt> 820 See also <file:Documentation/s390/zfcpdump.rst>
842 821
843endmenu 822endmenu
844 823
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index b0920b35f87b..a6dc01a22048 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -88,6 +88,7 @@ CONFIG_HOTPLUG_PCI=y
88CONFIG_HOTPLUG_PCI_S390=y 88CONFIG_HOTPLUG_PCI_S390=y
89CONFIG_CHSC_SCH=y 89CONFIG_CHSC_SCH=y
90CONFIG_VFIO_AP=m 90CONFIG_VFIO_AP=m
91CONFIG_VFIO_CCW=m
91CONFIG_CRASH_DUMP=y 92CONFIG_CRASH_DUMP=y
92CONFIG_BINFMT_MISC=m 93CONFIG_BINFMT_MISC=m
93CONFIG_HIBERNATION=y 94CONFIG_HIBERNATION=y
@@ -498,6 +499,7 @@ CONFIG_VIRTIO_PCI=m
498CONFIG_VIRTIO_BALLOON=m 499CONFIG_VIRTIO_BALLOON=m
499CONFIG_VIRTIO_INPUT=y 500CONFIG_VIRTIO_INPUT=y
500CONFIG_S390_AP_IOMMU=y 501CONFIG_S390_AP_IOMMU=y
502CONFIG_S390_CCW_IOMMU=y
501CONFIG_EXT4_FS=y 503CONFIG_EXT4_FS=y
502CONFIG_EXT4_FS_POSIX_ACL=y 504CONFIG_EXT4_FS_POSIX_ACL=y
503CONFIG_EXT4_FS_SECURITY=y 505CONFIG_EXT4_FS_SECURITY=y
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig
index c59b922cb6c5..e4bc40073003 100644
--- a/arch/s390/configs/defconfig
+++ b/arch/s390/configs/defconfig
@@ -1,21 +1,22 @@
1CONFIG_SYSVIPC=y 1CONFIG_SYSVIPC=y
2CONFIG_POSIX_MQUEUE=y 2CONFIG_POSIX_MQUEUE=y
3CONFIG_USELIB=y
4CONFIG_AUDIT=y 3CONFIG_AUDIT=y
5CONFIG_NO_HZ_IDLE=y 4CONFIG_NO_HZ_IDLE=y
6CONFIG_HIGH_RES_TIMERS=y 5CONFIG_HIGH_RES_TIMERS=y
6CONFIG_BSD_PROCESS_ACCT=y
7CONFIG_BSD_PROCESS_ACCT_V3=y
7CONFIG_TASKSTATS=y 8CONFIG_TASKSTATS=y
8CONFIG_TASK_DELAY_ACCT=y 9CONFIG_TASK_DELAY_ACCT=y
9CONFIG_TASK_XACCT=y 10CONFIG_TASK_XACCT=y
10CONFIG_TASK_IO_ACCOUNTING=y 11CONFIG_TASK_IO_ACCOUNTING=y
11# CONFIG_CPU_ISOLATION is not set
12CONFIG_IKCONFIG=y 12CONFIG_IKCONFIG=y
13CONFIG_IKCONFIG_PROC=y 13CONFIG_IKCONFIG_PROC=y
14CONFIG_CGROUPS=y 14CONFIG_NUMA_BALANCING=y
15# CONFIG_NUMA_BALANCING_DEFAULT_ENABLED is not set
15CONFIG_MEMCG=y 16CONFIG_MEMCG=y
16CONFIG_MEMCG_SWAP=y 17CONFIG_MEMCG_SWAP=y
17CONFIG_BLK_CGROUP=y 18CONFIG_BLK_CGROUP=y
18CONFIG_CGROUP_SCHED=y 19CONFIG_CFS_BANDWIDTH=y
19CONFIG_RT_GROUP_SCHED=y 20CONFIG_RT_GROUP_SCHED=y
20CONFIG_CGROUP_PIDS=y 21CONFIG_CGROUP_PIDS=y
21CONFIG_CGROUP_FREEZER=y 22CONFIG_CGROUP_FREEZER=y
@@ -26,98 +27,402 @@ CONFIG_CGROUP_CPUACCT=y
26CONFIG_CGROUP_PERF=y 27CONFIG_CGROUP_PERF=y
27CONFIG_NAMESPACES=y 28CONFIG_NAMESPACES=y
28CONFIG_USER_NS=y 29CONFIG_USER_NS=y
29CONFIG_CHECKPOINT_RESTORE=y 30CONFIG_SCHED_AUTOGROUP=y
30CONFIG_BLK_DEV_INITRD=y 31CONFIG_BLK_DEV_INITRD=y
31CONFIG_EXPERT=y 32CONFIG_EXPERT=y
32# CONFIG_SYSFS_SYSCALL is not set 33# CONFIG_SYSFS_SYSCALL is not set
34CONFIG_CHECKPOINT_RESTORE=y
33CONFIG_BPF_SYSCALL=y 35CONFIG_BPF_SYSCALL=y
34CONFIG_USERFAULTFD=y 36CONFIG_USERFAULTFD=y
35# CONFIG_COMPAT_BRK is not set 37# CONFIG_COMPAT_BRK is not set
36CONFIG_PROFILING=y 38CONFIG_PROFILING=y
37CONFIG_LIVEPATCH=y 39CONFIG_OPROFILE=m
38CONFIG_NR_CPUS=256
39CONFIG_NUMA=y
40CONFIG_HZ_100=y
41CONFIG_KEXEC_FILE=y
42CONFIG_KEXEC_VERIFY_SIG=y
43CONFIG_CRASH_DUMP=y
44CONFIG_HIBERNATION=y
45CONFIG_PM_DEBUG=y
46CONFIG_CMM=m
47CONFIG_OPROFILE=y
48CONFIG_KPROBES=y 40CONFIG_KPROBES=y
49CONFIG_JUMP_LABEL=y 41CONFIG_JUMP_LABEL=y
50CONFIG_STATIC_KEYS_SELFTEST=y
51CONFIG_MODULES=y 42CONFIG_MODULES=y
43CONFIG_MODULE_FORCE_LOAD=y
52CONFIG_MODULE_UNLOAD=y 44CONFIG_MODULE_UNLOAD=y
45CONFIG_MODULE_FORCE_UNLOAD=y
46CONFIG_MODVERSIONS=y
47CONFIG_MODULE_SRCVERSION_ALL=y
48CONFIG_MODULE_SIG=y
49CONFIG_MODULE_SIG_SHA256=y
53CONFIG_BLK_DEV_INTEGRITY=y 50CONFIG_BLK_DEV_INTEGRITY=y
51CONFIG_BLK_DEV_THROTTLING=y
52CONFIG_BLK_WBT=y
53CONFIG_BLK_WBT_SQ=y
54CONFIG_PARTITION_ADVANCED=y 54CONFIG_PARTITION_ADVANCED=y
55CONFIG_IBM_PARTITION=y 55CONFIG_IBM_PARTITION=y
56CONFIG_BSD_DISKLABEL=y
57CONFIG_MINIX_SUBPARTITION=y
58CONFIG_SOLARIS_X86_PARTITION=y
59CONFIG_UNIXWARE_DISKLABEL=y
60CONFIG_CFQ_GROUP_IOSCHED=y
56CONFIG_DEFAULT_DEADLINE=y 61CONFIG_DEFAULT_DEADLINE=y
57CONFIG_BINFMT_MISC=m 62CONFIG_LIVEPATCH=y
63CONFIG_TUNE_ZEC12=y
64CONFIG_NR_CPUS=512
65CONFIG_NUMA=y
66CONFIG_HZ_100=y
67CONFIG_KEXEC_FILE=y
68CONFIG_KEXEC_VERIFY_SIG=y
69CONFIG_EXPOLINE=y
70CONFIG_EXPOLINE_AUTO=y
58CONFIG_MEMORY_HOTPLUG=y 71CONFIG_MEMORY_HOTPLUG=y
59CONFIG_MEMORY_HOTREMOVE=y 72CONFIG_MEMORY_HOTREMOVE=y
60CONFIG_KSM=y 73CONFIG_KSM=y
61CONFIG_TRANSPARENT_HUGEPAGE=y 74CONFIG_TRANSPARENT_HUGEPAGE=y
62CONFIG_CLEANCACHE=y 75CONFIG_CLEANCACHE=y
63CONFIG_FRONTSWAP=y 76CONFIG_FRONTSWAP=y
77CONFIG_MEM_SOFT_DIRTY=y
64CONFIG_ZSWAP=y 78CONFIG_ZSWAP=y
65CONFIG_ZBUD=m 79CONFIG_ZBUD=m
66CONFIG_ZSMALLOC=m 80CONFIG_ZSMALLOC=m
67CONFIG_ZSMALLOC_STAT=y 81CONFIG_ZSMALLOC_STAT=y
82CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
68CONFIG_IDLE_PAGE_TRACKING=y 83CONFIG_IDLE_PAGE_TRACKING=y
84CONFIG_PCI=y
85CONFIG_HOTPLUG_PCI=y
86CONFIG_HOTPLUG_PCI_S390=y
87CONFIG_CHSC_SCH=y
88CONFIG_VFIO_AP=m
89CONFIG_VFIO_CCW=m
90CONFIG_CRASH_DUMP=y
91CONFIG_BINFMT_MISC=m
92CONFIG_HIBERNATION=y
93CONFIG_PM_DEBUG=y
69CONFIG_NET=y 94CONFIG_NET=y
70CONFIG_PACKET=y 95CONFIG_PACKET=y
96CONFIG_PACKET_DIAG=m
71CONFIG_UNIX=y 97CONFIG_UNIX=y
72CONFIG_NET_KEY=y 98CONFIG_UNIX_DIAG=m
99CONFIG_XFRM_USER=m
100CONFIG_NET_KEY=m
101CONFIG_SMC=m
102CONFIG_SMC_DIAG=m
73CONFIG_INET=y 103CONFIG_INET=y
74CONFIG_IP_MULTICAST=y 104CONFIG_IP_MULTICAST=y
105CONFIG_IP_ADVANCED_ROUTER=y
106CONFIG_IP_MULTIPLE_TABLES=y
107CONFIG_IP_ROUTE_MULTIPATH=y
108CONFIG_IP_ROUTE_VERBOSE=y
109CONFIG_NET_IPIP=m
110CONFIG_NET_IPGRE_DEMUX=m
111CONFIG_NET_IPGRE=m
112CONFIG_NET_IPGRE_BROADCAST=y
113CONFIG_IP_MROUTE=y
114CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
115CONFIG_IP_PIMSM_V1=y
116CONFIG_IP_PIMSM_V2=y
117CONFIG_SYN_COOKIES=y
118CONFIG_NET_IPVTI=m
119CONFIG_INET_AH=m
120CONFIG_INET_ESP=m
121CONFIG_INET_IPCOMP=m
122CONFIG_INET_XFRM_MODE_TRANSPORT=m
123CONFIG_INET_XFRM_MODE_TUNNEL=m
124CONFIG_INET_XFRM_MODE_BEET=m
125CONFIG_INET_DIAG=m
126CONFIG_INET_UDP_DIAG=m
127CONFIG_TCP_CONG_ADVANCED=y
128CONFIG_TCP_CONG_HSTCP=m
129CONFIG_TCP_CONG_HYBLA=m
130CONFIG_TCP_CONG_SCALABLE=m
131CONFIG_TCP_CONG_LP=m
132CONFIG_TCP_CONG_VENO=m
133CONFIG_TCP_CONG_YEAH=m
134CONFIG_TCP_CONG_ILLINOIS=m
135CONFIG_IPV6_ROUTER_PREF=y
136CONFIG_INET6_AH=m
137CONFIG_INET6_ESP=m
138CONFIG_INET6_IPCOMP=m
139CONFIG_IPV6_MIP6=m
140CONFIG_INET6_XFRM_MODE_TRANSPORT=m
141CONFIG_INET6_XFRM_MODE_TUNNEL=m
142CONFIG_INET6_XFRM_MODE_BEET=m
143CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m
144CONFIG_IPV6_VTI=m
145CONFIG_IPV6_SIT=m
146CONFIG_IPV6_GRE=m
147CONFIG_IPV6_MULTIPLE_TABLES=y
148CONFIG_IPV6_SUBTREES=y
149CONFIG_NETFILTER=y
150CONFIG_NF_CONNTRACK=m
151CONFIG_NF_CONNTRACK_SECMARK=y
152CONFIG_NF_CONNTRACK_EVENTS=y
153CONFIG_NF_CONNTRACK_TIMEOUT=y
154CONFIG_NF_CONNTRACK_TIMESTAMP=y
155CONFIG_NF_CONNTRACK_AMANDA=m
156CONFIG_NF_CONNTRACK_FTP=m
157CONFIG_NF_CONNTRACK_H323=m
158CONFIG_NF_CONNTRACK_IRC=m
159CONFIG_NF_CONNTRACK_NETBIOS_NS=m
160CONFIG_NF_CONNTRACK_SNMP=m
161CONFIG_NF_CONNTRACK_PPTP=m
162CONFIG_NF_CONNTRACK_SANE=m
163CONFIG_NF_CONNTRACK_SIP=m
164CONFIG_NF_CONNTRACK_TFTP=m
165CONFIG_NF_CT_NETLINK=m
166CONFIG_NF_CT_NETLINK_TIMEOUT=m
167CONFIG_NF_TABLES=m
168CONFIG_NFT_CT=m
169CONFIG_NFT_COUNTER=m
170CONFIG_NFT_LOG=m
171CONFIG_NFT_LIMIT=m
172CONFIG_NFT_NAT=m
173CONFIG_NFT_COMPAT=m
174CONFIG_NFT_HASH=m
175CONFIG_NETFILTER_XT_SET=m
176CONFIG_NETFILTER_XT_TARGET_AUDIT=m
177CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
178CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
179CONFIG_NETFILTER_XT_TARGET_CONNMARK=m
180CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m
181CONFIG_NETFILTER_XT_TARGET_CT=m
182CONFIG_NETFILTER_XT_TARGET_DSCP=m
183CONFIG_NETFILTER_XT_TARGET_HMARK=m
184CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m
185CONFIG_NETFILTER_XT_TARGET_LOG=m
186CONFIG_NETFILTER_XT_TARGET_MARK=m
187CONFIG_NETFILTER_XT_TARGET_NFLOG=m
188CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
189CONFIG_NETFILTER_XT_TARGET_TEE=m
190CONFIG_NETFILTER_XT_TARGET_TPROXY=m
191CONFIG_NETFILTER_XT_TARGET_TRACE=m
192CONFIG_NETFILTER_XT_TARGET_SECMARK=m
193CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
194CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m
195CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m
196CONFIG_NETFILTER_XT_MATCH_BPF=m
197CONFIG_NETFILTER_XT_MATCH_CLUSTER=m
198CONFIG_NETFILTER_XT_MATCH_COMMENT=m
199CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m
200CONFIG_NETFILTER_XT_MATCH_CONNLABEL=m
201CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m
202CONFIG_NETFILTER_XT_MATCH_CONNMARK=m
203CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
204CONFIG_NETFILTER_XT_MATCH_CPU=m
205CONFIG_NETFILTER_XT_MATCH_DCCP=m
206CONFIG_NETFILTER_XT_MATCH_DEVGROUP=m
207CONFIG_NETFILTER_XT_MATCH_DSCP=m
208CONFIG_NETFILTER_XT_MATCH_ESP=m
209CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m
210CONFIG_NETFILTER_XT_MATCH_HELPER=m
211CONFIG_NETFILTER_XT_MATCH_IPRANGE=m
212CONFIG_NETFILTER_XT_MATCH_IPVS=m
213CONFIG_NETFILTER_XT_MATCH_LENGTH=m
214CONFIG_NETFILTER_XT_MATCH_LIMIT=m
215CONFIG_NETFILTER_XT_MATCH_MAC=m
216CONFIG_NETFILTER_XT_MATCH_MARK=m
217CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m
218CONFIG_NETFILTER_XT_MATCH_NFACCT=m
219CONFIG_NETFILTER_XT_MATCH_OSF=m
220CONFIG_NETFILTER_XT_MATCH_OWNER=m
221CONFIG_NETFILTER_XT_MATCH_POLICY=m
222CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m
223CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m
224CONFIG_NETFILTER_XT_MATCH_QUOTA=m
225CONFIG_NETFILTER_XT_MATCH_RATEEST=m
226CONFIG_NETFILTER_XT_MATCH_REALM=m
227CONFIG_NETFILTER_XT_MATCH_RECENT=m
228CONFIG_NETFILTER_XT_MATCH_STATE=m
229CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
230CONFIG_NETFILTER_XT_MATCH_STRING=m
231CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
232CONFIG_NETFILTER_XT_MATCH_TIME=m
233CONFIG_NETFILTER_XT_MATCH_U32=m
234CONFIG_IP_SET=m
235CONFIG_IP_SET_BITMAP_IP=m
236CONFIG_IP_SET_BITMAP_IPMAC=m
237CONFIG_IP_SET_BITMAP_PORT=m
238CONFIG_IP_SET_HASH_IP=m
239CONFIG_IP_SET_HASH_IPPORT=m
240CONFIG_IP_SET_HASH_IPPORTIP=m
241CONFIG_IP_SET_HASH_IPPORTNET=m
242CONFIG_IP_SET_HASH_NETPORTNET=m
243CONFIG_IP_SET_HASH_NET=m
244CONFIG_IP_SET_HASH_NETNET=m
245CONFIG_IP_SET_HASH_NETPORT=m
246CONFIG_IP_SET_HASH_NETIFACE=m
247CONFIG_IP_SET_LIST_SET=m
248CONFIG_IP_VS=m
249CONFIG_IP_VS_PROTO_TCP=y
250CONFIG_IP_VS_PROTO_UDP=y
251CONFIG_IP_VS_PROTO_ESP=y
252CONFIG_IP_VS_PROTO_AH=y
253CONFIG_IP_VS_RR=m
254CONFIG_IP_VS_WRR=m
255CONFIG_IP_VS_LC=m
256CONFIG_IP_VS_WLC=m
257CONFIG_IP_VS_LBLC=m
258CONFIG_IP_VS_LBLCR=m
259CONFIG_IP_VS_DH=m
260CONFIG_IP_VS_SH=m
261CONFIG_IP_VS_SED=m
262CONFIG_IP_VS_NQ=m
263CONFIG_IP_VS_FTP=m
264CONFIG_IP_VS_PE_SIP=m
265CONFIG_NF_CONNTRACK_IPV4=m
266CONFIG_NF_TABLES_IPV4=y
267CONFIG_NFT_CHAIN_ROUTE_IPV4=m
268CONFIG_NF_TABLES_ARP=y
269CONFIG_NFT_CHAIN_NAT_IPV4=m
270CONFIG_IP_NF_IPTABLES=m
271CONFIG_IP_NF_MATCH_AH=m
272CONFIG_IP_NF_MATCH_ECN=m
273CONFIG_IP_NF_MATCH_RPFILTER=m
274CONFIG_IP_NF_MATCH_TTL=m
275CONFIG_IP_NF_FILTER=m
276CONFIG_IP_NF_TARGET_REJECT=m
277CONFIG_IP_NF_NAT=m
278CONFIG_IP_NF_TARGET_MASQUERADE=m
279CONFIG_IP_NF_MANGLE=m
280CONFIG_IP_NF_TARGET_CLUSTERIP=m
281CONFIG_IP_NF_TARGET_ECN=m
282CONFIG_IP_NF_TARGET_TTL=m
283CONFIG_IP_NF_RAW=m
284CONFIG_IP_NF_SECURITY=m
285CONFIG_IP_NF_ARPTABLES=m
286CONFIG_IP_NF_ARPFILTER=m
287CONFIG_IP_NF_ARP_MANGLE=m
288CONFIG_NF_CONNTRACK_IPV6=m
289CONFIG_NF_TABLES_IPV6=y
290CONFIG_NFT_CHAIN_ROUTE_IPV6=m
291CONFIG_NFT_CHAIN_NAT_IPV6=m
292CONFIG_IP6_NF_IPTABLES=m
293CONFIG_IP6_NF_MATCH_AH=m
294CONFIG_IP6_NF_MATCH_EUI64=m
295CONFIG_IP6_NF_MATCH_FRAG=m
296CONFIG_IP6_NF_MATCH_OPTS=m
297CONFIG_IP6_NF_MATCH_HL=m
298CONFIG_IP6_NF_MATCH_IPV6HEADER=m
299CONFIG_IP6_NF_MATCH_MH=m
300CONFIG_IP6_NF_MATCH_RPFILTER=m
301CONFIG_IP6_NF_MATCH_RT=m
302CONFIG_IP6_NF_TARGET_HL=m
303CONFIG_IP6_NF_FILTER=m
304CONFIG_IP6_NF_TARGET_REJECT=m
305CONFIG_IP6_NF_MANGLE=m
306CONFIG_IP6_NF_RAW=m
307CONFIG_IP6_NF_SECURITY=m
308CONFIG_IP6_NF_NAT=m
309CONFIG_IP6_NF_TARGET_MASQUERADE=m
310CONFIG_NF_TABLES_BRIDGE=y
311CONFIG_RDS=m
312CONFIG_RDS_RDMA=m
313CONFIG_RDS_TCP=m
75CONFIG_L2TP=m 314CONFIG_L2TP=m
76CONFIG_L2TP_DEBUGFS=m 315CONFIG_L2TP_DEBUGFS=m
77CONFIG_VLAN_8021Q=y 316CONFIG_L2TP_V3=y
317CONFIG_L2TP_IP=m
318CONFIG_L2TP_ETH=m
319CONFIG_BRIDGE=m
320CONFIG_VLAN_8021Q=m
321CONFIG_VLAN_8021Q_GVRP=y
78CONFIG_NET_SCHED=y 322CONFIG_NET_SCHED=y
79CONFIG_NET_SCH_CBQ=m 323CONFIG_NET_SCH_CBQ=m
324CONFIG_NET_SCH_HTB=m
325CONFIG_NET_SCH_HFSC=m
80CONFIG_NET_SCH_PRIO=m 326CONFIG_NET_SCH_PRIO=m
327CONFIG_NET_SCH_MULTIQ=m
81CONFIG_NET_SCH_RED=m 328CONFIG_NET_SCH_RED=m
329CONFIG_NET_SCH_SFB=m
82CONFIG_NET_SCH_SFQ=m 330CONFIG_NET_SCH_SFQ=m
83CONFIG_NET_SCH_TEQL=m 331CONFIG_NET_SCH_TEQL=m
84CONFIG_NET_SCH_TBF=m 332CONFIG_NET_SCH_TBF=m
85CONFIG_NET_SCH_GRED=m 333CONFIG_NET_SCH_GRED=m
86CONFIG_NET_SCH_DSMARK=m 334CONFIG_NET_SCH_DSMARK=m
335CONFIG_NET_SCH_NETEM=m
336CONFIG_NET_SCH_DRR=m
337CONFIG_NET_SCH_MQPRIO=m
338CONFIG_NET_SCH_CHOKE=m
339CONFIG_NET_SCH_QFQ=m
340CONFIG_NET_SCH_CODEL=m
341CONFIG_NET_SCH_FQ_CODEL=m
342CONFIG_NET_SCH_INGRESS=m
343CONFIG_NET_SCH_PLUG=m
344CONFIG_NET_CLS_BASIC=m
87CONFIG_NET_CLS_TCINDEX=m 345CONFIG_NET_CLS_TCINDEX=m
88CONFIG_NET_CLS_ROUTE4=m 346CONFIG_NET_CLS_ROUTE4=m
89CONFIG_NET_CLS_FW=m 347CONFIG_NET_CLS_FW=m
90CONFIG_NET_CLS_U32=m 348CONFIG_NET_CLS_U32=m
349CONFIG_CLS_U32_PERF=y
91CONFIG_CLS_U32_MARK=y 350CONFIG_CLS_U32_MARK=y
92CONFIG_NET_CLS_RSVP=m 351CONFIG_NET_CLS_RSVP=m
93CONFIG_NET_CLS_RSVP6=m 352CONFIG_NET_CLS_RSVP6=m
353CONFIG_NET_CLS_FLOW=m
354CONFIG_NET_CLS_CGROUP=y
355CONFIG_NET_CLS_BPF=m
94CONFIG_NET_CLS_ACT=y 356CONFIG_NET_CLS_ACT=y
95CONFIG_NET_ACT_POLICE=y 357CONFIG_NET_ACT_POLICE=m
358CONFIG_NET_ACT_GACT=m
359CONFIG_GACT_PROB=y
360CONFIG_NET_ACT_MIRRED=m
361CONFIG_NET_ACT_IPT=m
362CONFIG_NET_ACT_NAT=m
363CONFIG_NET_ACT_PEDIT=m
364CONFIG_NET_ACT_SIMP=m
365CONFIG_NET_ACT_SKBEDIT=m
366CONFIG_NET_ACT_CSUM=m
367CONFIG_DNS_RESOLVER=y
368CONFIG_OPENVSWITCH=m
369CONFIG_VSOCKETS=m
370CONFIG_VIRTIO_VSOCKETS=m
371CONFIG_NETLINK_DIAG=m
372CONFIG_CGROUP_NET_PRIO=y
96CONFIG_BPF_JIT=y 373CONFIG_BPF_JIT=y
97CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" 374CONFIG_NET_PKTGEN=m
98CONFIG_DEVTMPFS=y 375CONFIG_DEVTMPFS=y
376CONFIG_DMA_CMA=y
377CONFIG_CMA_SIZE_MBYTES=0
378CONFIG_CONNECTOR=y
379CONFIG_ZRAM=m
99CONFIG_BLK_DEV_LOOP=m 380CONFIG_BLK_DEV_LOOP=m
381CONFIG_BLK_DEV_CRYPTOLOOP=m
382CONFIG_BLK_DEV_DRBD=m
100CONFIG_BLK_DEV_NBD=m 383CONFIG_BLK_DEV_NBD=m
101CONFIG_BLK_DEV_RAM=y 384CONFIG_BLK_DEV_RAM=y
385CONFIG_BLK_DEV_RAM_SIZE=32768
102CONFIG_VIRTIO_BLK=y 386CONFIG_VIRTIO_BLK=y
387CONFIG_BLK_DEV_RBD=m
388CONFIG_BLK_DEV_NVME=m
389CONFIG_ENCLOSURE_SERVICES=m
390CONFIG_GENWQE=m
391CONFIG_RAID_ATTRS=m
103CONFIG_SCSI=y 392CONFIG_SCSI=y
104# CONFIG_SCSI_MQ_DEFAULT is not set
105CONFIG_BLK_DEV_SD=y 393CONFIG_BLK_DEV_SD=y
106CONFIG_CHR_DEV_ST=y 394CONFIG_CHR_DEV_ST=m
107CONFIG_BLK_DEV_SR=y 395CONFIG_CHR_DEV_OSST=m
108CONFIG_BLK_DEV_SR_VENDOR=y 396CONFIG_BLK_DEV_SR=m
109CONFIG_CHR_DEV_SG=y 397CONFIG_CHR_DEV_SG=y
398CONFIG_CHR_DEV_SCH=m
399CONFIG_SCSI_ENCLOSURE=m
110CONFIG_SCSI_CONSTANTS=y 400CONFIG_SCSI_CONSTANTS=y
111CONFIG_SCSI_LOGGING=y 401CONFIG_SCSI_LOGGING=y
402CONFIG_SCSI_SPI_ATTRS=m
112CONFIG_SCSI_FC_ATTRS=y 403CONFIG_SCSI_FC_ATTRS=y
404CONFIG_SCSI_SAS_LIBSAS=m
405CONFIG_SCSI_SRP_ATTRS=m
406CONFIG_ISCSI_TCP=m
407CONFIG_SCSI_DEBUG=m
113CONFIG_ZFCP=y 408CONFIG_ZFCP=y
114CONFIG_SCSI_VIRTIO=y 409CONFIG_SCSI_VIRTIO=m
410CONFIG_SCSI_DH=y
411CONFIG_SCSI_DH_RDAC=m
412CONFIG_SCSI_DH_HP_SW=m
413CONFIG_SCSI_DH_EMC=m
414CONFIG_SCSI_DH_ALUA=m
415CONFIG_SCSI_OSD_INITIATOR=m
416CONFIG_SCSI_OSD_ULD=m
115CONFIG_MD=y 417CONFIG_MD=y
418CONFIG_BLK_DEV_MD=y
116CONFIG_MD_LINEAR=m 419CONFIG_MD_LINEAR=m
117CONFIG_MD_MULTIPATH=m 420CONFIG_MD_MULTIPATH=m
118CONFIG_BLK_DEV_DM=y 421CONFIG_MD_FAULTY=m
422CONFIG_BLK_DEV_DM=m
119CONFIG_DM_CRYPT=m 423CONFIG_DM_CRYPT=m
120CONFIG_DM_SNAPSHOT=m 424CONFIG_DM_SNAPSHOT=m
425CONFIG_DM_THIN_PROVISIONING=m
121CONFIG_DM_MIRROR=m 426CONFIG_DM_MIRROR=m
122CONFIG_DM_LOG_USERSPACE=m 427CONFIG_DM_LOG_USERSPACE=m
123CONFIG_DM_RAID=m 428CONFIG_DM_RAID=m
@@ -125,71 +430,216 @@ CONFIG_DM_ZERO=m
125CONFIG_DM_MULTIPATH=m 430CONFIG_DM_MULTIPATH=m
126CONFIG_DM_MULTIPATH_QL=m 431CONFIG_DM_MULTIPATH_QL=m
127CONFIG_DM_MULTIPATH_ST=m 432CONFIG_DM_MULTIPATH_ST=m
433CONFIG_DM_DELAY=m
128CONFIG_DM_UEVENT=y 434CONFIG_DM_UEVENT=y
435CONFIG_DM_FLAKEY=m
129CONFIG_DM_VERITY=m 436CONFIG_DM_VERITY=m
130CONFIG_DM_SWITCH=m 437CONFIG_DM_SWITCH=m
131CONFIG_NETDEVICES=y 438CONFIG_NETDEVICES=y
132CONFIG_BONDING=m 439CONFIG_BONDING=m
133CONFIG_DUMMY=m 440CONFIG_DUMMY=m
134CONFIG_EQUALIZER=m 441CONFIG_EQUALIZER=m
442CONFIG_IFB=m
443CONFIG_MACVLAN=m
444CONFIG_MACVTAP=m
445CONFIG_VXLAN=m
135CONFIG_TUN=m 446CONFIG_TUN=m
136CONFIG_VIRTIO_NET=y 447CONFIG_VETH=m
137# CONFIG_NET_VENDOR_ALACRITECH is not set 448CONFIG_VIRTIO_NET=m
138# CONFIG_NET_VENDOR_AURORA is not set 449CONFIG_NLMON=m
139# CONFIG_NET_VENDOR_CORTINA is not set 450# CONFIG_NET_VENDOR_ARC is not set
140# CONFIG_NET_VENDOR_SOLARFLARE is not set 451# CONFIG_NET_VENDOR_CHELSIO is not set
141# CONFIG_NET_VENDOR_SOCIONEXT is not set 452# CONFIG_NET_VENDOR_INTEL is not set
142# CONFIG_NET_VENDOR_SYNOPSYS is not set 453# CONFIG_NET_VENDOR_MARVELL is not set
143# CONFIG_INPUT is not set 454CONFIG_MLX4_EN=m
455CONFIG_MLX5_CORE=m
456CONFIG_MLX5_CORE_EN=y
457# CONFIG_NET_VENDOR_NATSEMI is not set
458CONFIG_PPP=m
459CONFIG_PPP_BSDCOMP=m
460CONFIG_PPP_DEFLATE=m
461CONFIG_PPP_MPPE=m
462CONFIG_PPPOE=m
463CONFIG_PPTP=m
464CONFIG_PPPOL2TP=m
465CONFIG_PPP_ASYNC=m
466CONFIG_PPP_SYNC_TTY=m
467CONFIG_ISM=m
468CONFIG_INPUT_EVDEV=y
469# CONFIG_INPUT_KEYBOARD is not set
470# CONFIG_INPUT_MOUSE is not set
144# CONFIG_SERIO is not set 471# CONFIG_SERIO is not set
145# CONFIG_VT is not set 472CONFIG_LEGACY_PTY_COUNT=0
146CONFIG_DEVKMEM=y 473CONFIG_HW_RANDOM_VIRTIO=m
147CONFIG_RAW_DRIVER=m 474CONFIG_RAW_DRIVER=m
148CONFIG_VIRTIO_BALLOON=y 475CONFIG_HANGCHECK_TIMER=m
476CONFIG_TN3270_FS=y
477# CONFIG_HWMON is not set
478CONFIG_WATCHDOG=y
479CONFIG_WATCHDOG_NOWAYOUT=y
480CONFIG_SOFT_WATCHDOG=m
481CONFIG_DIAG288_WATCHDOG=m
482CONFIG_DRM=y
483CONFIG_DRM_VIRTIO_GPU=y
484CONFIG_FRAMEBUFFER_CONSOLE=y
485# CONFIG_HID is not set
486# CONFIG_USB_SUPPORT is not set
487CONFIG_INFINIBAND=m
488CONFIG_INFINIBAND_USER_ACCESS=m
489CONFIG_MLX4_INFINIBAND=m
490CONFIG_MLX5_INFINIBAND=m
491CONFIG_VFIO=m
492CONFIG_VFIO_PCI=m
493CONFIG_VFIO_MDEV=m
494CONFIG_VFIO_MDEV_DEVICE=m
495CONFIG_VIRTIO_PCI=m
496CONFIG_VIRTIO_BALLOON=m
497CONFIG_VIRTIO_INPUT=y
498CONFIG_S390_AP_IOMMU=y
499CONFIG_S390_CCW_IOMMU=y
149CONFIG_EXT4_FS=y 500CONFIG_EXT4_FS=y
150CONFIG_EXT4_FS_POSIX_ACL=y 501CONFIG_EXT4_FS_POSIX_ACL=y
151CONFIG_EXT4_FS_SECURITY=y 502CONFIG_EXT4_FS_SECURITY=y
503CONFIG_JBD2_DEBUG=y
504CONFIG_JFS_FS=m
505CONFIG_JFS_POSIX_ACL=y
506CONFIG_JFS_SECURITY=y
507CONFIG_JFS_STATISTICS=y
152CONFIG_XFS_FS=y 508CONFIG_XFS_FS=y
153CONFIG_XFS_QUOTA=y 509CONFIG_XFS_QUOTA=y
154CONFIG_XFS_POSIX_ACL=y 510CONFIG_XFS_POSIX_ACL=y
155CONFIG_XFS_RT=y 511CONFIG_XFS_RT=y
512CONFIG_GFS2_FS=m
513CONFIG_GFS2_FS_LOCKING_DLM=y
514CONFIG_OCFS2_FS=m
156CONFIG_BTRFS_FS=y 515CONFIG_BTRFS_FS=y
157CONFIG_BTRFS_FS_POSIX_ACL=y 516CONFIG_BTRFS_FS_POSIX_ACL=y
517CONFIG_NILFS2_FS=m
518CONFIG_FS_DAX=y
519CONFIG_EXPORTFS_BLOCK_OPS=y
520CONFIG_FS_ENCRYPTION=y
158CONFIG_FANOTIFY=y 521CONFIG_FANOTIFY=y
522CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
523CONFIG_QUOTA_NETLINK_INTERFACE=y
524CONFIG_QFMT_V1=m
525CONFIG_QFMT_V2=m
526CONFIG_AUTOFS4_FS=m
159CONFIG_FUSE_FS=y 527CONFIG_FUSE_FS=y
528CONFIG_CUSE=m
529CONFIG_OVERLAY_FS=m
530CONFIG_FSCACHE=m
531CONFIG_CACHEFILES=m
532CONFIG_ISO9660_FS=y
533CONFIG_JOLIET=y
534CONFIG_ZISOFS=y
535CONFIG_UDF_FS=m
536CONFIG_MSDOS_FS=m
537CONFIG_VFAT_FS=m
538CONFIG_NTFS_FS=m
539CONFIG_NTFS_RW=y
160CONFIG_PROC_KCORE=y 540CONFIG_PROC_KCORE=y
161CONFIG_TMPFS=y 541CONFIG_TMPFS=y
162CONFIG_TMPFS_POSIX_ACL=y 542CONFIG_TMPFS_POSIX_ACL=y
163CONFIG_HUGETLBFS=y 543CONFIG_HUGETLBFS=y
164# CONFIG_NETWORK_FILESYSTEMS is not set 544CONFIG_CONFIGFS_FS=m
545CONFIG_ECRYPT_FS=m
546CONFIG_CRAMFS=m
547CONFIG_SQUASHFS=m
548CONFIG_SQUASHFS_XATTR=y
549CONFIG_SQUASHFS_LZO=y
550CONFIG_SQUASHFS_XZ=y
551CONFIG_ROMFS_FS=m
552CONFIG_NFS_FS=m
553CONFIG_NFS_V3_ACL=y
554CONFIG_NFS_V4=m
555CONFIG_NFS_SWAP=y
556CONFIG_NFSD=m
557CONFIG_NFSD_V3_ACL=y
558CONFIG_NFSD_V4=y
559CONFIG_NFSD_V4_SECURITY_LABEL=y
560CONFIG_CIFS=m
561CONFIG_CIFS_STATS=y
562CONFIG_CIFS_STATS2=y
563CONFIG_CIFS_WEAK_PW_HASH=y
564CONFIG_CIFS_UPCALL=y
565CONFIG_CIFS_XATTR=y
566CONFIG_CIFS_POSIX=y
567# CONFIG_CIFS_DEBUG is not set
568CONFIG_CIFS_DFS_UPCALL=y
569CONFIG_NLS_DEFAULT="utf8"
570CONFIG_NLS_CODEPAGE_437=m
571CONFIG_NLS_CODEPAGE_850=m
572CONFIG_NLS_ASCII=m
573CONFIG_NLS_ISO8859_1=m
574CONFIG_NLS_ISO8859_15=m
575CONFIG_NLS_UTF8=m
576CONFIG_DLM=m
577CONFIG_PRINTK_TIME=y
578CONFIG_DEBUG_INFO=y
579CONFIG_DEBUG_INFO_DWARF4=y
580CONFIG_GDB_SCRIPTS=y
581# CONFIG_ENABLE_MUST_CHECK is not set
582CONFIG_FRAME_WARN=1024
583CONFIG_UNUSED_SYMBOLS=y
584CONFIG_MAGIC_SYSRQ=y
585CONFIG_DEBUG_MEMORY_INIT=y
586CONFIG_PANIC_ON_OOPS=y
587CONFIG_RCU_TORTURE_TEST=m
588CONFIG_RCU_CPU_STALL_TIMEOUT=60
589CONFIG_LATENCYTOP=y
590CONFIG_SCHED_TRACER=y
591CONFIG_FTRACE_SYSCALLS=y
592CONFIG_STACK_TRACER=y
593CONFIG_BLK_DEV_IO_TRACE=y
594CONFIG_FUNCTION_PROFILER=y
595CONFIG_HIST_TRIGGERS=y
596CONFIG_LKDTM=m
597CONFIG_PERCPU_TEST=m
598CONFIG_ATOMIC64_SELFTEST=y
599CONFIG_TEST_BPF=m
600CONFIG_BUG_ON_DATA_CORRUPTION=y
601CONFIG_S390_PTDUMP=y
602CONFIG_PERSISTENT_KEYRINGS=y
603CONFIG_BIG_KEYS=y
604CONFIG_ENCRYPTED_KEYS=m
605CONFIG_SECURITY=y
606CONFIG_SECURITY_NETWORK=y
607CONFIG_SECURITY_SELINUX=y
608CONFIG_SECURITY_SELINUX_BOOTPARAM=y
609CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0
610CONFIG_SECURITY_SELINUX_DISABLE=y
611CONFIG_INTEGRITY_SIGNATURE=y
612CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y
613CONFIG_IMA=y
614CONFIG_IMA_DEFAULT_HASH_SHA256=y
615CONFIG_IMA_WRITE_POLICY=y
616CONFIG_IMA_APPRAISE=y
617CONFIG_CRYPTO_FIPS=y
618CONFIG_CRYPTO_DH=m
619CONFIG_CRYPTO_ECDH=m
620CONFIG_CRYPTO_USER=m
621# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
622CONFIG_CRYPTO_PCRYPT=m
165CONFIG_CRYPTO_CRYPTD=m 623CONFIG_CRYPTO_CRYPTD=m
166CONFIG_CRYPTO_AUTHENC=m
167CONFIG_CRYPTO_TEST=m 624CONFIG_CRYPTO_TEST=m
168CONFIG_CRYPTO_CCM=m 625CONFIG_CRYPTO_CHACHA20POLY1305=m
169CONFIG_CRYPTO_GCM=m
170CONFIG_CRYPTO_CBC=y
171CONFIG_CRYPTO_CFB=m
172CONFIG_CRYPTO_CTS=m
173CONFIG_CRYPTO_LRW=m 626CONFIG_CRYPTO_LRW=m
174CONFIG_CRYPTO_OFB=m
175CONFIG_CRYPTO_PCBC=m 627CONFIG_CRYPTO_PCBC=m
176CONFIG_CRYPTO_XTS=m 628CONFIG_CRYPTO_KEYWRAP=m
177CONFIG_CRYPTO_CMAC=m
178CONFIG_CRYPTO_XCBC=m 629CONFIG_CRYPTO_XCBC=m
179CONFIG_CRYPTO_VMAC=m 630CONFIG_CRYPTO_VMAC=m
180CONFIG_CRYPTO_CRC32=m 631CONFIG_CRYPTO_CRC32=m
181CONFIG_CRYPTO_MD4=m
182CONFIG_CRYPTO_MICHAEL_MIC=m 632CONFIG_CRYPTO_MICHAEL_MIC=m
183CONFIG_CRYPTO_RMD128=m 633CONFIG_CRYPTO_RMD128=m
184CONFIG_CRYPTO_RMD160=m 634CONFIG_CRYPTO_RMD160=m
185CONFIG_CRYPTO_RMD256=m 635CONFIG_CRYPTO_RMD256=m
186CONFIG_CRYPTO_RMD320=m 636CONFIG_CRYPTO_RMD320=m
187CONFIG_CRYPTO_SHA256=y
188CONFIG_CRYPTO_SHA512=m 637CONFIG_CRYPTO_SHA512=m
638CONFIG_CRYPTO_SHA3=m
189CONFIG_CRYPTO_TGR192=m 639CONFIG_CRYPTO_TGR192=m
190CONFIG_CRYPTO_WP512=m 640CONFIG_CRYPTO_WP512=m
641CONFIG_CRYPTO_AES_TI=m
191CONFIG_CRYPTO_ANUBIS=m 642CONFIG_CRYPTO_ANUBIS=m
192CONFIG_CRYPTO_ARC4=m
193CONFIG_CRYPTO_BLOWFISH=m 643CONFIG_CRYPTO_BLOWFISH=m
194CONFIG_CRYPTO_CAMELLIA=m 644CONFIG_CRYPTO_CAMELLIA=m
195CONFIG_CRYPTO_CAST5=m 645CONFIG_CRYPTO_CAST5=m
@@ -199,16 +649,16 @@ CONFIG_CRYPTO_KHAZAD=m
199CONFIG_CRYPTO_SALSA20=m 649CONFIG_CRYPTO_SALSA20=m
200CONFIG_CRYPTO_SEED=m 650CONFIG_CRYPTO_SEED=m
201CONFIG_CRYPTO_SERPENT=m 651CONFIG_CRYPTO_SERPENT=m
202CONFIG_CRYPTO_SM4=m
203CONFIG_CRYPTO_TEA=m 652CONFIG_CRYPTO_TEA=m
204CONFIG_CRYPTO_TWOFISH=m 653CONFIG_CRYPTO_TWOFISH=m
205CONFIG_CRYPTO_DEFLATE=m 654CONFIG_CRYPTO_842=m
206CONFIG_CRYPTO_LZ4=m 655CONFIG_CRYPTO_LZ4=m
207CONFIG_CRYPTO_LZ4HC=m 656CONFIG_CRYPTO_LZ4HC=m
208CONFIG_CRYPTO_ANSI_CPRNG=m 657CONFIG_CRYPTO_ANSI_CPRNG=m
209CONFIG_CRYPTO_USER_API_HASH=m 658CONFIG_CRYPTO_USER_API_HASH=m
210CONFIG_CRYPTO_USER_API_SKCIPHER=m 659CONFIG_CRYPTO_USER_API_SKCIPHER=m
211CONFIG_CRYPTO_USER_API_RNG=m 660CONFIG_CRYPTO_USER_API_RNG=m
661CONFIG_CRYPTO_USER_API_AEAD=m
212CONFIG_ZCRYPT=m 662CONFIG_ZCRYPT=m
213CONFIG_PKEY=m 663CONFIG_PKEY=m
214CONFIG_CRYPTO_PAES_S390=m 664CONFIG_CRYPTO_PAES_S390=m
@@ -217,38 +667,14 @@ CONFIG_CRYPTO_SHA256_S390=m
217CONFIG_CRYPTO_SHA512_S390=m 667CONFIG_CRYPTO_SHA512_S390=m
218CONFIG_CRYPTO_DES_S390=m 668CONFIG_CRYPTO_DES_S390=m
219CONFIG_CRYPTO_AES_S390=m 669CONFIG_CRYPTO_AES_S390=m
670CONFIG_CRYPTO_GHASH_S390=m
220CONFIG_CRYPTO_CRC32_S390=y 671CONFIG_CRYPTO_CRC32_S390=y
221CONFIG_CRC7=m 672CONFIG_CRC7=m
222# CONFIG_XZ_DEC_X86 is not set 673CONFIG_CRC8=m
223# CONFIG_XZ_DEC_POWERPC is not set 674CONFIG_CORDIC=m
224# CONFIG_XZ_DEC_IA64 is not set 675CONFIG_CMM=m
225# CONFIG_XZ_DEC_ARM is not set 676CONFIG_APPLDATA_BASE=y
226# CONFIG_XZ_DEC_ARMTHUMB is not set 677CONFIG_KVM=m
227# CONFIG_XZ_DEC_SPARC is not set 678CONFIG_KVM_S390_UCONTROL=y
228CONFIG_DEBUG_INFO=y 679CONFIG_VHOST_NET=m
229CONFIG_DEBUG_INFO_DWARF4=y 680CONFIG_VHOST_VSOCK=m
230CONFIG_GDB_SCRIPTS=y
231CONFIG_UNUSED_SYMBOLS=y
232CONFIG_DEBUG_SECTION_MISMATCH=y
233CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y
234CONFIG_MAGIC_SYSRQ=y
235CONFIG_DEBUG_PAGEALLOC=y
236CONFIG_DETECT_HUNG_TASK=y
237CONFIG_PANIC_ON_OOPS=y
238CONFIG_PROVE_LOCKING=y
239CONFIG_LOCK_STAT=y
240CONFIG_DEBUG_LOCKDEP=y
241CONFIG_DEBUG_ATOMIC_SLEEP=y
242CONFIG_DEBUG_LIST=y
243CONFIG_DEBUG_SG=y
244CONFIG_DEBUG_NOTIFIERS=y
245CONFIG_RCU_CPU_STALL_TIMEOUT=60
246CONFIG_LATENCYTOP=y
247CONFIG_SCHED_TRACER=y
248CONFIG_FTRACE_SYSCALLS=y
249CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y
250CONFIG_STACK_TRACER=y
251CONFIG_BLK_DEV_IO_TRACE=y
252CONFIG_FUNCTION_PROFILER=y
253# CONFIG_RUNTIME_TESTING_MENU is not set
254CONFIG_S390_PTDUMP=y
diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig
deleted file mode 100644
index 09aa5cb14873..000000000000
--- a/arch/s390/configs/performance_defconfig
+++ /dev/null
@@ -1,678 +0,0 @@
1CONFIG_SYSVIPC=y
2CONFIG_POSIX_MQUEUE=y
3CONFIG_AUDIT=y
4CONFIG_NO_HZ_IDLE=y
5CONFIG_HIGH_RES_TIMERS=y
6CONFIG_BSD_PROCESS_ACCT=y
7CONFIG_BSD_PROCESS_ACCT_V3=y
8CONFIG_TASKSTATS=y
9CONFIG_TASK_DELAY_ACCT=y
10CONFIG_TASK_XACCT=y
11CONFIG_TASK_IO_ACCOUNTING=y
12CONFIG_IKCONFIG=y
13CONFIG_IKCONFIG_PROC=y
14CONFIG_NUMA_BALANCING=y
15# CONFIG_NUMA_BALANCING_DEFAULT_ENABLED is not set
16CONFIG_MEMCG=y
17CONFIG_MEMCG_SWAP=y
18CONFIG_BLK_CGROUP=y
19CONFIG_CFS_BANDWIDTH=y
20CONFIG_RT_GROUP_SCHED=y
21CONFIG_CGROUP_PIDS=y
22CONFIG_CGROUP_FREEZER=y
23CONFIG_CGROUP_HUGETLB=y
24CONFIG_CPUSETS=y
25CONFIG_CGROUP_DEVICE=y
26CONFIG_CGROUP_CPUACCT=y
27CONFIG_CGROUP_PERF=y
28CONFIG_NAMESPACES=y
29CONFIG_USER_NS=y
30CONFIG_SCHED_AUTOGROUP=y
31CONFIG_BLK_DEV_INITRD=y
32CONFIG_EXPERT=y
33# CONFIG_SYSFS_SYSCALL is not set
34CONFIG_CHECKPOINT_RESTORE=y
35CONFIG_BPF_SYSCALL=y
36CONFIG_USERFAULTFD=y
37# CONFIG_COMPAT_BRK is not set
38CONFIG_PROFILING=y
39CONFIG_OPROFILE=m
40CONFIG_KPROBES=y
41CONFIG_JUMP_LABEL=y
42CONFIG_MODULES=y
43CONFIG_MODULE_FORCE_LOAD=y
44CONFIG_MODULE_UNLOAD=y
45CONFIG_MODULE_FORCE_UNLOAD=y
46CONFIG_MODVERSIONS=y
47CONFIG_MODULE_SRCVERSION_ALL=y
48CONFIG_MODULE_SIG=y
49CONFIG_MODULE_SIG_SHA256=y
50CONFIG_BLK_DEV_INTEGRITY=y
51CONFIG_BLK_DEV_THROTTLING=y
52CONFIG_BLK_WBT=y
53CONFIG_BLK_WBT_SQ=y
54CONFIG_PARTITION_ADVANCED=y
55CONFIG_IBM_PARTITION=y
56CONFIG_BSD_DISKLABEL=y
57CONFIG_MINIX_SUBPARTITION=y
58CONFIG_SOLARIS_X86_PARTITION=y
59CONFIG_UNIXWARE_DISKLABEL=y
60CONFIG_CFQ_GROUP_IOSCHED=y
61CONFIG_DEFAULT_DEADLINE=y
62CONFIG_LIVEPATCH=y
63CONFIG_TUNE_ZEC12=y
64CONFIG_NR_CPUS=512
65CONFIG_NUMA=y
66CONFIG_HZ_100=y
67CONFIG_KEXEC_FILE=y
68CONFIG_KEXEC_VERIFY_SIG=y
69CONFIG_EXPOLINE=y
70CONFIG_EXPOLINE_AUTO=y
71CONFIG_MEMORY_HOTPLUG=y
72CONFIG_MEMORY_HOTREMOVE=y
73CONFIG_KSM=y
74CONFIG_TRANSPARENT_HUGEPAGE=y
75CONFIG_CLEANCACHE=y
76CONFIG_FRONTSWAP=y
77CONFIG_MEM_SOFT_DIRTY=y
78CONFIG_ZSWAP=y
79CONFIG_ZBUD=m
80CONFIG_ZSMALLOC=m
81CONFIG_ZSMALLOC_STAT=y
82CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
83CONFIG_IDLE_PAGE_TRACKING=y
84CONFIG_PCI=y
85CONFIG_HOTPLUG_PCI=y
86CONFIG_HOTPLUG_PCI_S390=y
87CONFIG_CHSC_SCH=y
88CONFIG_VFIO_AP=m
89CONFIG_CRASH_DUMP=y
90CONFIG_BINFMT_MISC=m
91CONFIG_HIBERNATION=y
92CONFIG_PM_DEBUG=y
93CONFIG_NET=y
94CONFIG_PACKET=y
95CONFIG_PACKET_DIAG=m
96CONFIG_UNIX=y
97CONFIG_UNIX_DIAG=m
98CONFIG_XFRM_USER=m
99CONFIG_NET_KEY=m
100CONFIG_SMC=m
101CONFIG_SMC_DIAG=m
102CONFIG_INET=y
103CONFIG_IP_MULTICAST=y
104CONFIG_IP_ADVANCED_ROUTER=y
105CONFIG_IP_MULTIPLE_TABLES=y
106CONFIG_IP_ROUTE_MULTIPATH=y
107CONFIG_IP_ROUTE_VERBOSE=y
108CONFIG_NET_IPIP=m
109CONFIG_NET_IPGRE_DEMUX=m
110CONFIG_NET_IPGRE=m
111CONFIG_NET_IPGRE_BROADCAST=y
112CONFIG_IP_MROUTE=y
113CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
114CONFIG_IP_PIMSM_V1=y
115CONFIG_IP_PIMSM_V2=y
116CONFIG_SYN_COOKIES=y
117CONFIG_NET_IPVTI=m
118CONFIG_INET_AH=m
119CONFIG_INET_ESP=m
120CONFIG_INET_IPCOMP=m
121CONFIG_INET_XFRM_MODE_TRANSPORT=m
122CONFIG_INET_XFRM_MODE_TUNNEL=m
123CONFIG_INET_XFRM_MODE_BEET=m
124CONFIG_INET_DIAG=m
125CONFIG_INET_UDP_DIAG=m
126CONFIG_TCP_CONG_ADVANCED=y
127CONFIG_TCP_CONG_HSTCP=m
128CONFIG_TCP_CONG_HYBLA=m
129CONFIG_TCP_CONG_SCALABLE=m
130CONFIG_TCP_CONG_LP=m
131CONFIG_TCP_CONG_VENO=m
132CONFIG_TCP_CONG_YEAH=m
133CONFIG_TCP_CONG_ILLINOIS=m
134CONFIG_IPV6_ROUTER_PREF=y
135CONFIG_INET6_AH=m
136CONFIG_INET6_ESP=m
137CONFIG_INET6_IPCOMP=m
138CONFIG_IPV6_MIP6=m
139CONFIG_INET6_XFRM_MODE_TRANSPORT=m
140CONFIG_INET6_XFRM_MODE_TUNNEL=m
141CONFIG_INET6_XFRM_MODE_BEET=m
142CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m
143CONFIG_IPV6_VTI=m
144CONFIG_IPV6_SIT=m
145CONFIG_IPV6_GRE=m
146CONFIG_IPV6_MULTIPLE_TABLES=y
147CONFIG_IPV6_SUBTREES=y
148CONFIG_NETFILTER=y
149CONFIG_NF_CONNTRACK=m
150CONFIG_NF_CONNTRACK_SECMARK=y
151CONFIG_NF_CONNTRACK_EVENTS=y
152CONFIG_NF_CONNTRACK_TIMEOUT=y
153CONFIG_NF_CONNTRACK_TIMESTAMP=y
154CONFIG_NF_CONNTRACK_AMANDA=m
155CONFIG_NF_CONNTRACK_FTP=m
156CONFIG_NF_CONNTRACK_H323=m
157CONFIG_NF_CONNTRACK_IRC=m
158CONFIG_NF_CONNTRACK_NETBIOS_NS=m
159CONFIG_NF_CONNTRACK_SNMP=m
160CONFIG_NF_CONNTRACK_PPTP=m
161CONFIG_NF_CONNTRACK_SANE=m
162CONFIG_NF_CONNTRACK_SIP=m
163CONFIG_NF_CONNTRACK_TFTP=m
164CONFIG_NF_CT_NETLINK=m
165CONFIG_NF_CT_NETLINK_TIMEOUT=m
166CONFIG_NF_TABLES=m
167CONFIG_NFT_CT=m
168CONFIG_NFT_COUNTER=m
169CONFIG_NFT_LOG=m
170CONFIG_NFT_LIMIT=m
171CONFIG_NFT_NAT=m
172CONFIG_NFT_COMPAT=m
173CONFIG_NFT_HASH=m
174CONFIG_NETFILTER_XT_SET=m
175CONFIG_NETFILTER_XT_TARGET_AUDIT=m
176CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
177CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
178CONFIG_NETFILTER_XT_TARGET_CONNMARK=m
179CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m
180CONFIG_NETFILTER_XT_TARGET_CT=m
181CONFIG_NETFILTER_XT_TARGET_DSCP=m
182CONFIG_NETFILTER_XT_TARGET_HMARK=m
183CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m
184CONFIG_NETFILTER_XT_TARGET_LOG=m
185CONFIG_NETFILTER_XT_TARGET_MARK=m
186CONFIG_NETFILTER_XT_TARGET_NFLOG=m
187CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
188CONFIG_NETFILTER_XT_TARGET_TEE=m
189CONFIG_NETFILTER_XT_TARGET_TPROXY=m
190CONFIG_NETFILTER_XT_TARGET_TRACE=m
191CONFIG_NETFILTER_XT_TARGET_SECMARK=m
192CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
193CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m
194CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m
195CONFIG_NETFILTER_XT_MATCH_BPF=m
196CONFIG_NETFILTER_XT_MATCH_CLUSTER=m
197CONFIG_NETFILTER_XT_MATCH_COMMENT=m
198CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m
199CONFIG_NETFILTER_XT_MATCH_CONNLABEL=m
200CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m
201CONFIG_NETFILTER_XT_MATCH_CONNMARK=m
202CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
203CONFIG_NETFILTER_XT_MATCH_CPU=m
204CONFIG_NETFILTER_XT_MATCH_DCCP=m
205CONFIG_NETFILTER_XT_MATCH_DEVGROUP=m
206CONFIG_NETFILTER_XT_MATCH_DSCP=m
207CONFIG_NETFILTER_XT_MATCH_ESP=m
208CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m
209CONFIG_NETFILTER_XT_MATCH_HELPER=m
210CONFIG_NETFILTER_XT_MATCH_IPRANGE=m
211CONFIG_NETFILTER_XT_MATCH_IPVS=m
212CONFIG_NETFILTER_XT_MATCH_LENGTH=m
213CONFIG_NETFILTER_XT_MATCH_LIMIT=m
214CONFIG_NETFILTER_XT_MATCH_MAC=m
215CONFIG_NETFILTER_XT_MATCH_MARK=m
216CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m
217CONFIG_NETFILTER_XT_MATCH_NFACCT=m
218CONFIG_NETFILTER_XT_MATCH_OSF=m
219CONFIG_NETFILTER_XT_MATCH_OWNER=m
220CONFIG_NETFILTER_XT_MATCH_POLICY=m
221CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m
222CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m
223CONFIG_NETFILTER_XT_MATCH_QUOTA=m
224CONFIG_NETFILTER_XT_MATCH_RATEEST=m
225CONFIG_NETFILTER_XT_MATCH_REALM=m
226CONFIG_NETFILTER_XT_MATCH_RECENT=m
227CONFIG_NETFILTER_XT_MATCH_STATE=m
228CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
229CONFIG_NETFILTER_XT_MATCH_STRING=m
230CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
231CONFIG_NETFILTER_XT_MATCH_TIME=m
232CONFIG_NETFILTER_XT_MATCH_U32=m
233CONFIG_IP_SET=m
234CONFIG_IP_SET_BITMAP_IP=m
235CONFIG_IP_SET_BITMAP_IPMAC=m
236CONFIG_IP_SET_BITMAP_PORT=m
237CONFIG_IP_SET_HASH_IP=m
238CONFIG_IP_SET_HASH_IPPORT=m
239CONFIG_IP_SET_HASH_IPPORTIP=m
240CONFIG_IP_SET_HASH_IPPORTNET=m
241CONFIG_IP_SET_HASH_NETPORTNET=m
242CONFIG_IP_SET_HASH_NET=m
243CONFIG_IP_SET_HASH_NETNET=m
244CONFIG_IP_SET_HASH_NETPORT=m
245CONFIG_IP_SET_HASH_NETIFACE=m
246CONFIG_IP_SET_LIST_SET=m
247CONFIG_IP_VS=m
248CONFIG_IP_VS_PROTO_TCP=y
249CONFIG_IP_VS_PROTO_UDP=y
250CONFIG_IP_VS_PROTO_ESP=y
251CONFIG_IP_VS_PROTO_AH=y
252CONFIG_IP_VS_RR=m
253CONFIG_IP_VS_WRR=m
254CONFIG_IP_VS_LC=m
255CONFIG_IP_VS_WLC=m
256CONFIG_IP_VS_LBLC=m
257CONFIG_IP_VS_LBLCR=m
258CONFIG_IP_VS_DH=m
259CONFIG_IP_VS_SH=m
260CONFIG_IP_VS_SED=m
261CONFIG_IP_VS_NQ=m
262CONFIG_IP_VS_FTP=m
263CONFIG_IP_VS_PE_SIP=m
264CONFIG_NF_CONNTRACK_IPV4=m
265CONFIG_NF_TABLES_IPV4=y
266CONFIG_NFT_CHAIN_ROUTE_IPV4=m
267CONFIG_NF_TABLES_ARP=y
268CONFIG_NFT_CHAIN_NAT_IPV4=m
269CONFIG_IP_NF_IPTABLES=m
270CONFIG_IP_NF_MATCH_AH=m
271CONFIG_IP_NF_MATCH_ECN=m
272CONFIG_IP_NF_MATCH_RPFILTER=m
273CONFIG_IP_NF_MATCH_TTL=m
274CONFIG_IP_NF_FILTER=m
275CONFIG_IP_NF_TARGET_REJECT=m
276CONFIG_IP_NF_NAT=m
277CONFIG_IP_NF_TARGET_MASQUERADE=m
278CONFIG_IP_NF_MANGLE=m
279CONFIG_IP_NF_TARGET_CLUSTERIP=m
280CONFIG_IP_NF_TARGET_ECN=m
281CONFIG_IP_NF_TARGET_TTL=m
282CONFIG_IP_NF_RAW=m
283CONFIG_IP_NF_SECURITY=m
284CONFIG_IP_NF_ARPTABLES=m
285CONFIG_IP_NF_ARPFILTER=m
286CONFIG_IP_NF_ARP_MANGLE=m
287CONFIG_NF_CONNTRACK_IPV6=m
288CONFIG_NF_TABLES_IPV6=y
289CONFIG_NFT_CHAIN_ROUTE_IPV6=m
290CONFIG_NFT_CHAIN_NAT_IPV6=m
291CONFIG_IP6_NF_IPTABLES=m
292CONFIG_IP6_NF_MATCH_AH=m
293CONFIG_IP6_NF_MATCH_EUI64=m
294CONFIG_IP6_NF_MATCH_FRAG=m
295CONFIG_IP6_NF_MATCH_OPTS=m
296CONFIG_IP6_NF_MATCH_HL=m
297CONFIG_IP6_NF_MATCH_IPV6HEADER=m
298CONFIG_IP6_NF_MATCH_MH=m
299CONFIG_IP6_NF_MATCH_RPFILTER=m
300CONFIG_IP6_NF_MATCH_RT=m
301CONFIG_IP6_NF_TARGET_HL=m
302CONFIG_IP6_NF_FILTER=m
303CONFIG_IP6_NF_TARGET_REJECT=m
304CONFIG_IP6_NF_MANGLE=m
305CONFIG_IP6_NF_RAW=m
306CONFIG_IP6_NF_SECURITY=m
307CONFIG_IP6_NF_NAT=m
308CONFIG_IP6_NF_TARGET_MASQUERADE=m
309CONFIG_NF_TABLES_BRIDGE=y
310CONFIG_RDS=m
311CONFIG_RDS_RDMA=m
312CONFIG_RDS_TCP=m
313CONFIG_L2TP=m
314CONFIG_L2TP_DEBUGFS=m
315CONFIG_L2TP_V3=y
316CONFIG_L2TP_IP=m
317CONFIG_L2TP_ETH=m
318CONFIG_BRIDGE=m
319CONFIG_VLAN_8021Q=m
320CONFIG_VLAN_8021Q_GVRP=y
321CONFIG_NET_SCHED=y
322CONFIG_NET_SCH_CBQ=m
323CONFIG_NET_SCH_HTB=m
324CONFIG_NET_SCH_HFSC=m
325CONFIG_NET_SCH_PRIO=m
326CONFIG_NET_SCH_MULTIQ=m
327CONFIG_NET_SCH_RED=m
328CONFIG_NET_SCH_SFB=m
329CONFIG_NET_SCH_SFQ=m
330CONFIG_NET_SCH_TEQL=m
331CONFIG_NET_SCH_TBF=m
332CONFIG_NET_SCH_GRED=m
333CONFIG_NET_SCH_DSMARK=m
334CONFIG_NET_SCH_NETEM=m
335CONFIG_NET_SCH_DRR=m
336CONFIG_NET_SCH_MQPRIO=m
337CONFIG_NET_SCH_CHOKE=m
338CONFIG_NET_SCH_QFQ=m
339CONFIG_NET_SCH_CODEL=m
340CONFIG_NET_SCH_FQ_CODEL=m
341CONFIG_NET_SCH_INGRESS=m
342CONFIG_NET_SCH_PLUG=m
343CONFIG_NET_CLS_BASIC=m
344CONFIG_NET_CLS_TCINDEX=m
345CONFIG_NET_CLS_ROUTE4=m
346CONFIG_NET_CLS_FW=m
347CONFIG_NET_CLS_U32=m
348CONFIG_CLS_U32_PERF=y
349CONFIG_CLS_U32_MARK=y
350CONFIG_NET_CLS_RSVP=m
351CONFIG_NET_CLS_RSVP6=m
352CONFIG_NET_CLS_FLOW=m
353CONFIG_NET_CLS_CGROUP=y
354CONFIG_NET_CLS_BPF=m
355CONFIG_NET_CLS_ACT=y
356CONFIG_NET_ACT_POLICE=m
357CONFIG_NET_ACT_GACT=m
358CONFIG_GACT_PROB=y
359CONFIG_NET_ACT_MIRRED=m
360CONFIG_NET_ACT_IPT=m
361CONFIG_NET_ACT_NAT=m
362CONFIG_NET_ACT_PEDIT=m
363CONFIG_NET_ACT_SIMP=m
364CONFIG_NET_ACT_SKBEDIT=m
365CONFIG_NET_ACT_CSUM=m
366CONFIG_DNS_RESOLVER=y
367CONFIG_OPENVSWITCH=m
368CONFIG_VSOCKETS=m
369CONFIG_VIRTIO_VSOCKETS=m
370CONFIG_NETLINK_DIAG=m
371CONFIG_CGROUP_NET_PRIO=y
372CONFIG_BPF_JIT=y
373CONFIG_NET_PKTGEN=m
374CONFIG_DEVTMPFS=y
375CONFIG_DMA_CMA=y
376CONFIG_CMA_SIZE_MBYTES=0
377CONFIG_CONNECTOR=y
378CONFIG_ZRAM=m
379CONFIG_BLK_DEV_LOOP=m
380CONFIG_BLK_DEV_CRYPTOLOOP=m
381CONFIG_BLK_DEV_DRBD=m
382CONFIG_BLK_DEV_NBD=m
383CONFIG_BLK_DEV_RAM=y
384CONFIG_BLK_DEV_RAM_SIZE=32768
385CONFIG_VIRTIO_BLK=y
386CONFIG_BLK_DEV_RBD=m
387CONFIG_BLK_DEV_NVME=m
388CONFIG_ENCLOSURE_SERVICES=m
389CONFIG_GENWQE=m
390CONFIG_RAID_ATTRS=m
391CONFIG_SCSI=y
392CONFIG_BLK_DEV_SD=y
393CONFIG_CHR_DEV_ST=m
394CONFIG_CHR_DEV_OSST=m
395CONFIG_BLK_DEV_SR=m
396CONFIG_CHR_DEV_SG=y
397CONFIG_CHR_DEV_SCH=m
398CONFIG_SCSI_ENCLOSURE=m
399CONFIG_SCSI_CONSTANTS=y
400CONFIG_SCSI_LOGGING=y
401CONFIG_SCSI_SPI_ATTRS=m
402CONFIG_SCSI_FC_ATTRS=y
403CONFIG_SCSI_SAS_LIBSAS=m
404CONFIG_SCSI_SRP_ATTRS=m
405CONFIG_ISCSI_TCP=m
406CONFIG_SCSI_DEBUG=m
407CONFIG_ZFCP=y
408CONFIG_SCSI_VIRTIO=m
409CONFIG_SCSI_DH=y
410CONFIG_SCSI_DH_RDAC=m
411CONFIG_SCSI_DH_HP_SW=m
412CONFIG_SCSI_DH_EMC=m
413CONFIG_SCSI_DH_ALUA=m
414CONFIG_SCSI_OSD_INITIATOR=m
415CONFIG_SCSI_OSD_ULD=m
416CONFIG_MD=y
417CONFIG_BLK_DEV_MD=y
418CONFIG_MD_LINEAR=m
419CONFIG_MD_MULTIPATH=m
420CONFIG_MD_FAULTY=m
421CONFIG_BLK_DEV_DM=m
422CONFIG_DM_CRYPT=m
423CONFIG_DM_SNAPSHOT=m
424CONFIG_DM_THIN_PROVISIONING=m
425CONFIG_DM_MIRROR=m
426CONFIG_DM_LOG_USERSPACE=m
427CONFIG_DM_RAID=m
428CONFIG_DM_ZERO=m
429CONFIG_DM_MULTIPATH=m
430CONFIG_DM_MULTIPATH_QL=m
431CONFIG_DM_MULTIPATH_ST=m
432CONFIG_DM_DELAY=m
433CONFIG_DM_UEVENT=y
434CONFIG_DM_FLAKEY=m
435CONFIG_DM_VERITY=m
436CONFIG_DM_SWITCH=m
437CONFIG_NETDEVICES=y
438CONFIG_BONDING=m
439CONFIG_DUMMY=m
440CONFIG_EQUALIZER=m
441CONFIG_IFB=m
442CONFIG_MACVLAN=m
443CONFIG_MACVTAP=m
444CONFIG_VXLAN=m
445CONFIG_TUN=m
446CONFIG_VETH=m
447CONFIG_VIRTIO_NET=m
448CONFIG_NLMON=m
449# CONFIG_NET_VENDOR_ARC is not set
450# CONFIG_NET_VENDOR_CHELSIO is not set
451# CONFIG_NET_VENDOR_INTEL is not set
452# CONFIG_NET_VENDOR_MARVELL is not set
453CONFIG_MLX4_EN=m
454CONFIG_MLX5_CORE=m
455CONFIG_MLX5_CORE_EN=y
456# CONFIG_NET_VENDOR_NATSEMI is not set
457CONFIG_PPP=m
458CONFIG_PPP_BSDCOMP=m
459CONFIG_PPP_DEFLATE=m
460CONFIG_PPP_MPPE=m
461CONFIG_PPPOE=m
462CONFIG_PPTP=m
463CONFIG_PPPOL2TP=m
464CONFIG_PPP_ASYNC=m
465CONFIG_PPP_SYNC_TTY=m
466CONFIG_ISM=m
467CONFIG_INPUT_EVDEV=y
468# CONFIG_INPUT_KEYBOARD is not set
469# CONFIG_INPUT_MOUSE is not set
470# CONFIG_SERIO is not set
471CONFIG_LEGACY_PTY_COUNT=0
472CONFIG_HW_RANDOM_VIRTIO=m
473CONFIG_RAW_DRIVER=m
474CONFIG_HANGCHECK_TIMER=m
475CONFIG_TN3270_FS=y
476# CONFIG_HWMON is not set
477CONFIG_WATCHDOG=y
478CONFIG_WATCHDOG_NOWAYOUT=y
479CONFIG_SOFT_WATCHDOG=m
480CONFIG_DIAG288_WATCHDOG=m
481CONFIG_DRM=y
482CONFIG_DRM_VIRTIO_GPU=y
483CONFIG_FRAMEBUFFER_CONSOLE=y
484# CONFIG_HID is not set
485# CONFIG_USB_SUPPORT is not set
486CONFIG_INFINIBAND=m
487CONFIG_INFINIBAND_USER_ACCESS=m
488CONFIG_MLX4_INFINIBAND=m
489CONFIG_MLX5_INFINIBAND=m
490CONFIG_VFIO=m
491CONFIG_VFIO_PCI=m
492CONFIG_VFIO_MDEV=m
493CONFIG_VFIO_MDEV_DEVICE=m
494CONFIG_VIRTIO_PCI=m
495CONFIG_VIRTIO_BALLOON=m
496CONFIG_VIRTIO_INPUT=y
497CONFIG_S390_AP_IOMMU=y
498CONFIG_EXT4_FS=y
499CONFIG_EXT4_FS_POSIX_ACL=y
500CONFIG_EXT4_FS_SECURITY=y
501CONFIG_JBD2_DEBUG=y
502CONFIG_JFS_FS=m
503CONFIG_JFS_POSIX_ACL=y
504CONFIG_JFS_SECURITY=y
505CONFIG_JFS_STATISTICS=y
506CONFIG_XFS_FS=y
507CONFIG_XFS_QUOTA=y
508CONFIG_XFS_POSIX_ACL=y
509CONFIG_XFS_RT=y
510CONFIG_GFS2_FS=m
511CONFIG_GFS2_FS_LOCKING_DLM=y
512CONFIG_OCFS2_FS=m
513CONFIG_BTRFS_FS=y
514CONFIG_BTRFS_FS_POSIX_ACL=y
515CONFIG_NILFS2_FS=m
516CONFIG_FS_DAX=y
517CONFIG_EXPORTFS_BLOCK_OPS=y
518CONFIG_FS_ENCRYPTION=y
519CONFIG_FANOTIFY=y
520CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
521CONFIG_QUOTA_NETLINK_INTERFACE=y
522CONFIG_QFMT_V1=m
523CONFIG_QFMT_V2=m
524CONFIG_AUTOFS4_FS=m
525CONFIG_FUSE_FS=y
526CONFIG_CUSE=m
527CONFIG_OVERLAY_FS=m
528CONFIG_FSCACHE=m
529CONFIG_CACHEFILES=m
530CONFIG_ISO9660_FS=y
531CONFIG_JOLIET=y
532CONFIG_ZISOFS=y
533CONFIG_UDF_FS=m
534CONFIG_MSDOS_FS=m
535CONFIG_VFAT_FS=m
536CONFIG_NTFS_FS=m
537CONFIG_NTFS_RW=y
538CONFIG_PROC_KCORE=y
539CONFIG_TMPFS=y
540CONFIG_TMPFS_POSIX_ACL=y
541CONFIG_HUGETLBFS=y
542CONFIG_CONFIGFS_FS=m
543CONFIG_ECRYPT_FS=m
544CONFIG_CRAMFS=m
545CONFIG_SQUASHFS=m
546CONFIG_SQUASHFS_XATTR=y
547CONFIG_SQUASHFS_LZO=y
548CONFIG_SQUASHFS_XZ=y
549CONFIG_ROMFS_FS=m
550CONFIG_NFS_FS=m
551CONFIG_NFS_V3_ACL=y
552CONFIG_NFS_V4=m
553CONFIG_NFS_SWAP=y
554CONFIG_NFSD=m
555CONFIG_NFSD_V3_ACL=y
556CONFIG_NFSD_V4=y
557CONFIG_NFSD_V4_SECURITY_LABEL=y
558CONFIG_CIFS=m
559CONFIG_CIFS_STATS=y
560CONFIG_CIFS_STATS2=y
561CONFIG_CIFS_WEAK_PW_HASH=y
562CONFIG_CIFS_UPCALL=y
563CONFIG_CIFS_XATTR=y
564CONFIG_CIFS_POSIX=y
565# CONFIG_CIFS_DEBUG is not set
566CONFIG_CIFS_DFS_UPCALL=y
567CONFIG_NLS_DEFAULT="utf8"
568CONFIG_NLS_CODEPAGE_437=m
569CONFIG_NLS_CODEPAGE_850=m
570CONFIG_NLS_ASCII=m
571CONFIG_NLS_ISO8859_1=m
572CONFIG_NLS_ISO8859_15=m
573CONFIG_NLS_UTF8=m
574CONFIG_DLM=m
575CONFIG_PRINTK_TIME=y
576CONFIG_DEBUG_INFO=y
577CONFIG_DEBUG_INFO_DWARF4=y
578CONFIG_GDB_SCRIPTS=y
579# CONFIG_ENABLE_MUST_CHECK is not set
580CONFIG_FRAME_WARN=1024
581CONFIG_UNUSED_SYMBOLS=y
582CONFIG_MAGIC_SYSRQ=y
583CONFIG_DEBUG_MEMORY_INIT=y
584CONFIG_PANIC_ON_OOPS=y
585CONFIG_RCU_TORTURE_TEST=m
586CONFIG_RCU_CPU_STALL_TIMEOUT=60
587CONFIG_LATENCYTOP=y
588CONFIG_SCHED_TRACER=y
589CONFIG_FTRACE_SYSCALLS=y
590CONFIG_STACK_TRACER=y
591CONFIG_BLK_DEV_IO_TRACE=y
592CONFIG_FUNCTION_PROFILER=y
593CONFIG_HIST_TRIGGERS=y
594CONFIG_LKDTM=m
595CONFIG_PERCPU_TEST=m
596CONFIG_ATOMIC64_SELFTEST=y
597CONFIG_TEST_BPF=m
598CONFIG_BUG_ON_DATA_CORRUPTION=y
599CONFIG_S390_PTDUMP=y
600CONFIG_PERSISTENT_KEYRINGS=y
601CONFIG_BIG_KEYS=y
602CONFIG_ENCRYPTED_KEYS=m
603CONFIG_SECURITY=y
604CONFIG_SECURITY_NETWORK=y
605CONFIG_SECURITY_SELINUX=y
606CONFIG_SECURITY_SELINUX_BOOTPARAM=y
607CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0
608CONFIG_SECURITY_SELINUX_DISABLE=y
609CONFIG_INTEGRITY_SIGNATURE=y
610CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y
611CONFIG_IMA=y
612CONFIG_IMA_DEFAULT_HASH_SHA256=y
613CONFIG_IMA_WRITE_POLICY=y
614CONFIG_IMA_APPRAISE=y
615CONFIG_CRYPTO_FIPS=y
616CONFIG_CRYPTO_DH=m
617CONFIG_CRYPTO_ECDH=m
618CONFIG_CRYPTO_USER=m
619# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
620CONFIG_CRYPTO_PCRYPT=m
621CONFIG_CRYPTO_CRYPTD=m
622CONFIG_CRYPTO_TEST=m
623CONFIG_CRYPTO_CHACHA20POLY1305=m
624CONFIG_CRYPTO_LRW=m
625CONFIG_CRYPTO_PCBC=m
626CONFIG_CRYPTO_KEYWRAP=m
627CONFIG_CRYPTO_XCBC=m
628CONFIG_CRYPTO_VMAC=m
629CONFIG_CRYPTO_CRC32=m
630CONFIG_CRYPTO_MICHAEL_MIC=m
631CONFIG_CRYPTO_RMD128=m
632CONFIG_CRYPTO_RMD160=m
633CONFIG_CRYPTO_RMD256=m
634CONFIG_CRYPTO_RMD320=m
635CONFIG_CRYPTO_SHA512=m
636CONFIG_CRYPTO_SHA3=m
637CONFIG_CRYPTO_TGR192=m
638CONFIG_CRYPTO_WP512=m
639CONFIG_CRYPTO_AES_TI=m
640CONFIG_CRYPTO_ANUBIS=m
641CONFIG_CRYPTO_BLOWFISH=m
642CONFIG_CRYPTO_CAMELLIA=m
643CONFIG_CRYPTO_CAST5=m
644CONFIG_CRYPTO_CAST6=m
645CONFIG_CRYPTO_FCRYPT=m
646CONFIG_CRYPTO_KHAZAD=m
647CONFIG_CRYPTO_SALSA20=m
648CONFIG_CRYPTO_SEED=m
649CONFIG_CRYPTO_SERPENT=m
650CONFIG_CRYPTO_TEA=m
651CONFIG_CRYPTO_TWOFISH=m
652CONFIG_CRYPTO_842=m
653CONFIG_CRYPTO_LZ4=m
654CONFIG_CRYPTO_LZ4HC=m
655CONFIG_CRYPTO_ANSI_CPRNG=m
656CONFIG_CRYPTO_USER_API_HASH=m
657CONFIG_CRYPTO_USER_API_SKCIPHER=m
658CONFIG_CRYPTO_USER_API_RNG=m
659CONFIG_CRYPTO_USER_API_AEAD=m
660CONFIG_ZCRYPT=m
661CONFIG_PKEY=m
662CONFIG_CRYPTO_PAES_S390=m
663CONFIG_CRYPTO_SHA1_S390=m
664CONFIG_CRYPTO_SHA256_S390=m
665CONFIG_CRYPTO_SHA512_S390=m
666CONFIG_CRYPTO_DES_S390=m
667CONFIG_CRYPTO_AES_S390=m
668CONFIG_CRYPTO_GHASH_S390=m
669CONFIG_CRYPTO_CRC32_S390=y
670CONFIG_CRC7=m
671CONFIG_CRC8=m
672CONFIG_CORDIC=m
673CONFIG_CMM=m
674CONFIG_APPLDATA_BASE=y
675CONFIG_KVM=m
676CONFIG_KVM_S390_UCONTROL=y
677CONFIG_VHOST_NET=m
678CONFIG_VHOST_VSOCK=m
diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig
index 7dc7f58c4287..d92bab844b73 100644
--- a/arch/s390/configs/zfcpdump_defconfig
+++ b/arch/s390/configs/zfcpdump_defconfig
@@ -24,7 +24,6 @@ CONFIG_CRASH_DUMP=y
24# CONFIG_SECCOMP is not set 24# CONFIG_SECCOMP is not set
25CONFIG_NET=y 25CONFIG_NET=y
26# CONFIG_IUCV is not set 26# CONFIG_IUCV is not set
27CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
28CONFIG_DEVTMPFS=y 27CONFIG_DEVTMPFS=y
29CONFIG_BLK_DEV_RAM=y 28CONFIG_BLK_DEV_RAM=y
30# CONFIG_BLK_DEV_XPRAM is not set 29# CONFIG_BLK_DEV_XPRAM is not set
diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c
index 86aed30fad3a..eeeb6a7737a4 100644
--- a/arch/s390/crypto/ghash_s390.c
+++ b/arch/s390/crypto/ghash_s390.c
@@ -137,7 +137,7 @@ static struct shash_alg ghash_alg = {
137static int __init ghash_mod_init(void) 137static int __init ghash_mod_init(void)
138{ 138{
139 if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_GHASH)) 139 if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_GHASH))
140 return -EOPNOTSUPP; 140 return -ENODEV;
141 141
142 return crypto_register_shash(&ghash_alg); 142 return crypto_register_shash(&ghash_alg);
143} 143}
diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c
index 12cca467af7d..d977643fa627 100644
--- a/arch/s390/crypto/prng.c
+++ b/arch/s390/crypto/prng.c
@@ -824,7 +824,7 @@ static int __init prng_init(void)
824 824
825 /* check if the CPU has a PRNG */ 825 /* check if the CPU has a PRNG */
826 if (!cpacf_query_func(CPACF_KMC, CPACF_KMC_PRNG)) 826 if (!cpacf_query_func(CPACF_KMC, CPACF_KMC_PRNG))
827 return -EOPNOTSUPP; 827 return -ENODEV;
828 828
829 /* check if TRNG subfunction is available */ 829 /* check if TRNG subfunction is available */
830 if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG)) 830 if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG))
@@ -837,7 +837,7 @@ static int __init prng_init(void)
837 if (prng_mode == PRNG_MODE_SHA512) { 837 if (prng_mode == PRNG_MODE_SHA512) {
838 pr_err("The prng module cannot " 838 pr_err("The prng module cannot "
839 "start in SHA-512 mode\n"); 839 "start in SHA-512 mode\n");
840 return -EOPNOTSUPP; 840 return -ENODEV;
841 } 841 }
842 prng_mode = PRNG_MODE_TDES; 842 prng_mode = PRNG_MODE_TDES;
843 } else 843 } else
diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c
index 009572e8276d..7c15542d3685 100644
--- a/arch/s390/crypto/sha1_s390.c
+++ b/arch/s390/crypto/sha1_s390.c
@@ -86,7 +86,7 @@ static struct shash_alg alg = {
86static int __init sha1_s390_init(void) 86static int __init sha1_s390_init(void)
87{ 87{
88 if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_1)) 88 if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_1))
89 return -EOPNOTSUPP; 89 return -ENODEV;
90 return crypto_register_shash(&alg); 90 return crypto_register_shash(&alg);
91} 91}
92 92
diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c
index 62833a1d8724..af7505148f80 100644
--- a/arch/s390/crypto/sha256_s390.c
+++ b/arch/s390/crypto/sha256_s390.c
@@ -117,7 +117,7 @@ static int __init sha256_s390_init(void)
117 int ret; 117 int ret;
118 118
119 if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_256)) 119 if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_256))
120 return -EOPNOTSUPP; 120 return -ENODEV;
121 ret = crypto_register_shash(&sha256_alg); 121 ret = crypto_register_shash(&sha256_alg);
122 if (ret < 0) 122 if (ret < 0)
123 goto out; 123 goto out;
diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c
index be589c340d15..ad29db085a18 100644
--- a/arch/s390/crypto/sha512_s390.c
+++ b/arch/s390/crypto/sha512_s390.c
@@ -127,7 +127,7 @@ static int __init init(void)
127 int ret; 127 int ret;
128 128
129 if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_512)) 129 if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_512))
130 return -EOPNOTSUPP; 130 return -ENODEV;
131 if ((ret = crypto_register_shash(&sha512_alg)) < 0) 131 if ((ret = crypto_register_shash(&sha512_alg)) < 0)
132 goto out; 132 goto out;
133 if ((ret = crypto_register_shash(&sha384_alg)) < 0) 133 if ((ret = crypto_register_shash(&sha384_alg)) < 0)
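The crypto module hunks above (ghash, prng, sha1, sha256, sha512) all make the same change: when cpacf_query_func() reports that the required CPACF facility is absent, the init function now returns -ENODEV instead of -EOPNOTSUPP. A minimal sketch of that probe-and-register pattern follows; the algorithm name is hypothetical and the shash_alg definition is elided, only the cpacf_query_func()/-ENODEV convention is taken from the hunks above.

/*
 * Sketch of the CPACF probe-and-register pattern; "example_alg" is assumed
 * to be a fully populated shash_alg defined elsewhere in the driver.
 */
#include <linux/errno.h>
#include <linux/module.h>
#include <crypto/internal/hash.h>
#include <asm/cpacf.h>

static struct shash_alg example_alg;

static int __init example_s390_init(void)
{
	/* CPU lacks the CPACF function: report "no such device" */
	if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_256))
		return -ENODEV;
	return crypto_register_shash(&example_alg);
}

static void __exit example_s390_exit(void)
{
	crypto_unregister_shash(&example_alg);
}

module_init(example_s390_init);
module_exit(example_s390_exit);
MODULE_LICENSE("GPL");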
diff --git a/arch/s390/include/asm/airq.h b/arch/s390/include/asm/airq.h
index c10d2ee2dfda..01936fdfaddb 100644
--- a/arch/s390/include/asm/airq.h
+++ b/arch/s390/include/asm/airq.h
@@ -11,6 +11,7 @@
11#define _ASM_S390_AIRQ_H 11#define _ASM_S390_AIRQ_H
12 12
13#include <linux/bit_spinlock.h> 13#include <linux/bit_spinlock.h>
14#include <linux/dma-mapping.h>
14 15
15struct airq_struct { 16struct airq_struct {
16 struct hlist_node list; /* Handler queueing. */ 17 struct hlist_node list; /* Handler queueing. */
@@ -29,6 +30,7 @@ void unregister_adapter_interrupt(struct airq_struct *airq);
29/* Adapter interrupt bit vector */ 30/* Adapter interrupt bit vector */
30struct airq_iv { 31struct airq_iv {
31 unsigned long *vector; /* Adapter interrupt bit vector */ 32 unsigned long *vector; /* Adapter interrupt bit vector */
33 dma_addr_t vector_dma; /* Adapter interrupt bit vector dma */
32 unsigned long *avail; /* Allocation bit mask for the bit vector */ 34 unsigned long *avail; /* Allocation bit mask for the bit vector */
33 unsigned long *bitlock; /* Lock bit mask for the bit vector */ 35 unsigned long *bitlock; /* Lock bit mask for the bit vector */
34 unsigned long *ptr; /* Pointer associated with each bit */ 36 unsigned long *ptr; /* Pointer associated with each bit */
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
index fd20ab5d4cf7..491ad53a0d4e 100644
--- a/arch/s390/include/asm/atomic.h
+++ b/arch/s390/include/asm/atomic.h
@@ -84,9 +84,9 @@ static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
84 84
85#define ATOMIC64_INIT(i) { (i) } 85#define ATOMIC64_INIT(i) { (i) }
86 86
87static inline long atomic64_read(const atomic64_t *v) 87static inline s64 atomic64_read(const atomic64_t *v)
88{ 88{
89 long c; 89 s64 c;
90 90
91 asm volatile( 91 asm volatile(
92 " lg %0,%1\n" 92 " lg %0,%1\n"
@@ -94,49 +94,49 @@ static inline long atomic64_read(const atomic64_t *v)
94 return c; 94 return c;
95} 95}
96 96
97static inline void atomic64_set(atomic64_t *v, long i) 97static inline void atomic64_set(atomic64_t *v, s64 i)
98{ 98{
99 asm volatile( 99 asm volatile(
100 " stg %1,%0\n" 100 " stg %1,%0\n"
101 : "=Q" (v->counter) : "d" (i)); 101 : "=Q" (v->counter) : "d" (i));
102} 102}
103 103
104static inline long atomic64_add_return(long i, atomic64_t *v) 104static inline s64 atomic64_add_return(s64 i, atomic64_t *v)
105{ 105{
106 return __atomic64_add_barrier(i, &v->counter) + i; 106 return __atomic64_add_barrier(i, (long *)&v->counter) + i;
107} 107}
108 108
109static inline long atomic64_fetch_add(long i, atomic64_t *v) 109static inline s64 atomic64_fetch_add(s64 i, atomic64_t *v)
110{ 110{
111 return __atomic64_add_barrier(i, &v->counter); 111 return __atomic64_add_barrier(i, (long *)&v->counter);
112} 112}
113 113
114static inline void atomic64_add(long i, atomic64_t *v) 114static inline void atomic64_add(s64 i, atomic64_t *v)
115{ 115{
116#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES 116#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
117 if (__builtin_constant_p(i) && (i > -129) && (i < 128)) { 117 if (__builtin_constant_p(i) && (i > -129) && (i < 128)) {
118 __atomic64_add_const(i, &v->counter); 118 __atomic64_add_const(i, (long *)&v->counter);
119 return; 119 return;
120 } 120 }
121#endif 121#endif
122 __atomic64_add(i, &v->counter); 122 __atomic64_add(i, (long *)&v->counter);
123} 123}
124 124
125#define atomic64_xchg(v, new) (xchg(&((v)->counter), new)) 125#define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
126 126
127static inline long atomic64_cmpxchg(atomic64_t *v, long old, long new) 127static inline s64 atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
128{ 128{
129 return __atomic64_cmpxchg(&v->counter, old, new); 129 return __atomic64_cmpxchg((long *)&v->counter, old, new);
130} 130}
131 131
132#define ATOMIC64_OPS(op) \ 132#define ATOMIC64_OPS(op) \
133static inline void atomic64_##op(long i, atomic64_t *v) \ 133static inline void atomic64_##op(s64 i, atomic64_t *v) \
134{ \ 134{ \
135 __atomic64_##op(i, &v->counter); \ 135 __atomic64_##op(i, (long *)&v->counter); \
136} \ 136} \
137static inline long atomic64_fetch_##op(long i, atomic64_t *v) \ 137static inline long atomic64_fetch_##op(s64 i, atomic64_t *v) \
138{ \ 138{ \
139 return __atomic64_##op##_barrier(i, &v->counter); \ 139 return __atomic64_##op##_barrier(i, (long *)&v->counter); \
140} 140}
141 141
142ATOMIC64_OPS(and) 142ATOMIC64_OPS(and)
@@ -145,8 +145,8 @@ ATOMIC64_OPS(xor)
145 145
146#undef ATOMIC64_OPS 146#undef ATOMIC64_OPS
147 147
148#define atomic64_sub_return(_i, _v) atomic64_add_return(-(long)(_i), _v) 148#define atomic64_sub_return(_i, _v) atomic64_add_return(-(s64)(_i), _v)
149#define atomic64_fetch_sub(_i, _v) atomic64_fetch_add(-(long)(_i), _v) 149#define atomic64_fetch_sub(_i, _v) atomic64_fetch_add(-(s64)(_i), _v)
150#define atomic64_sub(_i, _v) atomic64_add(-(long)(_i), _v) 150#define atomic64_sub(_i, _v) atomic64_add(-(s64)(_i), _v)
151 151
152#endif /* __ARCH_S390_ATOMIC__ */ 152#endif /* __ARCH_S390_ATOMIC__ */
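In the atomic.h hunks above the s390 atomic64 helpers switch their parameters (and most return values) from long to s64, with (long *) casts bridging to the underlying __atomic64_* primitives, which still take long pointers. Below is a short, hypothetical caller of the converted API; the counter name and accounting logic are made up for illustration.

#include <linux/atomic.h>
#include <linux/types.h>

static atomic64_t rx_bytes = ATOMIC64_INIT(0);

static void rx_account(s64 len)
{
	atomic64_add(len, &rx_bytes);	/* parameter is s64 now, not long */
}

static s64 rx_snapshot_and_reset(void)
{
	/* atomic64_xchg() hands back the previous counter value */
	return atomic64_xchg(&rx_bytes, 0);
}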
diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h
index a29dd430fb40..865ce1cb86d5 100644
--- a/arch/s390/include/asm/ccwdev.h
+++ b/arch/s390/include/asm/ccwdev.h
@@ -226,6 +226,10 @@ extern int ccw_device_enable_console(struct ccw_device *);
226extern void ccw_device_wait_idle(struct ccw_device *); 226extern void ccw_device_wait_idle(struct ccw_device *);
227extern int ccw_device_force_console(struct ccw_device *); 227extern int ccw_device_force_console(struct ccw_device *);
228 228
229extern void *ccw_device_dma_zalloc(struct ccw_device *cdev, size_t size);
230extern void ccw_device_dma_free(struct ccw_device *cdev,
231 void *cpu_addr, size_t size);
232
229int ccw_device_siosl(struct ccw_device *); 233int ccw_device_siosl(struct ccw_device *);
230 234
231extern void ccw_device_get_schid(struct ccw_device *, struct subchannel_id *); 235extern void ccw_device_get_schid(struct ccw_device *, struct subchannel_id *);
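The ccwdev.h hunk above exposes ccw_device_dma_zalloc() and ccw_device_dma_free() so that CCW device drivers can obtain memory suitable for channel I/O. A hedged sketch of a caller follows; the private structure and the 64-byte buffer size are hypothetical.

#include <linux/errno.h>
#include <asm/ccwdev.h>

struct example_priv {
	struct ccw_device *cdev;
	void *rcd_buf;
};

static int example_alloc_buf(struct example_priv *p)
{
	/* allocation is tied to the ccw_device and usable for channel programs */
	p->rcd_buf = ccw_device_dma_zalloc(p->cdev, 64);
	if (!p->rcd_buf)
		return -ENOMEM;
	return 0;
}

static void example_free_buf(struct example_priv *p)
{
	ccw_device_dma_free(p->cdev, p->rcd_buf, 64);
}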
diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h
index 1727180e8ca1..b5bfb3123cb1 100644
--- a/arch/s390/include/asm/cio.h
+++ b/arch/s390/include/asm/cio.h
@@ -7,6 +7,7 @@
7 7
8#include <linux/spinlock.h> 8#include <linux/spinlock.h>
9#include <linux/bitops.h> 9#include <linux/bitops.h>
10#include <linux/genalloc.h>
10#include <asm/types.h> 11#include <asm/types.h>
11 12
12#define LPM_ANYPATH 0xff 13#define LPM_ANYPATH 0xff
@@ -264,6 +265,36 @@ struct ciw {
264#define CIW_TYPE_RNI 0x2 /* read node identifier */ 265#define CIW_TYPE_RNI 0x2 /* read node identifier */
265 266
266/* 267/*
268 * Node Descriptor as defined in SA22-7204, "Common I/O-Device Commands"
269 */
270
271#define ND_VALIDITY_VALID 0
272#define ND_VALIDITY_OUTDATED 1
273#define ND_VALIDITY_INVALID 2
274
275struct node_descriptor {
276 /* Flags. */
277 union {
278 struct {
279 u32 validity:3;
280 u32 reserved:5;
281 } __packed;
282 u8 byte0;
283 } __packed;
284
285 /* Node parameters. */
286 u32 params:24;
287
288 /* Node ID. */
289 char type[6];
290 char model[3];
291 char manufacturer[3];
292 char plant[2];
293 char seq[12];
294 u16 tag;
295} __packed;
296
297/*
267 * Flags used as input parameters for do_IO() 298 * Flags used as input parameters for do_IO()
268 */ 299 */
269#define DOIO_ALLOW_SUSPEND 0x0001 /* allow for channel prog. suspend */ 300#define DOIO_ALLOW_SUSPEND 0x0001 /* allow for channel prog. suspend */
@@ -328,6 +359,16 @@ static inline u8 pathmask_to_pos(u8 mask)
328void channel_subsystem_reinit(void); 359void channel_subsystem_reinit(void);
329extern void css_schedule_reprobe(void); 360extern void css_schedule_reprobe(void);
330 361
362extern void *cio_dma_zalloc(size_t size);
363extern void cio_dma_free(void *cpu_addr, size_t size);
364extern struct device *cio_get_dma_css_dev(void);
365
366void *cio_gp_dma_zalloc(struct gen_pool *gp_dma, struct device *dma_dev,
367 size_t size);
368void cio_gp_dma_free(struct gen_pool *gp_dma, void *cpu_addr, size_t size);
369void cio_gp_dma_destroy(struct gen_pool *gp_dma, struct device *dma_dev);
370struct gen_pool *cio_gp_dma_create(struct device *dma_dev, int nr_pages);
371
331/* Function from drivers/s390/cio/chsc.c */ 372/* Function from drivers/s390/cio/chsc.c */
332int chsc_sstpc(void *page, unsigned int op, u16 ctrl, u64 *clock_delta); 373int chsc_sstpc(void *page, unsigned int op, u16 ctrl, u64 *clock_delta);
333int chsc_sstpi(void *page, void *result, size_t size); 374int chsc_sstpi(void *page, void *result, size_t size);
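Besides the node descriptor definitions, the cio.h hunks above declare allocation helpers for channel-subsystem DMA memory: cio_dma_zalloc()/cio_dma_free() operate on a global pool owned by the css device, while the cio_gp_dma_* functions manage caller-created gen_pools. A hedged usage sketch of the gen_pool variants follows; the device pointer, pool size and buffer length are hypothetical.

#include <linux/device.h>
#include <linux/errno.h>
#include <linux/genalloc.h>
#include <asm/cio.h>

static struct gen_pool *example_pool;
static void *example_buf;

static int example_pool_setup(struct device *dma_dev)
{
	example_pool = cio_gp_dma_create(dma_dev, 1);	/* one page of backing */
	if (!example_pool)
		return -ENOMEM;

	example_buf = cio_gp_dma_zalloc(example_pool, dma_dev, 256);
	if (!example_buf) {
		cio_gp_dma_destroy(example_pool, dma_dev);
		return -ENOMEM;
	}
	return 0;
}

static void example_pool_teardown(struct device *dma_dev)
{
	cio_gp_dma_free(example_pool, example_buf, 256);
	cio_gp_dma_destroy(example_pool, dma_dev);
}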
diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h
index 3bda757317cf..0cf6b53587db 100644
--- a/arch/s390/include/asm/ctl_reg.h
+++ b/arch/s390/include/asm/ctl_reg.h
@@ -112,13 +112,8 @@ union ctlreg2 {
112 }; 112 };
113}; 113};
114 114
115#ifdef CONFIG_SMP 115#define ctl_set_bit(cr, bit) smp_ctl_set_bit(cr, bit)
116# define ctl_set_bit(cr, bit) smp_ctl_set_bit(cr, bit) 116#define ctl_clear_bit(cr, bit) smp_ctl_clear_bit(cr, bit)
117# define ctl_clear_bit(cr, bit) smp_ctl_clear_bit(cr, bit)
118#else
119# define ctl_set_bit(cr, bit) __ctl_set_bit(cr, bit)
120# define ctl_clear_bit(cr, bit) __ctl_clear_bit(cr, bit)
121#endif
122 117
123#endif /* __ASSEMBLY__ */ 118#endif /* __ASSEMBLY__ */
124#endif /* __ASM_CTL_REG_H */ 119#endif /* __ASM_CTL_REG_H */
diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h
index c305d39f5016..310134015541 100644
--- a/arch/s390/include/asm/debug.h
+++ b/arch/s390/include/asm/debug.h
@@ -107,13 +107,37 @@ void debug_unregister(debug_info_t *id);
107void debug_set_level(debug_info_t *id, int new_level); 107void debug_set_level(debug_info_t *id, int new_level);
108 108
109void debug_set_critical(void); 109void debug_set_critical(void);
110
110void debug_stop_all(void); 111void debug_stop_all(void);
111 112
113/**
114 * debug_level_enabled() - Returns true if debug events for the specified
115 * level would be logged. Otherwise returns false.
116 *
117 * @id: handle for debug log
118 * @level: debug level
119 *
120 * Return:
121 * - %true if level is less or equal to the current debug level.
122 */
112static inline bool debug_level_enabled(debug_info_t *id, int level) 123static inline bool debug_level_enabled(debug_info_t *id, int level)
113{ 124{
114 return level <= id->level; 125 return level <= id->level;
115} 126}
116 127
128/**
129 * debug_event() - writes binary debug entry to active debug area
130 * (if level <= actual debug level)
131 *
132 * @id: handle for debug log
133 * @level: debug level
134 * @data: pointer to data for debug entry
135 * @length: length of data in bytes
136 *
137 * Return:
138 * - Address of written debug entry
139 * - %NULL if error
140 */
117static inline debug_entry_t *debug_event(debug_info_t *id, int level, 141static inline debug_entry_t *debug_event(debug_info_t *id, int level,
118 void *data, int length) 142 void *data, int length)
119{ 143{
@@ -122,6 +146,18 @@ static inline debug_entry_t *debug_event(debug_info_t *id, int level,
122 return debug_event_common(id, level, data, length); 146 return debug_event_common(id, level, data, length);
123} 147}
124 148
149/**
150 * debug_int_event() - writes unsigned integer debug entry to active debug area
151 * (if level <= actual debug level)
152 *
153 * @id: handle for debug log
154 * @level: debug level
155 * @tag: integer value for debug entry
156 *
157 * Return:
158 * - Address of written debug entry
159 * - %NULL if error
160 */
125static inline debug_entry_t *debug_int_event(debug_info_t *id, int level, 161static inline debug_entry_t *debug_int_event(debug_info_t *id, int level,
126 unsigned int tag) 162 unsigned int tag)
127{ 163{
@@ -132,6 +168,18 @@ static inline debug_entry_t *debug_int_event(debug_info_t *id, int level,
132 return debug_event_common(id, level, &t, sizeof(unsigned int)); 168 return debug_event_common(id, level, &t, sizeof(unsigned int));
133} 169}
134 170
171/**
172 * debug_long_event() - writes unsigned long debug entry to active debug area
173 * (if level <= actual debug level)
174 *
175 * @id: handle for debug log
176 * @level: debug level
177 * @tag: long integer value for debug entry
178 *
179 * Return:
180 * - Address of written debug entry
181 * - %NULL if error
182 */
135static inline debug_entry_t *debug_long_event(debug_info_t *id, int level, 183static inline debug_entry_t *debug_long_event(debug_info_t *id, int level,
136 unsigned long tag) 184 unsigned long tag)
137{ 185{
@@ -142,6 +190,18 @@ static inline debug_entry_t *debug_long_event(debug_info_t *id, int level,
142 return debug_event_common(id, level, &t, sizeof(unsigned long)); 190 return debug_event_common(id, level, &t, sizeof(unsigned long));
143} 191}
144 192
193/**
194 * debug_text_event() - writes string debug entry in ascii format to active
195 * debug area (if level <= actual debug level)
196 *
197 * @id: handle for debug log
198 * @level: debug level
199 * @txt: string for debug entry
200 *
201 * Return:
202 * - Address of written debug entry
203 * - %NULL if error
204 */
145static inline debug_entry_t *debug_text_event(debug_info_t *id, int level, 205static inline debug_entry_t *debug_text_event(debug_info_t *id, int level,
146 const char *txt) 206 const char *txt)
147{ 207{
@@ -152,12 +212,28 @@ static inline debug_entry_t *debug_text_event(debug_info_t *id, int level,
152 212
153/* 213/*
154 * IMPORTANT: Use "%s" in sprintf format strings with care! Only pointers are 214 * IMPORTANT: Use "%s" in sprintf format strings with care! Only pointers are
155 * stored in the s390dbf. See Documentation/s390/s390dbf.txt for more details! 215 * stored in the s390dbf. See Documentation/s390/s390dbf.rst for more details!
156 */ 216 */
157extern debug_entry_t * 217extern debug_entry_t *
158__debug_sprintf_event(debug_info_t *id, int level, char *string, ...) 218__debug_sprintf_event(debug_info_t *id, int level, char *string, ...)
159 __attribute__ ((format(printf, 3, 4))); 219 __attribute__ ((format(printf, 3, 4)));
160 220
221/**
222 * debug_sprintf_event() - writes debug entry with format string
223 * and varargs (longs) to active debug area
 224 *				(if level <= actual debug level).
225 *
226 * @_id: handle for debug log
227 * @_level: debug level
228 * @_fmt: format string for debug entry
229 * @...: varargs used as in sprintf()
230 *
231 * Return:
232 * - Address of written debug entry
233 * - %NULL if error
234 *
235 * floats and long long datatypes cannot be used as varargs.
236 */
161#define debug_sprintf_event(_id, _level, _fmt, ...) \ 237#define debug_sprintf_event(_id, _level, _fmt, ...) \
162({ \ 238({ \
163 debug_entry_t *__ret; \ 239 debug_entry_t *__ret; \
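The kernel-doc added in the hunks above describes the s390 debug feature (s390dbf) entry points, including the warning that "%s" arguments are stored as bare pointers. A hedged usage sketch follows; the log name, area sizes, level and messages are hypothetical, only the functions themselves come from this header.

#include <linux/errno.h>
#include <asm/debug.h>

static debug_info_t *example_dbf;

static int example_dbf_setup(void)
{
	/* 4 pages per area, 1 area, up to 32 bytes of data per binary entry */
	example_dbf = debug_register("example", 4, 1, 32);
	if (!example_dbf)
		return -ENOMEM;
	debug_set_level(example_dbf, 3);

	if (debug_level_enabled(example_dbf, 1))
		debug_text_event(example_dbf, 1, "setup complete");

	/* pass integers rather than "%s": only the string pointer would be stored */
	debug_sprintf_event(example_dbf, 2, "found %d subchannels\n", 3);
	return 0;
}

static void example_dbf_teardown(void)
{
	debug_unregister(example_dbf);
}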
@@ -172,6 +248,20 @@ __debug_sprintf_event(debug_info_t *id, int level, char *string, ...)
172 __ret; \ 248 __ret; \
173}) 249})
174 250
251/**
252 * debug_exception() - writes binary debug entry to active debug area
253 * (if level <= actual debug level)
254 * and switches to next debug area
255 *
256 * @id: handle for debug log
257 * @level: debug level
258 * @data: pointer to data for debug entry
259 * @length: length of data in bytes
260 *
261 * Return:
262 * - Address of written debug entry
263 * - %NULL if error
264 */
175static inline debug_entry_t *debug_exception(debug_info_t *id, int level, 265static inline debug_entry_t *debug_exception(debug_info_t *id, int level,
176 void *data, int length) 266 void *data, int length)
177{ 267{
@@ -180,6 +270,19 @@ static inline debug_entry_t *debug_exception(debug_info_t *id, int level,
180 return debug_exception_common(id, level, data, length); 270 return debug_exception_common(id, level, data, length);
181} 271}
182 272
273/**
274 * debug_int_exception() - writes unsigned int debug entry to active debug area
275 * (if level <= actual debug level)
276 * and switches to next debug area
277 *
278 * @id: handle for debug log
279 * @level: debug level
280 * @tag: integer value for debug entry
281 *
282 * Return:
283 * - Address of written debug entry
284 * - %NULL if error
285 */
183static inline debug_entry_t *debug_int_exception(debug_info_t *id, int level, 286static inline debug_entry_t *debug_int_exception(debug_info_t *id, int level,
184 unsigned int tag) 287 unsigned int tag)
185{ 288{
@@ -190,6 +293,19 @@ static inline debug_entry_t *debug_int_exception(debug_info_t *id, int level,
190 return debug_exception_common(id, level, &t, sizeof(unsigned int)); 293 return debug_exception_common(id, level, &t, sizeof(unsigned int));
191} 294}
192 295
296/**
297 * debug_long_exception() - writes long debug entry to active debug area
298 * (if level <= actual debug level)
299 * and switches to next debug area
300 *
301 * @id: handle for debug log
302 * @level: debug level
303 * @tag: long integer value for debug entry
304 *
305 * Return:
306 * - Address of written debug entry
307 * - %NULL if error
308 */
193static inline debug_entry_t *debug_long_exception (debug_info_t *id, int level, 309static inline debug_entry_t *debug_long_exception (debug_info_t *id, int level,
194 unsigned long tag) 310 unsigned long tag)
195{ 311{
@@ -200,6 +316,20 @@ static inline debug_entry_t *debug_long_exception (debug_info_t *id, int level,
200 return debug_exception_common(id, level, &t, sizeof(unsigned long)); 316 return debug_exception_common(id, level, &t, sizeof(unsigned long));
201} 317}
202 318
319/**
320 * debug_text_exception() - writes string debug entry in ascii format to active
321 * debug area (if level <= actual debug level)
322 * and switches to next debug area
324 *
325 * @id: handle for debug log
326 * @level: debug level
327 * @txt: string for debug entry
328 *
329 * Return:
330 * - Address of written debug entry
331 * - %NULL if error
332 */
203static inline debug_entry_t *debug_text_exception(debug_info_t *id, int level, 333static inline debug_entry_t *debug_text_exception(debug_info_t *id, int level,
204 const char *txt) 334 const char *txt)
205{ 335{
@@ -210,12 +340,30 @@ static inline debug_entry_t *debug_text_exception(debug_info_t *id, int level,
210 340
211/* 341/*
212 * IMPORTANT: Use "%s" in sprintf format strings with care! Only pointers are 342 * IMPORTANT: Use "%s" in sprintf format strings with care! Only pointers are
213 * stored in the s390dbf. See Documentation/s390/s390dbf.txt for more details! 343 * stored in the s390dbf. See Documentation/s390/s390dbf.rst for more details!
214 */ 344 */
215extern debug_entry_t * 345extern debug_entry_t *
216__debug_sprintf_exception(debug_info_t *id, int level, char *string, ...) 346__debug_sprintf_exception(debug_info_t *id, int level, char *string, ...)
217 __attribute__ ((format(printf, 3, 4))); 347 __attribute__ ((format(printf, 3, 4)));
218 348
349
350/**
351 * debug_sprintf_exception() - writes debug entry with format string and
352 * varargs (longs) to active debug area
353 * (if level <= actual debug level)
354 * and switches to next debug area.
355 *
356 * @_id: handle for debug log
357 * @_level: debug level
358 * @_fmt: format string for debug entry
359 * @...: varargs used as in sprintf()
360 *
361 * Return:
362 * - Address of written debug entry
363 * - %NULL if error
364 *
365 * floats and long long datatypes cannot be used as varargs.
366 */
219#define debug_sprintf_exception(_id, _level, _fmt, ...) \ 367#define debug_sprintf_exception(_id, _level, _fmt, ...) \
220({ \ 368({ \
221 debug_entry_t *__ret; \ 369 debug_entry_t *__ret; \
@@ -231,6 +379,7 @@ __debug_sprintf_exception(debug_info_t *id, int level, char *string, ...)
231}) 379})
232 380
233int debug_register_view(debug_info_t *id, struct debug_view *view); 381int debug_register_view(debug_info_t *id, struct debug_view *view);
382
234int debug_unregister_view(debug_info_t *id, struct debug_view *view); 383int debug_unregister_view(debug_info_t *id, struct debug_view *view);
235 384
236/* 385/*
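As a quick orientation for the event helpers documented in the hunks above, here is a minimal usage sketch of the s390 debug feature API (the log name "foo", the sizes and the levels are illustrative assumptions, not part of this patch):

#include <linux/errno.h>
#include <asm/debug.h>

static debug_info_t *foo_dbf;

static int foo_dbf_setup(void)
{
	/* 4 areas, 1 page each, room for a format pointer plus 3 longs */
	foo_dbf = debug_register("foo", 1, 4, 4 * sizeof(long));
	if (!foo_dbf)
		return -ENOMEM;
	/* make entries readable via .../s390dbf/foo/sprintf */
	debug_register_view(foo_dbf, &debug_sprintf_view);
	debug_set_level(foo_dbf, 3);

	debug_sprintf_event(foo_dbf, 2, "setup finished, rc=%d", 0);
	return 0;
}

static void foo_dbf_teardown(void)
{
	debug_unregister(foo_dbf);
}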
diff --git a/arch/s390/include/asm/facility.h b/arch/s390/include/asm/facility.h
index e78cda94456b..68c476b20b57 100644
--- a/arch/s390/include/asm/facility.h
+++ b/arch/s390/include/asm/facility.h
@@ -59,6 +59,18 @@ static inline int test_facility(unsigned long nr)
59 return __test_facility(nr, &S390_lowcore.stfle_fac_list); 59 return __test_facility(nr, &S390_lowcore.stfle_fac_list);
60} 60}
61 61
62static inline unsigned long __stfle_asm(u64 *stfle_fac_list, int size)
63{
64 register unsigned long reg0 asm("0") = size - 1;
65
66 asm volatile(
67 ".insn s,0xb2b00000,0(%1)" /* stfle */
68 : "+d" (reg0)
69 : "a" (stfle_fac_list)
70 : "memory", "cc");
71 return reg0;
72}
73
62/** 74/**
63 * stfle - Store facility list extended 75 * stfle - Store facility list extended
64 * @stfle_fac_list: array where facility list can be stored 76 * @stfle_fac_list: array where facility list can be stored
@@ -75,13 +87,8 @@ static inline void __stfle(u64 *stfle_fac_list, int size)
75 memcpy(stfle_fac_list, &S390_lowcore.stfl_fac_list, 4); 87 memcpy(stfle_fac_list, &S390_lowcore.stfl_fac_list, 4);
76 if (S390_lowcore.stfl_fac_list & 0x01000000) { 88 if (S390_lowcore.stfl_fac_list & 0x01000000) {
77 /* More facility bits available with stfle */ 89 /* More facility bits available with stfle */
78 register unsigned long reg0 asm("0") = size - 1; 90 nr = __stfle_asm(stfle_fac_list, size);
79 91 nr = min_t(unsigned long, (nr + 1) * 8, size * 8);
80 asm volatile(".insn s,0xb2b00000,0(%1)" /* stfle */
81 : "+d" (reg0)
82 : "a" (stfle_fac_list)
83 : "memory", "cc");
84 nr = (reg0 + 1) * 8; /* # bytes stored by stfle */
85 } 92 }
86 memset((char *) stfle_fac_list + nr, 0, size * 8 - nr); 93 memset((char *) stfle_fac_list + nr, 0, size * 8 - nr);
87} 94}
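For context, __stfle() above only fills the facility list; most callers go through test_facility(), roughly as in this sketch (the surrounding function is hypothetical; facility bit 129 is the vector facility):

#include <asm/facility.h>

static bool foo_have_vector(void)
{
	/* stfle_fac_list in the lowcore is populated during early setup */
	return test_facility(129);
}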
diff --git a/arch/s390/include/asm/idals.h b/arch/s390/include/asm/idals.h
index 15578fd762f6..6fb7aced104a 100644
--- a/arch/s390/include/asm/idals.h
+++ b/arch/s390/include/asm/idals.h
@@ -122,8 +122,7 @@ idal_buffer_alloc(size_t size, int page_order)
122 122
123 nr_ptrs = (size + IDA_BLOCK_SIZE - 1) >> IDA_SIZE_LOG; 123 nr_ptrs = (size + IDA_BLOCK_SIZE - 1) >> IDA_SIZE_LOG;
124 nr_chunks = (4096 << page_order) >> IDA_SIZE_LOG; 124 nr_chunks = (4096 << page_order) >> IDA_SIZE_LOG;
125 ib = kmalloc(sizeof(struct idal_buffer) + nr_ptrs*sizeof(void *), 125 ib = kmalloc(struct_size(ib, data, nr_ptrs), GFP_DMA | GFP_KERNEL);
126 GFP_DMA | GFP_KERNEL);
127 if (ib == NULL) 126 if (ib == NULL)
128 return ERR_PTR(-ENOMEM); 127 return ERR_PTR(-ENOMEM);
129 ib->size = size; 128 ib->size = size;
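The struct_size() helper used above comes from <linux/overflow.h>: it computes the size of a structure with a trailing flexible array and saturates to SIZE_MAX instead of wrapping on overflow. A generic sketch of the pattern (the struct and names are illustrative, not the actual idal_buffer layout):

#include <linux/overflow.h>
#include <linux/slab.h>

struct foo_buf {
	size_t size;
	void *data[];			/* flexible array member */
};

static struct foo_buf *foo_buf_alloc(size_t nr_ptrs, gfp_t gfp)
{
	/* same intent as sizeof(*fb) + nr_ptrs * sizeof(fb->data[0]),
	 * but overflow-safe */
	struct foo_buf *fb = kmalloc(struct_size(fb, data, nr_ptrs), gfp);

	if (fb)
		fb->size = nr_ptrs;
	return fb;
}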
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 2b00a3ebee08..4a928e2c667b 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -18,6 +18,7 @@
18#include <linux/kvm_host.h> 18#include <linux/kvm_host.h>
19#include <linux/kvm.h> 19#include <linux/kvm.h>
20#include <linux/seqlock.h> 20#include <linux/seqlock.h>
21#include <linux/module.h>
21#include <asm/debug.h> 22#include <asm/debug.h>
22#include <asm/cpu.h> 23#include <asm/cpu.h>
23#include <asm/fpu/api.h> 24#include <asm/fpu/api.h>
@@ -720,8 +721,14 @@ struct kvm_s390_cpu_model {
720 unsigned short ibc; 721 unsigned short ibc;
721}; 722};
722 723
724struct kvm_s390_module_hook {
725 int (*hook)(struct kvm_vcpu *vcpu);
726 struct module *owner;
727};
728
723struct kvm_s390_crypto { 729struct kvm_s390_crypto {
724 struct kvm_s390_crypto_cb *crycb; 730 struct kvm_s390_crypto_cb *crycb;
731 struct kvm_s390_module_hook *pqap_hook;
725 __u32 crycbd; 732 __u32 crycbd;
726 __u8 aes_kw; 733 __u8 aes_kw;
727 __u8 dea_kw; 734 __u8 dea_kw;
diff --git a/arch/s390/include/asm/mem_encrypt.h b/arch/s390/include/asm/mem_encrypt.h
new file mode 100644
index 000000000000..3eb018508190
--- /dev/null
+++ b/arch/s390/include/asm/mem_encrypt.h
@@ -0,0 +1,17 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2#ifndef S390_MEM_ENCRYPT_H__
3#define S390_MEM_ENCRYPT_H__
4
5#ifndef __ASSEMBLY__
6
7#define sme_me_mask 0ULL
8
9static inline bool sme_active(void) { return false; }
10extern bool sev_active(void);
11
12int set_memory_encrypted(unsigned long addr, int numpages);
13int set_memory_decrypted(unsigned long addr, int numpages);
14
15#endif /* __ASSEMBLY__ */
16
17#endif /* S390_MEM_ENCRYPT_H__ */
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 305befd55326..a2399eff84ca 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -194,6 +194,11 @@ int zpci_init_iommu(struct zpci_dev *zdev);
194void zpci_destroy_iommu(struct zpci_dev *zdev); 194void zpci_destroy_iommu(struct zpci_dev *zdev);
195 195
196#ifdef CONFIG_PCI 196#ifdef CONFIG_PCI
197static inline bool zpci_use_mio(struct zpci_dev *zdev)
198{
199 return static_branch_likely(&have_mio) && zdev->mio_capable;
200}
201
197/* Error handling and recovery */ 202/* Error handling and recovery */
198void zpci_event_error(void *); 203void zpci_event_error(void *);
199void zpci_event_availability(void *); 204void zpci_event_availability(void *);
diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h
index 0095ddb58ff6..50b4ce8cddfd 100644
--- a/arch/s390/include/asm/percpu.h
+++ b/arch/s390/include/asm/percpu.h
@@ -16,7 +16,7 @@
16 * per cpu area, use weak definitions to force the compiler to 16 * per cpu area, use weak definitions to force the compiler to
17 * generate external references. 17 * generate external references.
18 */ 18 */
19#if defined(CONFIG_SMP) && defined(MODULE) 19#if defined(MODULE)
20#define ARCH_NEEDS_WEAK_PER_CPU 20#define ARCH_NEEDS_WEAK_PER_CPU
21#endif 21#endif
22 22
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index b0fcbc37b637..14883b1562e0 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -36,6 +36,7 @@
36 36
37#ifndef __ASSEMBLY__ 37#ifndef __ASSEMBLY__
38 38
39#include <linux/cpumask.h>
39#include <linux/linkage.h> 40#include <linux/linkage.h>
40#include <linux/irqflags.h> 41#include <linux/irqflags.h>
41#include <asm/cpu.h> 42#include <asm/cpu.h>
@@ -221,12 +222,6 @@ static __no_kasan_or_inline unsigned short stap(void)
221 return cpu_address; 222 return cpu_address;
222} 223}
223 224
224/*
225 * Give up the time slice of the virtual PU.
226 */
227#define cpu_relax_yield cpu_relax_yield
228void cpu_relax_yield(void);
229
230#define cpu_relax() barrier() 225#define cpu_relax() barrier()
231 226
232#define ECAG_CACHE_ATTRIBUTE 0 227#define ECAG_CACHE_ATTRIBUTE 0
diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h
index 3907ead27ffa..b157a81fb977 100644
--- a/arch/s390/include/asm/smp.h
+++ b/arch/s390/include/asm/smp.h
@@ -9,9 +9,6 @@
9#define __ASM_SMP_H 9#define __ASM_SMP_H
10 10
11#include <asm/sigp.h> 11#include <asm/sigp.h>
12
13#ifdef CONFIG_SMP
14
15#include <asm/lowcore.h> 12#include <asm/lowcore.h>
16 13
17#define raw_smp_processor_id() (S390_lowcore.cpu_nr) 14#define raw_smp_processor_id() (S390_lowcore.cpu_nr)
@@ -40,33 +37,6 @@ extern int smp_cpu_get_polarization(int cpu);
40extern void smp_fill_possible_mask(void); 37extern void smp_fill_possible_mask(void);
41extern void smp_detect_cpus(void); 38extern void smp_detect_cpus(void);
42 39
43#else /* CONFIG_SMP */
44
45#define smp_cpu_mtid 0
46
47static inline void smp_call_ipl_cpu(void (*func)(void *), void *data)
48{
49 func(data);
50}
51
52static inline void smp_call_online_cpu(void (*func)(void *), void *data)
53{
54 func(data);
55}
56
57static inline void smp_emergency_stop(void)
58{
59}
60
61static inline int smp_find_processor_id(u16 address) { return 0; }
62static inline int smp_store_status(int cpu) { return 0; }
63static inline int smp_vcpu_scheduled(int cpu) { return 1; }
64static inline void smp_yield_cpu(int cpu) { }
65static inline void smp_fill_possible_mask(void) { }
66static inline void smp_detect_cpus(void) { }
67
68#endif /* CONFIG_SMP */
69
70static inline void smp_stop_cpu(void) 40static inline void smp_stop_cpu(void)
71{ 41{
72 u16 pcpu = stap(); 42 u16 pcpu = stap();
@@ -83,14 +53,9 @@ static inline int smp_get_base_cpu(int cpu)
83 return cpu - (cpu % (smp_cpu_mtid + 1)); 53 return cpu - (cpu % (smp_cpu_mtid + 1));
84} 54}
85 55
86#ifdef CONFIG_HOTPLUG_CPU
87extern int smp_rescan_cpus(void); 56extern int smp_rescan_cpus(void);
88extern void __noreturn cpu_die(void); 57extern void __noreturn cpu_die(void);
89extern void __cpu_die(unsigned int cpu); 58extern void __cpu_die(unsigned int cpu);
90extern int __cpu_disable(void); 59extern int __cpu_disable(void);
91#else
92static inline int smp_rescan_cpus(void) { return 0; }
93static inline void cpu_die(void) { }
94#endif
95 60
96#endif /* __ASM_SMP_H */ 61#endif /* __ASM_SMP_H */
diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h
index 0a29588aa00b..c02bff33f6c7 100644
--- a/arch/s390/include/asm/spinlock.h
+++ b/arch/s390/include/asm/spinlock.h
@@ -20,11 +20,7 @@
20 20
21extern int spin_retry; 21extern int spin_retry;
22 22
23#ifndef CONFIG_SMP
24static inline bool arch_vcpu_is_preempted(int cpu) { return false; }
25#else
26bool arch_vcpu_is_preempted(int cpu); 23bool arch_vcpu_is_preempted(int cpu);
27#endif
28 24
29#define vcpu_is_preempted arch_vcpu_is_preempted 25#define vcpu_is_preempted arch_vcpu_is_preempted
30 26
diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h
index 8c840f0904f3..82703e03f35d 100644
--- a/arch/s390/include/asm/tlbflush.h
+++ b/arch/s390/include/asm/tlbflush.h
@@ -32,7 +32,6 @@ static inline void __tlb_flush_idte(unsigned long asce)
32 : : "a" (opt), "a" (asce) : "cc"); 32 : : "a" (opt), "a" (asce) : "cc");
33} 33}
34 34
35#ifdef CONFIG_SMP
36void smp_ptlb_all(void); 35void smp_ptlb_all(void);
37 36
38/* 37/*
@@ -83,22 +82,6 @@ static inline void __tlb_flush_kernel(void)
83 else 82 else
84 __tlb_flush_global(); 83 __tlb_flush_global();
85} 84}
86#else
87#define __tlb_flush_global() __tlb_flush_local()
88
89/*
90 * Flush TLB entries for a specific ASCE on all CPUs.
91 */
92static inline void __tlb_flush_mm(struct mm_struct *mm)
93{
94 __tlb_flush_local();
95}
96
97static inline void __tlb_flush_kernel(void)
98{
99 __tlb_flush_local();
100}
101#endif
102 85
103static inline void __tlb_flush_mm_lazy(struct mm_struct * mm) 86static inline void __tlb_flush_mm_lazy(struct mm_struct * mm)
104{ 87{
diff --git a/arch/s390/include/asm/unwind.h b/arch/s390/include/asm/unwind.h
index 6eb2ef105d87..d827b5b9a32c 100644
--- a/arch/s390/include/asm/unwind.h
+++ b/arch/s390/include/asm/unwind.h
@@ -79,23 +79,4 @@ static inline void unwind_module_init(struct module *mod, void *orc_ip,
79 size_t orc_ip_size, void *orc, 79 size_t orc_ip_size, void *orc,
80 size_t orc_size) {} 80 size_t orc_size) {}
81 81
82#ifdef CONFIG_KASAN
83/*
84 * This disables KASAN checking when reading a value from another task's stack,
85 * since the other task could be running on another CPU and could have poisoned
86 * the stack in the meantime.
87 */
88#define READ_ONCE_TASK_STACK(task, x) \
89({ \
90 unsigned long val; \
91 if (task == current) \
92 val = READ_ONCE(x); \
93 else \
94 val = READ_ONCE_NOCHECK(x); \
95 val; \
96})
97#else
98#define READ_ONCE_TASK_STACK(task, x) READ_ONCE(x)
99#endif
100
101#endif /* _ASM_S390_UNWIND_H */ 82#endif /* _ASM_S390_UNWIND_H */
diff --git a/arch/s390/include/uapi/asm/runtime_instr.h b/arch/s390/include/uapi/asm/runtime_instr.h
index 45c9ec984e6b..455da46e3193 100644
--- a/arch/s390/include/uapi/asm/runtime_instr.h
+++ b/arch/s390/include/uapi/asm/runtime_instr.h
@@ -57,7 +57,7 @@ struct runtime_instr_cb {
57 __u64 sf; 57 __u64 sf;
58 __u64 rsic; 58 __u64 rsic;
59 __u64 reserved8; 59 __u64 reserved8;
60} __packed __aligned(8); 60} __attribute__((__packed__, __aligned__(8)));
61 61
62static inline void load_runtime_instr_cb(struct runtime_instr_cb *cb) 62static inline void load_runtime_instr_cb(struct runtime_instr_cb *cb)
63{ 63{
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index b0478d01a0c5..0f255b54b051 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -53,6 +53,7 @@ obj-y += sysinfo.o lgr.o os_info.o machine_kexec.o pgm_check.o
53obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o 53obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o
54obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o alternative.o 54obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o alternative.o
55obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o 55obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o
56obj-y += smp.o
56 57
57extra-y += head64.o vmlinux.lds 58extra-y += head64.o vmlinux.lds
58 59
@@ -60,7 +61,6 @@ obj-$(CONFIG_SYSFS) += nospec-sysfs.o
60CFLAGS_REMOVE_nospec-branch.o += $(CC_FLAGS_EXPOLINE) 61CFLAGS_REMOVE_nospec-branch.o += $(CC_FLAGS_EXPOLINE)
61 62
62obj-$(CONFIG_MODULES) += module.o 63obj-$(CONFIG_MODULES) += module.o
63obj-$(CONFIG_SMP) += smp.o
64obj-$(CONFIG_SCHED_TOPOLOGY) += topology.o 64obj-$(CONFIG_SCHED_TOPOLOGY) += topology.o
65obj-$(CONFIG_HIBERNATION) += suspend.o swsusp.o 65obj-$(CONFIG_HIBERNATION) += suspend.o swsusp.o
66obj-$(CONFIG_AUDIT) += audit.o 66obj-$(CONFIG_AUDIT) += audit.o
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index 0ebf08c3b35e..6d321f5f101d 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -647,11 +647,23 @@ static int debug_close(struct inode *inode, struct file *file)
647 return 0; /* success */ 647 return 0; /* success */
648} 648}
649 649
650/* 650/**
651 * debug_register_mode: 651 * debug_register_mode() - creates and initializes debug area.
652 * - Creates and initializes debug area for the caller 652 *
653 * The mode parameter allows to specify access rights for the s390dbf files 653 * @name: Name of debug log (e.g. used for debugfs entry)
654 * - Returns handle for debug area 654 * @pages_per_area: Number of pages which will be allocated per area
655 * @nr_areas: Number of debug areas
656 * @buf_size: Size of data area in each debug entry
657 * @mode: File mode for debugfs files. E.g. S_IRWXUGO
658 * @uid: User ID for debugfs files. Currently only 0 is supported.
659 * @gid: Group ID for debugfs files. Currently only 0 is supported.
660 *
661 * Return:
662 * - Handle for generated debug area
663 * - %NULL if register failed
664 *
665 * Allocates memory for a debug log.
666 * Must not be called within an interrupt handler.
655 */ 667 */
656debug_info_t *debug_register_mode(const char *name, int pages_per_area, 668debug_info_t *debug_register_mode(const char *name, int pages_per_area,
657 int nr_areas, int buf_size, umode_t mode, 669 int nr_areas, int buf_size, umode_t mode,
@@ -681,10 +693,21 @@ out:
681} 693}
682EXPORT_SYMBOL(debug_register_mode); 694EXPORT_SYMBOL(debug_register_mode);
683 695
684/* 696/**
685 * debug_register: 697 * debug_register() - creates and initializes debug area with default file mode.
686 * - creates and initializes debug area for the caller 698 *
687 * - returns handle for debug area 699 * @name: Name of debug log (e.g. used for debugfs entry)
700 * @pages_per_area: Number of pages which will be allocated per area
701 * @nr_areas: Number of debug areas
702 * @buf_size: Size of data area in each debug entry
703 *
704 * Return:
705 * - Handle for generated debug area
706 * - %NULL if register failed
707 *
708 * Allocates memory for a debug log.
709 * The debugfs files are created with read and write permissions for the user.
710 * Must not be called within an interrupt handler.
688 */ 711 */
689debug_info_t *debug_register(const char *name, int pages_per_area, 712debug_info_t *debug_register(const char *name, int pages_per_area,
690 int nr_areas, int buf_size) 713 int nr_areas, int buf_size)
@@ -694,9 +717,13 @@ debug_info_t *debug_register(const char *name, int pages_per_area,
694} 717}
695EXPORT_SYMBOL(debug_register); 718EXPORT_SYMBOL(debug_register);
696 719
697/* 720/**
698 * debug_unregister: 721 * debug_unregister() - give back debug area.
699 * - give back debug area 722 *
723 * @id: handle for debug log
724 *
725 * Return:
726 * none
700 */ 727 */
701void debug_unregister(debug_info_t *id) 728void debug_unregister(debug_info_t *id)
702{ 729{
@@ -745,9 +772,14 @@ out:
745 return rc; 772 return rc;
746} 773}
747 774
748/* 775/**
749 * debug_set_level: 776 * debug_set_level() - Sets new actual debug level if new_level is valid.
750 * - set actual debug level 777 *
778 * @id: handle for debug log
779 * @new_level: new debug level
780 *
781 * Return:
782 * none
751 */ 783 */
752void debug_set_level(debug_info_t *id, int new_level) 784void debug_set_level(debug_info_t *id, int new_level)
753{ 785{
@@ -873,6 +905,14 @@ static struct ctl_table s390dbf_dir_table[] = {
873 905
874static struct ctl_table_header *s390dbf_sysctl_header; 906static struct ctl_table_header *s390dbf_sysctl_header;
875 907
908/**
909 * debug_stop_all() - stops the debug feature if stopping is allowed.
910 *
911 * Return:
912 * - none
913 *
914 * Currently used in case of a kernel oops.
915 */
876void debug_stop_all(void) 916void debug_stop_all(void)
877{ 917{
878 if (debug_stoppable) 918 if (debug_stoppable)
@@ -880,6 +920,17 @@ void debug_stop_all(void)
880} 920}
881EXPORT_SYMBOL(debug_stop_all); 921EXPORT_SYMBOL(debug_stop_all);
882 922
923/**
924 * debug_set_critical() - event/exception functions try lock instead of spin.
925 *
926 * Return:
927 * - none
928 *
929 * Currently used in case of stopping all CPUs but the current one.
930 * Once in this state, functions to write a debug entry for an
931 * event or exception no longer spin on the debug area lock,
932 * but only try to get it and fail if they do not get the lock.
933 */
883void debug_set_critical(void) 934void debug_set_critical(void)
884{ 935{
885 debug_critical = 1; 936 debug_critical = 1;
@@ -1036,8 +1087,16 @@ debug_entry_t *__debug_sprintf_exception(debug_info_t *id, int level, char *stri
1036} 1087}
1037EXPORT_SYMBOL(__debug_sprintf_exception); 1088EXPORT_SYMBOL(__debug_sprintf_exception);
1038 1089
1039/* 1090/**
1040 * debug_register_view: 1091 * debug_register_view() - registers new debug view and creates debugfs
1092 * dir entry
1093 *
1094 * @id: handle for debug log
1095 * @view: pointer to debug view struct
1096 *
1097 * Return:
1098 * - 0 : ok
1099 * - < 0: Error
1041 */ 1100 */
1042int debug_register_view(debug_info_t *id, struct debug_view *view) 1101int debug_register_view(debug_info_t *id, struct debug_view *view)
1043{ 1102{
@@ -1077,8 +1136,16 @@ out:
1077} 1136}
1078EXPORT_SYMBOL(debug_register_view); 1137EXPORT_SYMBOL(debug_register_view);
1079 1138
1080/* 1139/**
1081 * debug_unregister_view: 1140 * debug_unregister_view() - unregisters debug view and removes debugfs
1141 * dir entry
1142 *
1143 * @id: handle for debug log
1144 * @view: pointer to debug view struct
1145 *
1146 * Return:
1147 * - 0 : ok
1148 * - < 0: Error
1082 */ 1149 */
1083int debug_unregister_view(debug_info_t *id, struct debug_view *view) 1150int debug_unregister_view(debug_info_t *id, struct debug_view *view)
1084{ 1151{
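Rounding out the debug_register_mode() and debug_register_view() documentation above, a small sketch of registering a log with an explicit file mode and attaching one of the predefined views (the name "bar" and the sizes are illustrative assumptions):

#include <linux/errno.h>
#include <asm/debug.h>

static debug_info_t *bar_dbf;

static int bar_dbf_init(void)
{
	/* 2 areas, 1 page each, 16 bytes of data per entry;
	 * debugfs files get mode 0600, uid/gid must currently be 0 */
	bar_dbf = debug_register_mode("bar", 1, 2, 16, 0600, 0, 0);
	if (!bar_dbf)
		return -ENOMEM;
	/* dump entries as hex + ascii under .../s390dbf/bar/hex_ascii */
	return debug_register_view(bar_dbf, &debug_hex_ascii_view);
}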
diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c
index b2c68fbf2634..7abe6ae261b4 100644
--- a/arch/s390/kernel/dis.c
+++ b/arch/s390/kernel/dis.c
@@ -242,6 +242,7 @@ static const unsigned char formats[][6] = {
242 [INSTR_RRF_U0FF] = { F_24, U4_16, F_28, 0, 0, 0 }, 242 [INSTR_RRF_U0FF] = { F_24, U4_16, F_28, 0, 0, 0 },
243 [INSTR_RRF_U0RF] = { R_24, U4_16, F_28, 0, 0, 0 }, 243 [INSTR_RRF_U0RF] = { R_24, U4_16, F_28, 0, 0, 0 },
244 [INSTR_RRF_U0RR] = { R_24, R_28, U4_16, 0, 0, 0 }, 244 [INSTR_RRF_U0RR] = { R_24, R_28, U4_16, 0, 0, 0 },
245 [INSTR_RRF_URR] = { R_24, R_28, U8_16, 0, 0, 0 },
245 [INSTR_RRF_UUFF] = { F_24, U4_16, F_28, U4_20, 0, 0 }, 246 [INSTR_RRF_UUFF] = { F_24, U4_16, F_28, U4_20, 0, 0 },
246 [INSTR_RRF_UUFR] = { F_24, U4_16, R_28, U4_20, 0, 0 }, 247 [INSTR_RRF_UUFR] = { F_24, U4_16, R_28, U4_20, 0, 0 },
247 [INSTR_RRF_UURF] = { R_24, U4_16, F_28, U4_20, 0, 0 }, 248 [INSTR_RRF_UURF] = { R_24, U4_16, F_28, U4_20, 0, 0 },
@@ -306,7 +307,7 @@ static const unsigned char formats[][6] = {
306 [INSTR_VRI_VVV0UU2] = { V_8, V_12, V_16, U8_28, U4_24, 0 }, 307 [INSTR_VRI_VVV0UU2] = { V_8, V_12, V_16, U8_28, U4_24, 0 },
307 [INSTR_VRR_0V] = { V_12, 0, 0, 0, 0, 0 }, 308 [INSTR_VRR_0V] = { V_12, 0, 0, 0, 0, 0 },
308 [INSTR_VRR_0VV0U] = { V_12, V_16, U4_24, 0, 0, 0 }, 309 [INSTR_VRR_0VV0U] = { V_12, V_16, U4_24, 0, 0, 0 },
309 [INSTR_VRR_RV0U] = { R_8, V_12, U4_24, 0, 0, 0 }, 310 [INSTR_VRR_RV0UU] = { R_8, V_12, U4_24, U4_28, 0, 0 },
310 [INSTR_VRR_VRR] = { V_8, R_12, R_16, 0, 0, 0 }, 311 [INSTR_VRR_VRR] = { V_8, R_12, R_16, 0, 0, 0 },
311 [INSTR_VRR_VV] = { V_8, V_12, 0, 0, 0, 0 }, 312 [INSTR_VRR_VV] = { V_8, V_12, 0, 0, 0, 0 },
312 [INSTR_VRR_VV0U] = { V_8, V_12, U4_32, 0, 0, 0 }, 313 [INSTR_VRR_VV0U] = { V_8, V_12, U4_32, 0, 0, 0 },
@@ -326,10 +327,8 @@ static const unsigned char formats[][6] = {
326 [INSTR_VRS_RVRDU] = { R_8, V_12, D_20, B_16, U4_32, 0 }, 327 [INSTR_VRS_RVRDU] = { R_8, V_12, D_20, B_16, U4_32, 0 },
327 [INSTR_VRS_VRRD] = { V_8, R_12, D_20, B_16, 0, 0 }, 328 [INSTR_VRS_VRRD] = { V_8, R_12, D_20, B_16, 0, 0 },
328 [INSTR_VRS_VRRDU] = { V_8, R_12, D_20, B_16, U4_32, 0 }, 329 [INSTR_VRS_VRRDU] = { V_8, R_12, D_20, B_16, U4_32, 0 },
329 [INSTR_VRS_VVRD] = { V_8, V_12, D_20, B_16, 0, 0 },
330 [INSTR_VRS_VVRDU] = { V_8, V_12, D_20, B_16, U4_32, 0 }, 330 [INSTR_VRS_VVRDU] = { V_8, V_12, D_20, B_16, U4_32, 0 },
331 [INSTR_VRV_VVXRDU] = { V_8, D_20, VX_12, B_16, U4_32, 0 }, 331 [INSTR_VRV_VVXRDU] = { V_8, D_20, VX_12, B_16, U4_32, 0 },
332 [INSTR_VRX_VRRD] = { V_8, D_20, X_12, B_16, 0, 0 },
333 [INSTR_VRX_VRRDU] = { V_8, D_20, X_12, B_16, U4_32, 0 }, 332 [INSTR_VRX_VRRDU] = { V_8, D_20, X_12, B_16, U4_32, 0 },
334 [INSTR_VRX_VV] = { V_8, V_12, 0, 0, 0, 0 }, 333 [INSTR_VRX_VV] = { V_8, V_12, 0, 0, 0, 0 },
335 [INSTR_VSI_URDV] = { V_32, D_20, B_16, U8_8, 0, 0 }, 334 [INSTR_VSI_URDV] = { V_32, D_20, B_16, U8_8, 0, 0 },
diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c
index 9e87b68be21c..ac06c3949ab3 100644
--- a/arch/s390/kernel/dumpstack.c
+++ b/arch/s390/kernel/dumpstack.c
@@ -199,9 +199,7 @@ void die(struct pt_regs *regs, const char *str)
199#ifdef CONFIG_PREEMPT 199#ifdef CONFIG_PREEMPT
200 pr_cont("PREEMPT "); 200 pr_cont("PREEMPT ");
201#endif 201#endif
202#ifdef CONFIG_SMP
203 pr_cont("SMP "); 202 pr_cont("SMP ");
204#endif
205 if (debug_pagealloc_enabled()) 203 if (debug_pagealloc_enabled())
206 pr_cont("DEBUG_PAGEALLOC"); 204 pr_cont("DEBUG_PAGEALLOC");
207 pr_cont("\n"); 205 pr_cont("\n");
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 3f4d272577d3..270d1d145761 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -986,14 +986,12 @@ ENTRY(psw_idle)
986 stg %r3,__SF_EMPTY(%r15) 986 stg %r3,__SF_EMPTY(%r15)
987 larl %r1,.Lpsw_idle_lpsw+4 987 larl %r1,.Lpsw_idle_lpsw+4
988 stg %r1,__SF_EMPTY+8(%r15) 988 stg %r1,__SF_EMPTY+8(%r15)
989#ifdef CONFIG_SMP
990 larl %r1,smp_cpu_mtid 989 larl %r1,smp_cpu_mtid
991 llgf %r1,0(%r1) 990 llgf %r1,0(%r1)
992 ltgr %r1,%r1 991 ltgr %r1,%r1
993 jz .Lpsw_idle_stcctm 992 jz .Lpsw_idle_stcctm
994 .insn rsy,0xeb0000000017,%r1,5,__SF_EMPTY+16(%r15) 993 .insn rsy,0xeb0000000017,%r1,5,__SF_EMPTY+16(%r15)
995.Lpsw_idle_stcctm: 994.Lpsw_idle_stcctm:
996#endif
997 oi __LC_CPU_FLAGS+7,_CIF_ENABLED_WAIT 995 oi __LC_CPU_FLAGS+7,_CIF_ENABLED_WAIT
998 BPON 996 BPON
999 STCK __CLOCK_IDLE_ENTER(%r2) 997 STCK __CLOCK_IDLE_ENTER(%r2)
@@ -1468,7 +1466,6 @@ ENDPROC(cleanup_critical)
1468 mvc __CLOCK_IDLE_ENTER(8,%r2),__CLOCK_IDLE_EXIT(%r2) 1466 mvc __CLOCK_IDLE_ENTER(8,%r2),__CLOCK_IDLE_EXIT(%r2)
1469 mvc __TIMER_IDLE_ENTER(8,%r2),__TIMER_IDLE_EXIT(%r2) 1467 mvc __TIMER_IDLE_ENTER(8,%r2),__TIMER_IDLE_EXIT(%r2)
14701: # calculate idle cycles 14681: # calculate idle cycles
1471#ifdef CONFIG_SMP
1472 clg %r9,BASED(.Lcleanup_idle_insn) 1469 clg %r9,BASED(.Lcleanup_idle_insn)
1473 jl 3f 1470 jl 3f
1474 larl %r1,smp_cpu_mtid 1471 larl %r1,smp_cpu_mtid
@@ -1486,7 +1483,6 @@ ENDPROC(cleanup_critical)
1486 la %r3,8(%r3) 1483 la %r3,8(%r3)
1487 la %r4,8(%r4) 1484 la %r4,8(%r4)
1488 brct %r1,2b 1485 brct %r1,2b
1489#endif
14903: # account system time going idle 14863: # account system time going idle
1491 lg %r9,__LC_STEAL_TIMER 1487 lg %r9,__LC_STEAL_TIMER
1492 alg %r9,__CLOCK_IDLE_ENTER(%r2) 1488 alg %r9,__CLOCK_IDLE_ENTER(%r2)
diff --git a/arch/s390/kernel/jump_label.c b/arch/s390/kernel/jump_label.c
index 3f10b56bd5a3..ab584e8e3527 100644
--- a/arch/s390/kernel/jump_label.c
+++ b/arch/s390/kernel/jump_label.c
@@ -15,16 +15,11 @@ struct insn {
15 s32 offset; 15 s32 offset;
16} __packed; 16} __packed;
17 17
18struct insn_args {
19 struct jump_entry *entry;
20 enum jump_label_type type;
21};
22
23static void jump_label_make_nop(struct jump_entry *entry, struct insn *insn) 18static void jump_label_make_nop(struct jump_entry *entry, struct insn *insn)
24{ 19{
25 /* brcl 0,0 */ 20 /* brcl 0,offset */
26 insn->opcode = 0xc004; 21 insn->opcode = 0xc004;
27 insn->offset = 0; 22 insn->offset = (jump_entry_target(entry) - jump_entry_code(entry)) >> 1;
28} 23}
29 24
30static void jump_label_make_branch(struct jump_entry *entry, struct insn *insn) 25static void jump_label_make_branch(struct jump_entry *entry, struct insn *insn)
@@ -77,23 +72,15 @@ static void __jump_label_transform(struct jump_entry *entry,
77 s390_kernel_write(code, &new, sizeof(new)); 72 s390_kernel_write(code, &new, sizeof(new));
78} 73}
79 74
80static int __sm_arch_jump_label_transform(void *data) 75static void __jump_label_sync(void *dummy)
81{ 76{
82 struct insn_args *args = data;
83
84 __jump_label_transform(args->entry, args->type, 0);
85 return 0;
86} 77}
87 78
88void arch_jump_label_transform(struct jump_entry *entry, 79void arch_jump_label_transform(struct jump_entry *entry,
89 enum jump_label_type type) 80 enum jump_label_type type)
90{ 81{
91 struct insn_args args; 82 __jump_label_transform(entry, type, 0);
92 83 smp_call_function(__jump_label_sync, NULL, 1);
93 args.entry = entry;
94 args.type = type;
95
96 stop_machine_cpuslocked(__sm_arch_jump_label_transform, &args, NULL);
97} 84}
98 85
99void arch_jump_label_transform_static(struct jump_entry *entry, 86void arch_jump_label_transform_static(struct jump_entry *entry,
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index 8a1ae140c5e2..444a19125a81 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -141,7 +141,6 @@ static noinline void __machine_kdump(void *image)
141 */ 141 */
142 store_status(__do_machine_kdump, image); 142 store_status(__do_machine_kdump, image);
143} 143}
144#endif
145 144
146static unsigned long do_start_kdump(unsigned long addr) 145static unsigned long do_start_kdump(unsigned long addr)
147{ 146{
@@ -155,6 +154,8 @@ static unsigned long do_start_kdump(unsigned long addr)
155 return rc; 154 return rc;
156} 155}
157 156
157#endif /* CONFIG_CRASH_DUMP */
158
158/* 159/*
159 * Check if kdump checksums are valid: We call purgatory with parameter "0" 160 * Check if kdump checksums are valid: We call purgatory with parameter "0"
160 */ 161 */
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c
index 5de13307b703..6ebc2117c66c 100644
--- a/arch/s390/kernel/processor.c
+++ b/arch/s390/kernel/processor.c
@@ -7,6 +7,7 @@
7#define KMSG_COMPONENT "cpu" 7#define KMSG_COMPONENT "cpu"
8#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 8#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
9 9
10#include <linux/stop_machine.h>
10#include <linux/cpufeature.h> 11#include <linux/cpufeature.h>
11#include <linux/bitops.h> 12#include <linux/bitops.h>
12#include <linux/kernel.h> 13#include <linux/kernel.h>
@@ -31,6 +32,7 @@ struct cpu_info {
31}; 32};
32 33
33static DEFINE_PER_CPU(struct cpu_info, cpu_info); 34static DEFINE_PER_CPU(struct cpu_info, cpu_info);
35static DEFINE_PER_CPU(int, cpu_relax_retry);
34 36
35static bool machine_has_cpu_mhz; 37static bool machine_has_cpu_mhz;
36 38
@@ -58,15 +60,20 @@ void s390_update_cpu_mhz(void)
58 on_each_cpu(update_cpu_mhz, NULL, 0); 60 on_each_cpu(update_cpu_mhz, NULL, 0);
59} 61}
60 62
61void notrace cpu_relax_yield(void) 63void notrace stop_machine_yield(const struct cpumask *cpumask)
62{ 64{
63 if (!smp_cpu_mtid && MACHINE_HAS_DIAG44) { 65 int cpu, this_cpu;
64 diag_stat_inc(DIAG_STAT_X044); 66
65 asm volatile("diag 0,0,0x44"); 67 this_cpu = smp_processor_id();
68 if (__this_cpu_inc_return(cpu_relax_retry) >= spin_retry) {
69 __this_cpu_write(cpu_relax_retry, 0);
70 cpu = cpumask_next_wrap(this_cpu, cpumask, this_cpu, false);
71 if (cpu >= nr_cpu_ids)
72 return;
73 if (arch_vcpu_is_preempted(cpu))
74 smp_yield_cpu(cpu);
66 } 75 }
67 barrier();
68} 76}
69EXPORT_SYMBOL(cpu_relax_yield);
70 77
71/* 78/*
72 * cpu_init - initializes state that is per-CPU. 79 * cpu_init - initializes state that is per-CPU.
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index f8544d517430..2b94b0ad3588 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -461,11 +461,9 @@ static void __init setup_lowcore_dat_off(void)
461 mem_assign_absolute(S390_lowcore.restart_source, lc->restart_source); 461 mem_assign_absolute(S390_lowcore.restart_source, lc->restart_source);
462 mem_assign_absolute(S390_lowcore.restart_psw, lc->restart_psw); 462 mem_assign_absolute(S390_lowcore.restart_psw, lc->restart_psw);
463 463
464#ifdef CONFIG_SMP
465 lc->spinlock_lockval = arch_spin_lockval(0); 464 lc->spinlock_lockval = arch_spin_lockval(0);
466 lc->spinlock_index = 0; 465 lc->spinlock_index = 0;
467 arch_spin_lock_setup(0); 466 arch_spin_lock_setup(0);
468#endif
469 lc->br_r1_trampoline = 0x07f1; /* br %r1 */ 467 lc->br_r1_trampoline = 0x07f1; /* br %r1 */
470 468
471 set_prefix((u32)(unsigned long) lc); 469 set_prefix((u32)(unsigned long) lc);
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 35fafa2b91a8..44974654cbd0 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -232,8 +232,6 @@ out:
232 return -ENOMEM; 232 return -ENOMEM;
233} 233}
234 234
235#ifdef CONFIG_HOTPLUG_CPU
236
237static void pcpu_free_lowcore(struct pcpu *pcpu) 235static void pcpu_free_lowcore(struct pcpu *pcpu)
238{ 236{
239 unsigned long async_stack, nodat_stack, lowcore; 237 unsigned long async_stack, nodat_stack, lowcore;
@@ -253,8 +251,6 @@ static void pcpu_free_lowcore(struct pcpu *pcpu)
253 free_pages(lowcore, LC_ORDER); 251 free_pages(lowcore, LC_ORDER);
254} 252}
255 253
256#endif /* CONFIG_HOTPLUG_CPU */
257
258static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) 254static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
259{ 255{
260 struct lowcore *lc = pcpu->lowcore; 256 struct lowcore *lc = pcpu->lowcore;
@@ -418,7 +414,7 @@ void smp_yield_cpu(int cpu)
418 diag_stat_inc_norecursion(DIAG_STAT_X09C); 414 diag_stat_inc_norecursion(DIAG_STAT_X09C);
419 asm volatile("diag %0,0,0x9c" 415 asm volatile("diag %0,0,0x9c"
420 : : "d" (pcpu_devices[cpu].address)); 416 : : "d" (pcpu_devices[cpu].address));
421 } else if (MACHINE_HAS_DIAG44) { 417 } else if (MACHINE_HAS_DIAG44 && !smp_cpu_mtid) {
422 diag_stat_inc_norecursion(DIAG_STAT_X044); 418 diag_stat_inc_norecursion(DIAG_STAT_X044);
423 asm volatile("diag 0,0,0x44"); 419 asm volatile("diag 0,0,0x44");
424 } 420 }
@@ -895,8 +891,6 @@ static int __init _setup_possible_cpus(char *s)
895} 891}
896early_param("possible_cpus", _setup_possible_cpus); 892early_param("possible_cpus", _setup_possible_cpus);
897 893
898#ifdef CONFIG_HOTPLUG_CPU
899
900int __cpu_disable(void) 894int __cpu_disable(void)
901{ 895{
902 unsigned long cregs[16]; 896 unsigned long cregs[16];
@@ -937,8 +931,6 @@ void __noreturn cpu_die(void)
937 for (;;) ; 931 for (;;) ;
938} 932}
939 933
940#endif /* CONFIG_HOTPLUG_CPU */
941
942void __init smp_fill_possible_mask(void) 934void __init smp_fill_possible_mask(void)
943{ 935{
944 unsigned int possible, sclp_max, cpu; 936 unsigned int possible, sclp_max, cpu;
@@ -996,7 +988,6 @@ int setup_profiling_timer(unsigned int multiplier)
996 return 0; 988 return 0;
997} 989}
998 990
999#ifdef CONFIG_HOTPLUG_CPU
1000static ssize_t cpu_configure_show(struct device *dev, 991static ssize_t cpu_configure_show(struct device *dev,
1001 struct device_attribute *attr, char *buf) 992 struct device_attribute *attr, char *buf)
1002{ 993{
@@ -1073,7 +1064,6 @@ out:
1073 return rc ? rc : count; 1064 return rc ? rc : count;
1074} 1065}
1075static DEVICE_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store); 1066static DEVICE_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store);
1076#endif /* CONFIG_HOTPLUG_CPU */
1077 1067
1078static ssize_t show_cpu_address(struct device *dev, 1068static ssize_t show_cpu_address(struct device *dev,
1079 struct device_attribute *attr, char *buf) 1069 struct device_attribute *attr, char *buf)
@@ -1083,9 +1073,7 @@ static ssize_t show_cpu_address(struct device *dev,
1083static DEVICE_ATTR(address, 0444, show_cpu_address, NULL); 1073static DEVICE_ATTR(address, 0444, show_cpu_address, NULL);
1084 1074
1085static struct attribute *cpu_common_attrs[] = { 1075static struct attribute *cpu_common_attrs[] = {
1086#ifdef CONFIG_HOTPLUG_CPU
1087 &dev_attr_configure.attr, 1076 &dev_attr_configure.attr,
1088#endif
1089 &dev_attr_address.attr, 1077 &dev_attr_address.attr,
1090 NULL, 1078 NULL,
1091}; 1079};
@@ -1144,15 +1132,11 @@ static int smp_add_present_cpu(int cpu)
1144out_topology: 1132out_topology:
1145 sysfs_remove_group(&s->kobj, &cpu_common_attr_group); 1133 sysfs_remove_group(&s->kobj, &cpu_common_attr_group);
1146out_cpu: 1134out_cpu:
1147#ifdef CONFIG_HOTPLUG_CPU
1148 unregister_cpu(c); 1135 unregister_cpu(c);
1149#endif
1150out: 1136out:
1151 return rc; 1137 return rc;
1152} 1138}
1153 1139
1154#ifdef CONFIG_HOTPLUG_CPU
1155
1156int __ref smp_rescan_cpus(void) 1140int __ref smp_rescan_cpus(void)
1157{ 1141{
1158 struct sclp_core_info *info; 1142 struct sclp_core_info *info;
@@ -1188,17 +1172,14 @@ static ssize_t __ref rescan_store(struct device *dev,
1188 return rc ? rc : count; 1172 return rc ? rc : count;
1189} 1173}
1190static DEVICE_ATTR_WO(rescan); 1174static DEVICE_ATTR_WO(rescan);
1191#endif /* CONFIG_HOTPLUG_CPU */
1192 1175
1193static int __init s390_smp_init(void) 1176static int __init s390_smp_init(void)
1194{ 1177{
1195 int cpu, rc = 0; 1178 int cpu, rc = 0;
1196 1179
1197#ifdef CONFIG_HOTPLUG_CPU
1198 rc = device_create_file(cpu_subsys.dev_root, &dev_attr_rescan); 1180 rc = device_create_file(cpu_subsys.dev_root, &dev_attr_rescan);
1199 if (rc) 1181 if (rc)
1200 return rc; 1182 return rc;
1201#endif
1202 for_each_present_cpu(cpu) { 1183 for_each_present_cpu(cpu) {
1203 rc = smp_add_present_cpu(cpu); 1184 rc = smp_add_present_cpu(cpu);
1204 if (rc) 1185 if (rc)
diff --git a/arch/s390/kernel/swsusp.S b/arch/s390/kernel/swsusp.S
index 19a3c427801a..a7baf0b5f818 100644
--- a/arch/s390/kernel/swsusp.S
+++ b/arch/s390/kernel/swsusp.S
@@ -162,7 +162,6 @@ ENTRY(swsusp_arch_resume)
162 larl %r1,__swsusp_reset_dma 162 larl %r1,__swsusp_reset_dma
163 lg %r1,0(%r1) 163 lg %r1,0(%r1)
164 BASR_EX %r14,%r1 164 BASR_EX %r14,%r1
165#ifdef CONFIG_SMP
166 larl %r1,smp_cpu_mt_shift 165 larl %r1,smp_cpu_mt_shift
167 icm %r1,15,0(%r1) 166 icm %r1,15,0(%r1)
168 jz smt_done 167 jz smt_done
@@ -172,7 +171,6 @@ smt_loop:
172 brc 8,smt_done /* accepted */ 171 brc 8,smt_done /* accepted */
173 brc 2,smt_loop /* busy, try again */ 172 brc 2,smt_loop /* busy, try again */
174smt_done: 173smt_done:
175#endif
176 larl %r1,.Lnew_pgm_check_psw 174 larl %r1,.Lnew_pgm_check_psw
177 lpswe 0(%r1) 175 lpswe 0(%r1)
178pgm_check_entry: 176pgm_check_entry:
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index 82e81a9f7112..4736b6ec0ad2 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -229,17 +229,11 @@ void vector_exception(struct pt_regs *regs)
229 229
230void data_exception(struct pt_regs *regs) 230void data_exception(struct pt_regs *regs)
231{ 231{
232 int signal = 0;
233
234 save_fpu_regs(); 232 save_fpu_regs();
235 if (current->thread.fpu.fpc & FPC_DXC_MASK) 233 if (current->thread.fpu.fpc & FPC_DXC_MASK)
236 signal = SIGFPE;
237 else
238 signal = SIGILL;
239 if (signal == SIGFPE)
240 do_fp_trap(regs, current->thread.fpu.fpc); 234 do_fp_trap(regs, current->thread.fpu.fpc);
241 else if (signal) 235 else
242 do_trap(regs, signal, ILL_ILLOPN, "data exception"); 236 do_trap(regs, SIGILL, ILL_ILLOPN, "data exception");
243} 237}
244 238
245void space_switch_exception(struct pt_regs *regs) 239void space_switch_exception(struct pt_regs *regs)
diff --git a/arch/s390/kernel/unwind_bc.c b/arch/s390/kernel/unwind_bc.c
index 57fd4e902f1f..3ce8a0808059 100644
--- a/arch/s390/kernel/unwind_bc.c
+++ b/arch/s390/kernel/unwind_bc.c
@@ -46,18 +46,18 @@ bool unwind_next_frame(struct unwind_state *state)
46 46
47 regs = state->regs; 47 regs = state->regs;
48 if (unlikely(regs)) { 48 if (unlikely(regs)) {
49 sp = READ_ONCE_TASK_STACK(state->task, regs->gprs[15]); 49 sp = READ_ONCE_NOCHECK(regs->gprs[15]);
50 if (unlikely(outside_of_stack(state, sp))) { 50 if (unlikely(outside_of_stack(state, sp))) {
51 if (!update_stack_info(state, sp)) 51 if (!update_stack_info(state, sp))
52 goto out_err; 52 goto out_err;
53 } 53 }
54 sf = (struct stack_frame *) sp; 54 sf = (struct stack_frame *) sp;
55 ip = READ_ONCE_TASK_STACK(state->task, sf->gprs[8]); 55 ip = READ_ONCE_NOCHECK(sf->gprs[8]);
56 reliable = false; 56 reliable = false;
57 regs = NULL; 57 regs = NULL;
58 } else { 58 } else {
59 sf = (struct stack_frame *) state->sp; 59 sf = (struct stack_frame *) state->sp;
60 sp = READ_ONCE_TASK_STACK(state->task, sf->back_chain); 60 sp = READ_ONCE_NOCHECK(sf->back_chain);
61 if (likely(sp)) { 61 if (likely(sp)) {
62 /* Non-zero back-chain points to the previous frame */ 62 /* Non-zero back-chain points to the previous frame */
63 if (unlikely(outside_of_stack(state, sp))) { 63 if (unlikely(outside_of_stack(state, sp))) {
@@ -65,7 +65,7 @@ bool unwind_next_frame(struct unwind_state *state)
65 goto out_err; 65 goto out_err;
66 } 66 }
67 sf = (struct stack_frame *) sp; 67 sf = (struct stack_frame *) sp;
68 ip = READ_ONCE_TASK_STACK(state->task, sf->gprs[8]); 68 ip = READ_ONCE_NOCHECK(sf->gprs[8]);
69 reliable = true; 69 reliable = true;
70 } else { 70 } else {
71 /* No back-chain, look for a pt_regs structure */ 71 /* No back-chain, look for a pt_regs structure */
@@ -73,9 +73,9 @@ bool unwind_next_frame(struct unwind_state *state)
73 if (!on_stack(info, sp, sizeof(struct pt_regs))) 73 if (!on_stack(info, sp, sizeof(struct pt_regs)))
74 goto out_stop; 74 goto out_stop;
75 regs = (struct pt_regs *) sp; 75 regs = (struct pt_regs *) sp;
76 if (user_mode(regs)) 76 if (READ_ONCE_NOCHECK(regs->psw.mask) & PSW_MASK_PSTATE)
77 goto out_stop; 77 goto out_stop;
78 ip = READ_ONCE_TASK_STACK(state->task, regs->psw.addr); 78 ip = READ_ONCE_NOCHECK(regs->psw.addr);
79 reliable = true; 79 reliable = true;
80 } 80 }
81 } 81 }
@@ -132,11 +132,11 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
132 132
133 /* Get the instruction pointer from pt_regs or the stack frame */ 133 /* Get the instruction pointer from pt_regs or the stack frame */
134 if (regs) { 134 if (regs) {
135 ip = READ_ONCE_TASK_STACK(state->task, regs->psw.addr); 135 ip = READ_ONCE_NOCHECK(regs->psw.addr);
136 reliable = true; 136 reliable = true;
137 } else { 137 } else {
138 sf = (struct stack_frame *) sp; 138 sf = (struct stack_frame *) sp;
139 ip = READ_ONCE_TASK_STACK(state->task, sf->gprs[8]); 139 ip = READ_ONCE_NOCHECK(sf->gprs[8]);
140 reliable = false; 140 reliable = false;
141 } 141 }
142 142
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 28ebd647784c..1c4113f0f2a8 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -2461,6 +2461,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2461 set_kvm_facility(kvm->arch.model.fac_list, 147); 2461 set_kvm_facility(kvm->arch.model.fac_list, 147);
2462 } 2462 }
2463 2463
2464 if (css_general_characteristics.aiv && test_facility(65))
2465 set_kvm_facility(kvm->arch.model.fac_mask, 65);
2466
2464 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid(); 2467 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2465 kvm->arch.model.ibc = sclp.ibc & 0x0fff; 2468 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2466 2469
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 8679bd74d337..ed52ffa8d5d4 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -27,6 +27,7 @@
27#include <asm/io.h> 27#include <asm/io.h>
28#include <asm/ptrace.h> 28#include <asm/ptrace.h>
29#include <asm/sclp.h> 29#include <asm/sclp.h>
30#include <asm/ap.h>
30#include "gaccess.h" 31#include "gaccess.h"
31#include "kvm-s390.h" 32#include "kvm-s390.h"
32#include "trace.h" 33#include "trace.h"
@@ -592,6 +593,89 @@ static int handle_io_inst(struct kvm_vcpu *vcpu)
592 } 593 }
593} 594}
594 595
596/*
597 * handle_pqap: Handling pqap interception
598 * @vcpu: the vcpu that issued the pqap instruction
599 *
600 * We now support PQAP/AQIC instructions and we need to correctly
601 * answer the guest even if no dedicated driver's hook is available.
602 *
603 * The intercepting code calls a dedicated callback for this instruction
604 * if a driver did register one in the CRYPTO satellite of the
605 * SIE block.
606 *
607 * If no callback is available, the queues are not available; return
608 * response code 0x01 (queue not available) to the guest and set CC to 3.
609 * Otherwise, return the response code returned by the callback.
610 */
611static int handle_pqap(struct kvm_vcpu *vcpu)
612{
613 struct ap_queue_status status = {};
614 unsigned long reg0;
615 int ret;
616 uint8_t fc;
617
618 /* Verify that the AP instructions are available */
619 if (!ap_instructions_available())
620 return -EOPNOTSUPP;
621 /* Verify that the guest is allowed to use AP instructions */
622 if (!(vcpu->arch.sie_block->eca & ECA_APIE))
623 return -EOPNOTSUPP;
624 /*
625 * The only functions that can be intercepted while AP instructions are
626 * available to the guest are AQIC and TAPQ with the t bit set. Since we
627 * do not set IC.3 (FIII), we currently only intercept the AQIC
628 * function code.
629 */
630 reg0 = vcpu->run->s.regs.gprs[0];
631 fc = (reg0 >> 24) & 0xff;
632 if (WARN_ON_ONCE(fc != 0x03))
633 return -EOPNOTSUPP;
634
635 /* PQAP instruction is allowed for guest kernel only */
636 if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
637 return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
638
639 /* Common PQAP instruction specification exceptions */
640 /* bits 41-47 must all be zeros */
641 if (reg0 & 0x007f0000UL)
642 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
643 /* APFT not installed and T bit set */
644 if (!test_kvm_facility(vcpu->kvm, 15) && (reg0 & 0x00800000UL))
645 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
646 /* APXA not installed and APID greater than 64 or APQI greater than 16 */
647 if (!(vcpu->kvm->arch.crypto.crycbd & 0x02) && (reg0 & 0x0000c0f0UL))
648 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
649
650 /* AQIC function code specific exception */
651 /* facility 65 not present for AQIC function code */
652 if (!test_kvm_facility(vcpu->kvm, 65))
653 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
654
655 /*
656 * Verify that the hook callback is registered, lock the owner
657 * and call the hook.
658 */
659 if (vcpu->kvm->arch.crypto.pqap_hook) {
660 if (!try_module_get(vcpu->kvm->arch.crypto.pqap_hook->owner))
661 return -EOPNOTSUPP;
662 ret = vcpu->kvm->arch.crypto.pqap_hook->hook(vcpu);
663 module_put(vcpu->kvm->arch.crypto.pqap_hook->owner);
664 if (!ret && vcpu->run->s.regs.gprs[1] & 0x00ff0000)
665 kvm_s390_set_psw_cc(vcpu, 3);
666 return ret;
667 }
668 /*
669 * A vfio driver must register a hook.
670 * If no hook is registered, no driver has enabled the SIE CRYCB and no
671 * queues are available; report that response to the guest.
672 */
673 status.response_code = 0x01;
674 memcpy(&vcpu->run->s.regs.gprs[1], &status, sizeof(status));
675 kvm_s390_set_psw_cc(vcpu, 3);
676 return 0;
677}
678
595static int handle_stfl(struct kvm_vcpu *vcpu) 679static int handle_stfl(struct kvm_vcpu *vcpu)
596{ 680{
597 int rc; 681 int rc;
@@ -878,6 +962,8 @@ int kvm_s390_handle_b2(struct kvm_vcpu *vcpu)
878 return handle_sthyi(vcpu); 962 return handle_sthyi(vcpu);
879 case 0x7d: 963 case 0x7d:
880 return handle_stsi(vcpu); 964 return handle_stsi(vcpu);
965 case 0xaf:
966 return handle_pqap(vcpu);
881 case 0xb1: 967 case 0xb1:
882 return handle_stfl(vcpu); 968 return handle_stfl(vcpu);
883 case 0xb2: 969 case 0xb2:
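The pqap_hook plumbing added to kvm_host.h and consumed by handle_pqap() above is filled in by a device driver; a hedged sketch of how a module (e.g. vfio_ap) would wire it up, with all names below chosen for illustration:

#include <linux/kvm_host.h>
#include <linux/module.h>

static int foo_pqap_handler(struct kvm_vcpu *vcpu)
{
	/* interpret gprs[0]/gprs[1] of the guest and return 0 on success */
	return 0;
}

static struct kvm_s390_module_hook foo_pqap_hook = {
	.hook  = foo_pqap_handler,
	.owner = THIS_MODULE,
};

static void foo_attach_pqap(struct kvm *kvm)
{
	/* done during the driver's per-guest crypto setup */
	kvm->arch.crypto.pqap_hook = &foo_pqap_hook;
}

/* the driver must reset pqap_hook to NULL before it goes away */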
diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile
index 5418d10dc2a8..a1ec63abfb95 100644
--- a/arch/s390/lib/Makefile
+++ b/arch/s390/lib/Makefile
@@ -3,9 +3,8 @@
3# Makefile for s390-specific library files.. 3# Makefile for s390-specific library files..
4# 4#
5 5
6lib-y += delay.o string.o uaccess.o find.o 6lib-y += delay.o string.o uaccess.o find.o spinlock.o
7obj-y += mem.o xor.o 7obj-y += mem.o xor.o
8lib-$(CONFIG_SMP) += spinlock.o
9lib-$(CONFIG_KPROBES) += probes.o 8lib-$(CONFIG_KPROBES) += probes.o
10lib-$(CONFIG_UPROBES) += probes.o 9lib-$(CONFIG_UPROBES) += probes.o
11 10
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 14d1eae9fe43..f0bee6af3960 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -18,6 +18,7 @@
18#include <linux/mman.h> 18#include <linux/mman.h>
19#include <linux/mm.h> 19#include <linux/mm.h>
20#include <linux/swap.h> 20#include <linux/swap.h>
21#include <linux/swiotlb.h>
21#include <linux/smp.h> 22#include <linux/smp.h>
22#include <linux/init.h> 23#include <linux/init.h>
23#include <linux/pagemap.h> 24#include <linux/pagemap.h>
@@ -29,6 +30,7 @@
29#include <linux/export.h> 30#include <linux/export.h>
30#include <linux/cma.h> 31#include <linux/cma.h>
31#include <linux/gfp.h> 32#include <linux/gfp.h>
33#include <linux/dma-mapping.h>
32#include <asm/processor.h> 34#include <asm/processor.h>
33#include <linux/uaccess.h> 35#include <linux/uaccess.h>
34#include <asm/pgtable.h> 36#include <asm/pgtable.h>
@@ -42,6 +44,8 @@
42#include <asm/sclp.h> 44#include <asm/sclp.h>
43#include <asm/set_memory.h> 45#include <asm/set_memory.h>
44#include <asm/kasan.h> 46#include <asm/kasan.h>
47#include <asm/dma-mapping.h>
48#include <asm/uv.h>
45 49
46pgd_t swapper_pg_dir[PTRS_PER_PGD] __section(.bss..swapper_pg_dir); 50pgd_t swapper_pg_dir[PTRS_PER_PGD] __section(.bss..swapper_pg_dir);
47 51
@@ -128,6 +132,47 @@ void mark_rodata_ro(void)
128 pr_info("Write protected read-only-after-init data: %luk\n", size >> 10); 132 pr_info("Write protected read-only-after-init data: %luk\n", size >> 10);
129} 133}
130 134
135int set_memory_encrypted(unsigned long addr, int numpages)
136{
137 int i;
138
139 /* make specified pages unshared (swiotlb, dma_free) */
140 for (i = 0; i < numpages; ++i) {
141 uv_remove_shared(addr);
142 addr += PAGE_SIZE;
143 }
144 return 0;
145}
146
147int set_memory_decrypted(unsigned long addr, int numpages)
148{
149 int i;
150 /* make specified pages shared (swiotlb, dma_alloc) */
151 for (i = 0; i < numpages; ++i) {
152 uv_set_shared(addr);
153 addr += PAGE_SIZE;
154 }
155 return 0;
156}
157
158/* are we a protected virtualization guest? */
159bool sev_active(void)
160{
161 return is_prot_virt_guest();
162}
163
164/* protected virtualization */
165static void pv_init(void)
166{
167 if (!is_prot_virt_guest())
168 return;
169
170 /* make sure bounce buffers are shared */
171 swiotlb_init(1);
172 swiotlb_update_mem_attributes();
173 swiotlb_force = SWIOTLB_FORCE;
174}
175
131void __init mem_init(void) 176void __init mem_init(void)
132{ 177{
133 cpumask_set_cpu(0, &init_mm.context.cpu_attach_mask); 178 cpumask_set_cpu(0, &init_mm.context.cpu_attach_mask);
@@ -136,6 +181,8 @@ void __init mem_init(void)
136 set_max_mapnr(max_low_pfn); 181 set_max_mapnr(max_low_pfn);
137 high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); 182 high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
138 183
184 pv_init();
185
139 /* Setup guest page hinting */ 186 /* Setup guest page hinting */
140 cmma_init(); 187 cmma_init();
141 188
diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c
index 818deeb1ebc3..1864a8bb9622 100644
--- a/arch/s390/mm/maccess.c
+++ b/arch/s390/mm/maccess.c
@@ -52,21 +52,22 @@ static notrace long s390_kernel_write_odd(void *dst, const void *src, size_t siz
52 * Therefore we have a read-modify-write sequence: the function reads eight 52 * Therefore we have a read-modify-write sequence: the function reads eight
53 * bytes from destination at an eight byte boundary, modifies the bytes 53 * bytes from destination at an eight byte boundary, modifies the bytes
54 * requested and writes the result back in a loop. 54 * requested and writes the result back in a loop.
55 *
56 * Note: this means that this function may not be called concurrently on
57 * several cpus with overlapping words, since this may potentially
58 * cause data corruption.
59 */ 55 */
56static DEFINE_SPINLOCK(s390_kernel_write_lock);
57
60void notrace s390_kernel_write(void *dst, const void *src, size_t size) 58void notrace s390_kernel_write(void *dst, const void *src, size_t size)
61{ 59{
60 unsigned long flags;
62 long copied; 61 long copied;
63 62
63 spin_lock_irqsave(&s390_kernel_write_lock, flags);
64 while (size) { 64 while (size) {
65 copied = s390_kernel_write_odd(dst, src, size); 65 copied = s390_kernel_write_odd(dst, src, size);
66 dst += copied; 66 dst += copied;
67 src += copied; 67 src += copied;
68 size -= copied; 68 size -= copied;
69 } 69 }
70 spin_unlock_irqrestore(&s390_kernel_write_lock, flags);
70} 71}
71 72
72static int __memcpy_real(void *dest, void *src, size_t count) 73static int __memcpy_real(void *dest, void *src, size_t count)
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index 687f2a4d3459..cbc718ba6d78 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -24,8 +24,6 @@ static unsigned long stack_maxrandom_size(void)
24{ 24{
25 if (!(current->flags & PF_RANDOMIZE)) 25 if (!(current->flags & PF_RANDOMIZE))
26 return 0; 26 return 0;
27 if (current->personality & ADDR_NO_RANDOMIZE)
28 return 0;
29 return STACK_RND_MASK << PAGE_SHIFT; 27 return STACK_RND_MASK << PAGE_SHIFT;
30} 28}
31 29
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 86ca7f88fb22..b8a64cbb5dea 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -421,12 +421,12 @@ static void zpci_map_resources(struct pci_dev *pdev)
421 if (!len) 421 if (!len)
422 continue; 422 continue;
423 423
424 if (static_branch_likely(&have_mio)) 424 if (zpci_use_mio(zdev))
425 pdev->resource[i].start = 425 pdev->resource[i].start =
426 (resource_size_t __force) zdev->bars[i].mio_wb; 426 (resource_size_t __force) zdev->bars[i].mio_wb;
427 else 427 else
428 pdev->resource[i].start = 428 pdev->resource[i].start = (resource_size_t __force)
429 (resource_size_t __force) pci_iomap(pdev, i, 0); 429 pci_iomap_range_fh(pdev, i, 0, 0);
430 pdev->resource[i].end = pdev->resource[i].start + len - 1; 430 pdev->resource[i].end = pdev->resource[i].start + len - 1;
431 } 431 }
432 432
@@ -444,18 +444,19 @@ static void zpci_map_resources(struct pci_dev *pdev)
444 444
445static void zpci_unmap_resources(struct pci_dev *pdev) 445static void zpci_unmap_resources(struct pci_dev *pdev)
446{ 446{
447 struct zpci_dev *zdev = to_zpci(pdev);
447 resource_size_t len; 448 resource_size_t len;
448 int i; 449 int i;
449 450
450 if (static_branch_likely(&have_mio)) 451 if (zpci_use_mio(zdev))
451 return; 452 return;
452 453
453 for (i = 0; i < PCI_BAR_COUNT; i++) { 454 for (i = 0; i < PCI_BAR_COUNT; i++) {
454 len = pci_resource_len(pdev, i); 455 len = pci_resource_len(pdev, i);
455 if (!len) 456 if (!len)
456 continue; 457 continue;
457 pci_iounmap(pdev, (void __iomem __force *) 458 pci_iounmap_fh(pdev, (void __iomem __force *)
458 pdev->resource[i].start); 459 pdev->resource[i].start);
459 } 460 }
460} 461}
461 462
@@ -528,7 +529,7 @@ static int zpci_setup_bus_resources(struct zpci_dev *zdev,
528 if (zdev->bars[i].val & 4) 529 if (zdev->bars[i].val & 4)
529 flags |= IORESOURCE_MEM_64; 530 flags |= IORESOURCE_MEM_64;
530 531
531 if (static_branch_likely(&have_mio)) 532 if (zpci_use_mio(zdev))
532 addr = (unsigned long) zdev->bars[i].mio_wb; 533 addr = (unsigned long) zdev->bars[i].mio_wb;
533 else 534 else
534 addr = ZPCI_ADDR(entry); 535 addr = ZPCI_ADDR(entry);
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index d03631dba7c2..9bdff4defef1 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -291,7 +291,7 @@ int clp_enable_fh(struct zpci_dev *zdev, u8 nr_dma_as)
291 goto out; 291 goto out;
292 292
293 zdev->fh = fh; 293 zdev->fh = fh;
294 if (zdev->mio_capable) { 294 if (zpci_use_mio(zdev)) {
295 rc = clp_set_pci_fn(&fh, nr_dma_as, CLP_SET_ENABLE_MIO); 295 rc = clp_set_pci_fn(&fh, nr_dma_as, CLP_SET_ENABLE_MIO);
296 zpci_dbg(3, "ena mio fid:%x, fh:%x, rc:%d\n", zdev->fid, fh, rc); 296 zpci_dbg(3, "ena mio fid:%x, fh:%x, rc:%d\n", zdev->fid, fh, rc);
297 if (rc) 297 if (rc)
diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c
index 6b48ca7760a7..3408c0df3ebf 100644
--- a/arch/s390/pci/pci_debug.c
+++ b/arch/s390/pci/pci_debug.c
@@ -74,7 +74,7 @@ static void pci_sw_counter_show(struct seq_file *m)
74 int i; 74 int i;
75 75
76 for (i = 0; i < ARRAY_SIZE(pci_sw_names); i++, counter++) 76 for (i = 0; i < ARRAY_SIZE(pci_sw_names); i++, counter++)
77 seq_printf(m, "%26s:\t%lu\n", pci_sw_names[i], 77 seq_printf(m, "%26s:\t%llu\n", pci_sw_names[i],
78 atomic64_read(counter)); 78 atomic64_read(counter));
79} 79}
80 80
diff --git a/arch/s390/purgatory/.gitignore b/arch/s390/purgatory/.gitignore
index e9e66f178a6d..04a03433c720 100644
--- a/arch/s390/purgatory/.gitignore
+++ b/arch/s390/purgatory/.gitignore
@@ -1,2 +1,3 @@
1kexec-purgatory.c 1purgatory
2purgatory.lds
2purgatory.ro 3purgatory.ro
diff --git a/arch/s390/tools/Makefile b/arch/s390/tools/Makefile
index 2342b84b3386..b5e35e8f999a 100644
--- a/arch/s390/tools/Makefile
+++ b/arch/s390/tools/Makefile
@@ -6,7 +6,6 @@
6kapi := arch/$(ARCH)/include/generated/asm 6kapi := arch/$(ARCH)/include/generated/asm
7kapi-hdrs-y := $(kapi)/facility-defs.h $(kapi)/dis-defs.h 7kapi-hdrs-y := $(kapi)/facility-defs.h $(kapi)/dis-defs.h
8 8
9targets += $(addprefix ../../../,$(kapi-hdrs-y))
10PHONY += kapi 9PHONY += kapi
11 10
12kapi: $(kapi-hdrs-y) 11kapi: $(kapi-hdrs-y)
@@ -14,11 +13,7 @@ kapi: $(kapi-hdrs-y)
14hostprogs-y += gen_facilities 13hostprogs-y += gen_facilities
15hostprogs-y += gen_opcode_table 14hostprogs-y += gen_opcode_table
16 15
17HOSTCFLAGS_gen_facilities.o += -Wall $(LINUXINCLUDE) 16HOSTCFLAGS_gen_facilities.o += $(LINUXINCLUDE)
18HOSTCFLAGS_gen_opcode_table.o += -Wall $(LINUXINCLUDE)
19
20# Ensure output directory exists
21_dummy := $(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)')
22 17
23filechk_facility-defs.h = $(obj)/gen_facilities 18filechk_facility-defs.h = $(obj)/gen_facilities
24 19
diff --git a/arch/s390/tools/opcodes.txt b/arch/s390/tools/opcodes.txt
index 64638b764d1c..46d8ed96cf06 100644
--- a/arch/s390/tools/opcodes.txt
+++ b/arch/s390/tools/opcodes.txt
@@ -520,6 +520,9 @@ b92e km RRE_RR
520b92f kmc RRE_RR 520b92f kmc RRE_RR
521b930 cgfr RRE_RR 521b930 cgfr RRE_RR
522b931 clgfr RRE_RR 522b931 clgfr RRE_RR
523b938 sortl RRE_RR
524b939 dfltcc RRF_R0RR2
525b93a kdsa RRE_RR
523b93c ppno RRE_RR 526b93c ppno RRE_RR
524b93e kimd RRE_RR 527b93e kimd RRE_RR
525b93f klmd RRE_RR 528b93f klmd RRE_RR
@@ -538,8 +541,16 @@ b95a cxlgtr RRF_UUFR
538b95b cxlftr RRF_UUFR 541b95b cxlftr RRF_UUFR
539b960 cgrt RRF_U0RR 542b960 cgrt RRF_U0RR
540b961 clgrt RRF_U0RR 543b961 clgrt RRF_U0RR
544b964 nngrk RRF_R0RR2
545b965 ocgrk RRF_R0RR2
546b966 nogrk RRF_R0RR2
547b967 nxgrk RRF_R0RR2
541b972 crt RRF_U0RR 548b972 crt RRF_U0RR
542b973 clrt RRF_U0RR 549b973 clrt RRF_U0RR
550b974 nnrk RRF_R0RR2
551b975 ocrk RRF_R0RR2
552b976 nork RRF_R0RR2
553b977 nxrk RRF_R0RR2
543b980 ngr RRE_RR 554b980 ngr RRE_RR
544b981 ogr RRE_RR 555b981 ogr RRE_RR
545b982 xgr RRE_RR 556b982 xgr RRE_RR
@@ -573,6 +584,7 @@ b99f ssair RRE_R0
573b9a0 clp RRF_U0RR 584b9a0 clp RRF_U0RR
574b9a1 tpei RRE_RR 585b9a1 tpei RRE_RR
575b9a2 ptf RRE_R0 586b9a2 ptf RRE_R0
587b9a4 uvc RRF_URR
576b9aa lptea RRF_RURR2 588b9aa lptea RRF_RURR2
577b9ab essa RRF_U0RR 589b9ab essa RRF_U0RR
578b9ac irbm RRE_RR 590b9ac irbm RRE_RR
@@ -585,6 +597,7 @@ b9b3 cu42 RRE_RR
585b9bd trtre RRF_U0RR 597b9bd trtre RRF_U0RR
586b9be srstu RRE_RR 598b9be srstu RRE_RR
587b9bf trte RRF_U0RR 599b9bf trte RRF_U0RR
600b9c0 selhhhr RRF_RURR
588b9c8 ahhhr RRF_R0RR2 601b9c8 ahhhr RRF_R0RR2
589b9c9 shhhr RRF_R0RR2 602b9c9 shhhr RRF_R0RR2
590b9ca alhhhr RRF_R0RR2 603b9ca alhhhr RRF_R0RR2
@@ -594,6 +607,9 @@ b9cf clhhr RRE_RR
594b9d0 pcistg RRE_RR 607b9d0 pcistg RRE_RR
595b9d2 pcilg RRE_RR 608b9d2 pcilg RRE_RR
596b9d3 rpcit RRE_RR 609b9d3 rpcit RRE_RR
610b9d4 pcistgi RRE_RR
611b9d5 pciwb RRE_00
612b9d6 pcilgi RRE_RR
597b9d8 ahhlr RRF_R0RR2 613b9d8 ahhlr RRF_R0RR2
598b9d9 shhlr RRF_R0RR2 614b9d9 shhlr RRF_R0RR2
599b9da alhhlr RRF_R0RR2 615b9da alhhlr RRF_R0RR2
@@ -601,9 +617,11 @@ b9db slhhlr RRF_R0RR2
601b9dd chlr RRE_RR 617b9dd chlr RRE_RR
602b9df clhlr RRE_RR 618b9df clhlr RRE_RR
603b9e0 locfhr RRF_U0RR 619b9e0 locfhr RRF_U0RR
604b9e1 popcnt RRE_RR 620b9e1 popcnt RRF_U0RR
605b9e2 locgr RRF_U0RR 621b9e2 locgr RRF_U0RR
622b9e3 selgr RRF_RURR
606b9e4 ngrk RRF_R0RR2 623b9e4 ngrk RRF_R0RR2
624b9e5 ncgrk RRF_R0RR2
607b9e6 ogrk RRF_R0RR2 625b9e6 ogrk RRF_R0RR2
608b9e7 xgrk RRF_R0RR2 626b9e7 xgrk RRF_R0RR2
609b9e8 agrk RRF_R0RR2 627b9e8 agrk RRF_R0RR2
@@ -612,8 +630,10 @@ b9ea algrk RRF_R0RR2
612b9eb slgrk RRF_R0RR2 630b9eb slgrk RRF_R0RR2
613b9ec mgrk RRF_R0RR2 631b9ec mgrk RRF_R0RR2
614b9ed msgrkc RRF_R0RR2 632b9ed msgrkc RRF_R0RR2
633b9f0 selr RRF_RURR
615b9f2 locr RRF_U0RR 634b9f2 locr RRF_U0RR
616b9f4 nrk RRF_R0RR2 635b9f4 nrk RRF_R0RR2
636b9f5 ncrk RRF_R0RR2
617b9f6 ork RRF_R0RR2 637b9f6 ork RRF_R0RR2
618b9f7 xrk RRF_R0RR2 638b9f7 xrk RRF_R0RR2
619b9f8 ark RRF_R0RR2 639b9f8 ark RRF_R0RR2
@@ -822,6 +842,7 @@ e3d4 stpcifc RXY_RRRD
822e500 lasp SSE_RDRD 842e500 lasp SSE_RDRD
823e501 tprot SSE_RDRD 843e501 tprot SSE_RDRD
824e502 strag SSE_RDRD 844e502 strag SSE_RDRD
845e50a mvcrl SSE_RDRD
825e50e mvcsk SSE_RDRD 846e50e mvcsk SSE_RDRD
826e50f mvcdk SSE_RDRD 847e50f mvcdk SSE_RDRD
827e544 mvhhi SIL_RDI 848e544 mvhhi SIL_RDI
@@ -835,6 +856,18 @@ e55c chsi SIL_RDI
835e55d clfhsi SIL_RDU 856e55d clfhsi SIL_RDU
836e560 tbegin SIL_RDU 857e560 tbegin SIL_RDU
837e561 tbeginc SIL_RDU 858e561 tbeginc SIL_RDU
859e601 vlebrh VRX_VRRDU
860e602 vlebrg VRX_VRRDU
861e603 vlebrf VRX_VRRDU
862e604 vllebrz VRX_VRRDU
863e605 vlbrrep VRX_VRRDU
864e606 vlbr VRX_VRRDU
865e607 vler VRX_VRRDU
866e609 vstebrh VRX_VRRDU
867e60a vstebrg VRX_VRRDU
868e60b vstebrf VRX_VRRDU
869e60e vstbr VRX_VRRDU
870e60f vster VRX_VRRDU
838e634 vpkz VSI_URDV 871e634 vpkz VSI_URDV
839e635 vlrl VSI_URDV 872e635 vlrl VSI_URDV
840e637 vlrlr VRS_RRDV 873e637 vlrlr VRS_RRDV
@@ -842,8 +875,8 @@ e63c vupkz VSI_URDV
842e63d vstrl VSI_URDV 875e63d vstrl VSI_URDV
843e63f vstrlr VRS_RRDV 876e63f vstrlr VRS_RRDV
844e649 vlip VRI_V0UU2 877e649 vlip VRI_V0UU2
845e650 vcvb VRR_RV0U 878e650 vcvb VRR_RV0UU
846e652 vcvbg VRR_RV0U 879e652 vcvbg VRR_RV0UU
847e658 vcvd VRI_VR0UU 880e658 vcvd VRI_VR0UU
848e659 vsrp VRI_VVUUU2 881e659 vsrp VRI_VVUUU2
849e65a vcvdg VRI_VR0UU 882e65a vcvdg VRI_VR0UU
@@ -863,13 +896,13 @@ e702 vleg VRX_VRRDU
863e703 vlef VRX_VRRDU 896e703 vlef VRX_VRRDU
864e704 vllez VRX_VRRDU 897e704 vllez VRX_VRRDU
865e705 vlrep VRX_VRRDU 898e705 vlrep VRX_VRRDU
866e706 vl VRX_VRRD 899e706 vl VRX_VRRDU
867e707 vlbb VRX_VRRDU 900e707 vlbb VRX_VRRDU
868e708 vsteb VRX_VRRDU 901e708 vsteb VRX_VRRDU
869e709 vsteh VRX_VRRDU 902e709 vsteh VRX_VRRDU
870e70a vsteg VRX_VRRDU 903e70a vsteg VRX_VRRDU
871e70b vstef VRX_VRRDU 904e70b vstef VRX_VRRDU
872e70e vst VRX_VRRD 905e70e vst VRX_VRRDU
873e712 vgeg VRV_VVXRDU 906e712 vgeg VRV_VVXRDU
874e713 vgef VRV_VVXRDU 907e713 vgef VRV_VVXRDU
875e71a vsceg VRV_VVXRDU 908e71a vsceg VRV_VVXRDU
@@ -879,11 +912,11 @@ e722 vlvg VRS_VRRDU
879e727 lcbb RXE_RRRDU 912e727 lcbb RXE_RRRDU
880e730 vesl VRS_VVRDU 913e730 vesl VRS_VVRDU
881e733 verll VRS_VVRDU 914e733 verll VRS_VVRDU
882e736 vlm VRS_VVRD 915e736 vlm VRS_VVRDU
883e737 vll VRS_VRRD 916e737 vll VRS_VRRD
884e738 vesrl VRS_VVRDU 917e738 vesrl VRS_VVRDU
885e73a vesra VRS_VVRDU 918e73a vesra VRS_VVRDU
886e73e vstm VRS_VVRD 919e73e vstm VRS_VVRDU
887e73f vstl VRS_VRRD 920e73f vstl VRS_VRRD
888e740 vleib VRI_V0IU 921e740 vleib VRI_V0IU
889e741 vleih VRI_V0IU 922e741 vleih VRI_V0IU
@@ -932,7 +965,10 @@ e781 vfene VRR_VVV0U0U
932e782 vfae VRR_VVV0U0U 965e782 vfae VRR_VVV0U0U
933e784 vpdi VRR_VVV0U 966e784 vpdi VRR_VVV0U
934e785 vbperm VRR_VVV 967e785 vbperm VRR_VVV
968e786 vsld VRI_VVV0U
969e787 vsrd VRI_VVV0U
935e78a vstrc VRR_VVVUU0V 970e78a vstrc VRR_VVVUU0V
971e78b vstrs VRR_VVVUU0V
936e78c vperm VRR_VVV0V 972e78c vperm VRR_VVV0V
937e78d vsel VRR_VVV0V 973e78d vsel VRR_VVV0V
938e78e vfms VRR_VVVU0UV 974e78e vfms VRR_VVVU0UV
@@ -1060,6 +1096,7 @@ eb9b stamy RSY_AARD
1060ebc0 tp RSL_R0RD 1096ebc0 tp RSL_R0RD
1061ebd0 pcistb RSY_RRRD 1097ebd0 pcistb RSY_RRRD
1062ebd1 sic RSY_RRRD 1098ebd1 sic RSY_RRRD
1099ebd4 pcistbi RSY_RRRD
1063ebdc srak RSY_RRRD 1100ebdc srak RSY_RRRD
1064ebdd slak RSY_RRRD 1101ebdd slak RSY_RRRD
1065ebde srlk RSY_RRRD 1102ebde srlk RSY_RRRD
diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h
index 6963482c81d8..b60448397d4f 100644
--- a/arch/sparc/include/asm/atomic_64.h
+++ b/arch/sparc/include/asm/atomic_64.h
@@ -23,15 +23,15 @@
23 23
24#define ATOMIC_OP(op) \ 24#define ATOMIC_OP(op) \
25void atomic_##op(int, atomic_t *); \ 25void atomic_##op(int, atomic_t *); \
26void atomic64_##op(long, atomic64_t *); 26void atomic64_##op(s64, atomic64_t *);
27 27
28#define ATOMIC_OP_RETURN(op) \ 28#define ATOMIC_OP_RETURN(op) \
29int atomic_##op##_return(int, atomic_t *); \ 29int atomic_##op##_return(int, atomic_t *); \
30long atomic64_##op##_return(long, atomic64_t *); 30s64 atomic64_##op##_return(s64, atomic64_t *);
31 31
32#define ATOMIC_FETCH_OP(op) \ 32#define ATOMIC_FETCH_OP(op) \
33int atomic_fetch_##op(int, atomic_t *); \ 33int atomic_fetch_##op(int, atomic_t *); \
34long atomic64_fetch_##op(long, atomic64_t *); 34s64 atomic64_fetch_##op(s64, atomic64_t *);
35 35
36#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op) 36#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op)
37 37
@@ -61,7 +61,7 @@ static inline int atomic_xchg(atomic_t *v, int new)
61 ((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n))) 61 ((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n)))
62#define atomic64_xchg(v, new) (xchg(&((v)->counter), new)) 62#define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
63 63
64long atomic64_dec_if_positive(atomic64_t *v); 64s64 atomic64_dec_if_positive(atomic64_t *v);
65#define atomic64_dec_if_positive atomic64_dec_if_positive 65#define atomic64_dec_if_positive atomic64_dec_if_positive
66 66
67#endif /* !(__ARCH_SPARC64_ATOMIC__) */ 67#endif /* !(__ARCH_SPARC64_ATOMIC__) */
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 2bbbd4d1ba31..fbabf59692ff 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -17,6 +17,7 @@ config X86_32
17 select HAVE_DEBUG_STACKOVERFLOW 17 select HAVE_DEBUG_STACKOVERFLOW
18 select MODULES_USE_ELF_REL 18 select MODULES_USE_ELF_REL
19 select OLD_SIGACTION 19 select OLD_SIGACTION
20 select GENERIC_VDSO_32
20 21
21config X86_64 22config X86_64
22 def_bool y 23 def_bool y
@@ -121,6 +122,7 @@ config X86
121 select GENERIC_STRNCPY_FROM_USER 122 select GENERIC_STRNCPY_FROM_USER
122 select GENERIC_STRNLEN_USER 123 select GENERIC_STRNLEN_USER
123 select GENERIC_TIME_VSYSCALL 124 select GENERIC_TIME_VSYSCALL
125 select GENERIC_GETTIMEOFDAY
124 select HARDLOCKUP_CHECK_TIMESTAMP if X86_64 126 select HARDLOCKUP_CHECK_TIMESTAMP if X86_64
125 select HAVE_ACPI_APEI if ACPI 127 select HAVE_ACPI_APEI if ACPI
126 select HAVE_ACPI_APEI_NMI if ACPI 128 select HAVE_ACPI_APEI_NMI if ACPI
@@ -202,6 +204,7 @@ config X86
202 select HAVE_SYSCALL_TRACEPOINTS 204 select HAVE_SYSCALL_TRACEPOINTS
203 select HAVE_UNSTABLE_SCHED_CLOCK 205 select HAVE_UNSTABLE_SCHED_CLOCK
204 select HAVE_USER_RETURN_NOTIFIER 206 select HAVE_USER_RETURN_NOTIFIER
207 select HAVE_GENERIC_VDSO
205 select HOTPLUG_SMT if SMP 208 select HOTPLUG_SMT if SMP
206 select IRQ_FORCED_THREADING 209 select IRQ_FORCED_THREADING
207 select NEED_SG_DMA_LENGTH 210 select NEED_SG_DMA_LENGTH
@@ -2285,7 +2288,7 @@ config COMPAT_VDSO
2285choice 2288choice
2286 prompt "vsyscall table for legacy applications" 2289 prompt "vsyscall table for legacy applications"
2287 depends on X86_64 2290 depends on X86_64
2288 default LEGACY_VSYSCALL_EMULATE 2291 default LEGACY_VSYSCALL_XONLY
2289 help 2292 help
2290 Legacy user code that does not know how to find the vDSO expects 2293 Legacy user code that does not know how to find the vDSO expects
2291 to be able to issue three syscalls by calling fixed addresses in 2294 to be able to issue three syscalls by calling fixed addresses in
@@ -2293,23 +2296,38 @@ choice
2293 it can be used to assist security vulnerability exploitation. 2296 it can be used to assist security vulnerability exploitation.
2294 2297
2295 This setting can be changed at boot time via the kernel command 2298 This setting can be changed at boot time via the kernel command
2296 line parameter vsyscall=[emulate|none]. 2299 line parameter vsyscall=[emulate|xonly|none].
2297 2300
2298 On a system with recent enough glibc (2.14 or newer) and no 2301 On a system with recent enough glibc (2.14 or newer) and no
2299 static binaries, you can say None without a performance penalty 2302 static binaries, you can say None without a performance penalty
2300 to improve security. 2303 to improve security.
2301 2304
2302 If unsure, select "Emulate". 2305 If unsure, select "Emulate execution only".
2303 2306
2304 config LEGACY_VSYSCALL_EMULATE 2307 config LEGACY_VSYSCALL_EMULATE
2305 bool "Emulate" 2308 bool "Full emulation"
2306 help 2309 help
2307 The kernel traps and emulates calls into the fixed 2310 The kernel traps and emulates calls into the fixed vsyscall
2308 vsyscall address mapping. This makes the mapping 2311 address mapping. This makes the mapping non-executable, but
2309 non-executable, but it still contains known contents, 2312 it still contains readable known contents, which could be
2310 which could be used in certain rare security vulnerability 2313 used in certain rare security vulnerability exploits. This
2311 exploits. This configuration is recommended when userspace 2314 configuration is recommended when using legacy userspace
2312 still uses the vsyscall area. 2315 that still uses vsyscalls along with legacy binary
2316 instrumentation tools that require code to be readable.
2317
2318 An example of this type of legacy userspace is running
2319 Pin on an old binary that still uses vsyscalls.
2320
2321 config LEGACY_VSYSCALL_XONLY
2322 bool "Emulate execution only"
2323 help
2324 The kernel traps and emulates calls into the fixed vsyscall
2325 address mapping and does not allow reads. This
2326 configuration is recommended when userspace might use the
2327 legacy vsyscall area but support for legacy binary
2328 instrumentation of legacy code is not needed. It mitigates
2329 certain uses of the vsyscall area as an ASLR-bypassing
2330 buffer.
2313 2331
2314 config LEGACY_VSYSCALL_NONE 2332 config LEGACY_VSYSCALL_NONE
2315 bool "None" 2333 bool "None"
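The help text above describes the vsyscall modes that this hunk introduces. As a hedged illustration only (not part of the patch), the following standalone x86-64 userspace sketch shows the practical difference between "emulate" and "xonly": calling the legacy gettimeofday entry at the fixed address 0xffffffffff600000 is emulated in both modes, while reading that page only succeeds under full emulation and raises SIGSEGV under "xonly", where no PTE backs the page. The demo names and the exact fault behavior are an assumption drawn from the surrounding change.

/* Hypothetical x86-64 userspace demo of vsyscall=emulate vs. vsyscall=xonly. */
#include <stdio.h>
#include <sys/time.h>

/* Fixed legacy vsyscall entry for gettimeofday (VSYSCALL_ADDR). */
typedef long (*vsys_gtod_t)(struct timeval *, void *);
#define VSYS_GTOD ((vsys_gtod_t)0xffffffffff600000UL)

int main(void)
{
	struct timeval tv;

	/* Execution is trapped and emulated in both "emulate" and "xonly". */
	if (VSYS_GTOD(&tv, NULL) == 0)
		printf("vsyscall gettimeofday: %ld s\n", (long)tv.tv_sec);

	/* Reading the page works only under full emulation; under "xonly"
	 * this load faults and the process receives SIGSEGV. */
	printf("first byte of vsyscall page: %#x\n",
	       *(volatile unsigned char *)0xffffffffff600000UL);
	return 0;
}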
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 6adce15268bd..8e29c991ba3e 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -480,3 +480,16 @@ config CPU_SUP_UMC_32
480 CPU might render the kernel unbootable. 480 CPU might render the kernel unbootable.
481 481
482 If unsure, say N. 482 If unsure, say N.
483
484config CPU_SUP_ZHAOXIN
485 default y
486 bool "Support Zhaoxin processors" if PROCESSOR_SELECT
487 help
 488	  This enables detection, tunings and quirks for Zhaoxin processors.
489
490 You need this enabled if you want your kernel to run on a
491 Zhaoxin CPU. Disabling this option on other types of CPUs
492 makes the kernel a tiny bit smaller. Disabling it on a Zhaoxin
493 CPU might render the kernel unbootable.
494
495 If unsure, say N.
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 2418804e66b4..536b574b6161 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -72,23 +72,18 @@ static long syscall_trace_enter(struct pt_regs *regs)
72 72
73 struct thread_info *ti = current_thread_info(); 73 struct thread_info *ti = current_thread_info();
74 unsigned long ret = 0; 74 unsigned long ret = 0;
75 bool emulated = false;
76 u32 work; 75 u32 work;
77 76
78 if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) 77 if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
79 BUG_ON(regs != task_pt_regs(current)); 78 BUG_ON(regs != task_pt_regs(current));
80 79
81 work = READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY; 80 work = READ_ONCE(ti->flags);
82 81
83 if (unlikely(work & _TIF_SYSCALL_EMU)) 82 if (work & (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU)) {
84 emulated = true; 83 ret = tracehook_report_syscall_entry(regs);
85 84 if (ret || (work & _TIF_SYSCALL_EMU))
86 if ((emulated || (work & _TIF_SYSCALL_TRACE)) && 85 return -1L;
87 tracehook_report_syscall_entry(regs)) 86 }
88 return -1L;
89
90 if (emulated)
91 return -1L;
92 87
93#ifdef CONFIG_SECCOMP 88#ifdef CONFIG_SECCOMP
94 /* 89 /*
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 7b23431be5cb..44c6e6f54bf7 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -1104,6 +1104,30 @@ ENTRY(irq_entries_start)
1104 .endr 1104 .endr
1105END(irq_entries_start) 1105END(irq_entries_start)
1106 1106
1107#ifdef CONFIG_X86_LOCAL_APIC
1108 .align 8
1109ENTRY(spurious_entries_start)
1110 vector=FIRST_SYSTEM_VECTOR
1111 .rept (NR_VECTORS - FIRST_SYSTEM_VECTOR)
1112 pushl $(~vector+0x80) /* Note: always in signed byte range */
1113 vector=vector+1
1114 jmp common_spurious
1115 .align 8
1116 .endr
1117END(spurious_entries_start)
1118
1119common_spurious:
1120 ASM_CLAC
1121 addl $-0x80, (%esp) /* Adjust vector into the [-256, -1] range */
1122 SAVE_ALL switch_stacks=1
1123 ENCODE_FRAME_POINTER
1124 TRACE_IRQS_OFF
1125 movl %esp, %eax
1126 call smp_spurious_interrupt
1127 jmp ret_from_intr
1128ENDPROC(common_interrupt)
1129#endif
1130
1107/* 1131/*
1108 * the CPU automatically disables interrupts when executing an IRQ vector, 1132 * the CPU automatically disables interrupts when executing an IRQ vector,
1109 * so IRQ-flags tracing has to follow that: 1133 * so IRQ-flags tracing has to follow that:
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 11aa3b2afa4d..15f0749d0a15 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -375,6 +375,18 @@ ENTRY(irq_entries_start)
375 .endr 375 .endr
376END(irq_entries_start) 376END(irq_entries_start)
377 377
378 .align 8
379ENTRY(spurious_entries_start)
380 vector=FIRST_SYSTEM_VECTOR
381 .rept (NR_VECTORS - FIRST_SYSTEM_VECTOR)
382 UNWIND_HINT_IRET_REGS
383 pushq $(~vector+0x80) /* Note: always in signed byte range */
384 jmp common_spurious
385 .align 8
386 vector=vector+1
387 .endr
388END(spurious_entries_start)
389
378.macro DEBUG_ENTRY_ASSERT_IRQS_OFF 390.macro DEBUG_ENTRY_ASSERT_IRQS_OFF
379#ifdef CONFIG_DEBUG_ENTRY 391#ifdef CONFIG_DEBUG_ENTRY
380 pushq %rax 392 pushq %rax
@@ -571,10 +583,20 @@ _ASM_NOKPROBE(interrupt_entry)
571 583
572/* Interrupt entry/exit. */ 584/* Interrupt entry/exit. */
573 585
574 /* 586/*
575 * The interrupt stubs push (~vector+0x80) onto the stack and 587 * The interrupt stubs push (~vector+0x80) onto the stack and
576 * then jump to common_interrupt. 588 * then jump to common_spurious/interrupt.
577 */ 589 */
590common_spurious:
591 addq $-0x80, (%rsp) /* Adjust vector to [-256, -1] range */
592 call interrupt_entry
593 UNWIND_HINT_REGS indirect=1
594 call smp_spurious_interrupt /* rdi points to pt_regs */
595 jmp ret_from_intr
596END(common_spurious)
597_ASM_NOKPROBE(common_spurious)
598
599/* common_interrupt is a hotpath. Align it */
578 .p2align CONFIG_X86_L1_CACHE_SHIFT 600 .p2align CONFIG_X86_L1_CACHE_SHIFT
579common_interrupt: 601common_interrupt:
580 addq $-0x80, (%rsp) /* Adjust vector to [-256, -1] range */ 602 addq $-0x80, (%rsp) /* Adjust vector to [-256, -1] range */
@@ -1670,11 +1692,17 @@ nmi_restore:
1670 iretq 1692 iretq
1671END(nmi) 1693END(nmi)
1672 1694
1695#ifndef CONFIG_IA32_EMULATION
1696/*
1697 * This handles SYSCALL from 32-bit code. There is no way to program
1698 * MSRs to fully disable 32-bit SYSCALL.
1699 */
1673ENTRY(ignore_sysret) 1700ENTRY(ignore_sysret)
1674 UNWIND_HINT_EMPTY 1701 UNWIND_HINT_EMPTY
1675 mov $-ENOSYS, %eax 1702 mov $-ENOSYS, %eax
1676 sysret 1703 sysret
1677END(ignore_sysret) 1704END(ignore_sysret)
1705#endif
1678 1706
1679ENTRY(rewind_stack_do_exit) 1707ENTRY(rewind_stack_do_exit)
1680 UNWIND_HINT_FUNC 1708 UNWIND_HINT_FUNC
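For reference, a hedged standalone check (an illustration, not part of the patch) of the (~vector + 0x80) encoding noted in the stub comments above: the pushed immediate always fits in a signed byte, the common entry code adds -0x80 to turn it back into ~vector, and the handler can recover the original vector number by complementing that saved value.

#include <assert.h>
#include <stdint.h>

int main(void)
{
	for (unsigned int vector = 0x20; vector <= 0xff; vector++) {
		/* What the per-vector stub pushes; always a signed byte. */
		int32_t pushed = (int8_t)(~vector + 0x80);
		assert(pushed >= -128 && pushed <= 127);
		/* Mirrors addl/addq $-0x80, (%rsp) in the common entry code. */
		int32_t adjusted = pushed - 0x80;
		/* The C handler recovers the vector by complementing it. */
		assert(((~adjusted) & 0xff) == vector);
	}
	return 0;
}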
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 42fe42e82baf..39106111be86 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -3,6 +3,12 @@
3# Building vDSO images for x86. 3# Building vDSO images for x86.
4# 4#
5 5
6# Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before
7# the inclusion of generic Makefile.
8ARCH_REL_TYPE_ABS := R_X86_64_JUMP_SLOT|R_X86_64_GLOB_DAT|R_X86_64_RELATIVE|
9ARCH_REL_TYPE_ABS += R_386_GLOB_DAT|R_386_JMP_SLOT|R_386_RELATIVE
10include $(srctree)/lib/vdso/Makefile
11
6KBUILD_CFLAGS += $(DISABLE_LTO) 12KBUILD_CFLAGS += $(DISABLE_LTO)
7KASAN_SANITIZE := n 13KASAN_SANITIZE := n
8UBSAN_SANITIZE := n 14UBSAN_SANITIZE := n
@@ -51,6 +57,7 @@ VDSO_LDFLAGS_vdso.lds = -m elf_x86_64 -soname linux-vdso.so.1 --no-undefined \
51 57
52$(obj)/vdso64.so.dbg: $(obj)/vdso.lds $(vobjs) FORCE 58$(obj)/vdso64.so.dbg: $(obj)/vdso.lds $(vobjs) FORCE
53 $(call if_changed,vdso) 59 $(call if_changed,vdso)
60 $(call if_changed,vdso_check)
54 61
55HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi -I$(srctree)/arch/$(SUBARCH)/include/uapi 62HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi -I$(srctree)/arch/$(SUBARCH)/include/uapi
56hostprogs-y += vdso2c 63hostprogs-y += vdso2c
@@ -121,6 +128,7 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE
121 128
122$(obj)/vdsox32.so.dbg: $(obj)/vdsox32.lds $(vobjx32s) FORCE 129$(obj)/vdsox32.so.dbg: $(obj)/vdsox32.lds $(vobjx32s) FORCE
123 $(call if_changed,vdso) 130 $(call if_changed,vdso)
131 $(call if_changed,vdso_check)
124 132
125CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds) 133CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds)
126VDSO_LDFLAGS_vdso32.lds = -m elf_i386 -soname linux-gate.so.1 134VDSO_LDFLAGS_vdso32.lds = -m elf_i386 -soname linux-gate.so.1
@@ -160,6 +168,7 @@ $(obj)/vdso32.so.dbg: FORCE \
160 $(obj)/vdso32/system_call.o \ 168 $(obj)/vdso32/system_call.o \
161 $(obj)/vdso32/sigreturn.o 169 $(obj)/vdso32/sigreturn.o
162 $(call if_changed,vdso) 170 $(call if_changed,vdso)
171 $(call if_changed,vdso_check)
163 172
164# 173#
165# The DSO images are built using a special linker script. 174# The DSO images are built using a special linker script.
diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index 4aed41f638bb..d9ff616bb0f6 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -1,251 +1,85 @@
1// SPDX-License-Identifier: GPL-2.0-only 1// SPDX-License-Identifier: GPL-2.0-only
2/* 2/*
3 * Copyright 2006 Andi Kleen, SUSE Labs.
4 *
5 * Fast user context implementation of clock_gettime, gettimeofday, and time. 3 * Fast user context implementation of clock_gettime, gettimeofday, and time.
6 * 4 *
5 * Copyright 2006 Andi Kleen, SUSE Labs.
6 * Copyright 2019 ARM Limited
7 *
7 * 32 Bit compat layer by Stefani Seibold <stefani@seibold.net> 8 * 32 Bit compat layer by Stefani Seibold <stefani@seibold.net>
8 * sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany 9 * sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany
9 *
10 * The code should have no internal unresolved relocations.
11 * Check with readelf after changing.
12 */ 10 */
13
14#include <uapi/linux/time.h>
15#include <asm/vgtod.h>
16#include <asm/vvar.h>
17#include <asm/unistd.h>
18#include <asm/msr.h>
19#include <asm/pvclock.h>
20#include <asm/mshyperv.h>
21#include <linux/math64.h>
22#include <linux/time.h> 11#include <linux/time.h>
23#include <linux/kernel.h> 12#include <linux/kernel.h>
13#include <linux/types.h>
24 14
25#define gtod (&VVAR(vsyscall_gtod_data)) 15#include "../../../../lib/vdso/gettimeofday.c"
26 16
27extern int __vdso_clock_gettime(clockid_t clock, struct timespec *ts); 17extern int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz);
28extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz);
29extern time_t __vdso_time(time_t *t); 18extern time_t __vdso_time(time_t *t);
30 19
31#ifdef CONFIG_PARAVIRT_CLOCK 20int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz)
32extern u8 pvclock_page[PAGE_SIZE]
33 __attribute__((visibility("hidden")));
34#endif
35
36#ifdef CONFIG_HYPERV_TSCPAGE
37extern u8 hvclock_page[PAGE_SIZE]
38 __attribute__((visibility("hidden")));
39#endif
40
41#ifndef BUILD_VDSO32
42
43notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
44{ 21{
45 long ret; 22 return __cvdso_gettimeofday(tv, tz);
46 asm ("syscall" : "=a" (ret), "=m" (*ts) :
47 "0" (__NR_clock_gettime), "D" (clock), "S" (ts) :
48 "rcx", "r11");
49 return ret;
50} 23}
51 24
52#else 25int gettimeofday(struct __kernel_old_timeval *, struct timezone *)
26 __attribute__((weak, alias("__vdso_gettimeofday")));
53 27
54notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) 28time_t __vdso_time(time_t *t)
55{ 29{
56 long ret; 30 return __cvdso_time(t);
57
58 asm (
59 "mov %%ebx, %%edx \n"
60 "mov %[clock], %%ebx \n"
61 "call __kernel_vsyscall \n"
62 "mov %%edx, %%ebx \n"
63 : "=a" (ret), "=m" (*ts)
64 : "0" (__NR_clock_gettime), [clock] "g" (clock), "c" (ts)
65 : "edx");
66 return ret;
67} 31}
68 32
69#endif 33time_t time(time_t *t) __attribute__((weak, alias("__vdso_time")));
70 34
71#ifdef CONFIG_PARAVIRT_CLOCK
72static notrace const struct pvclock_vsyscall_time_info *get_pvti0(void)
73{
74 return (const struct pvclock_vsyscall_time_info *)&pvclock_page;
75}
76 35
77static notrace u64 vread_pvclock(void) 36#if defined(CONFIG_X86_64) && !defined(BUILD_VDSO32_64)
78{ 37/* both 64-bit and x32 use these */
79 const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti; 38extern int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts);
80 u32 version; 39extern int __vdso_clock_getres(clockid_t clock, struct __kernel_timespec *res);
81 u64 ret;
82
83 /*
84 * Note: The kernel and hypervisor must guarantee that cpu ID
85 * number maps 1:1 to per-CPU pvclock time info.
86 *
87 * Because the hypervisor is entirely unaware of guest userspace
88 * preemption, it cannot guarantee that per-CPU pvclock time
89 * info is updated if the underlying CPU changes or that that
90 * version is increased whenever underlying CPU changes.
91 *
92 * On KVM, we are guaranteed that pvti updates for any vCPU are
93 * atomic as seen by *all* vCPUs. This is an even stronger
94 * guarantee than we get with a normal seqlock.
95 *
96 * On Xen, we don't appear to have that guarantee, but Xen still
97 * supplies a valid seqlock using the version field.
98 *
99 * We only do pvclock vdso timing at all if
100 * PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to
101 * mean that all vCPUs have matching pvti and that the TSC is
102 * synced, so we can just look at vCPU 0's pvti.
103 */
104
105 do {
106 version = pvclock_read_begin(pvti);
107
108 if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT)))
109 return U64_MAX;
110
111 ret = __pvclock_read_cycles(pvti, rdtsc_ordered());
112 } while (pvclock_read_retry(pvti, version));
113
114 return ret;
115}
116#endif
117#ifdef CONFIG_HYPERV_TSCPAGE
118static notrace u64 vread_hvclock(void)
119{
120 const struct ms_hyperv_tsc_page *tsc_pg =
121 (const struct ms_hyperv_tsc_page *)&hvclock_page;
122 40
123 return hv_read_tsc_page(tsc_pg); 41int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts)
124}
125#endif
126
127notrace static inline u64 vgetcyc(int mode)
128{ 42{
129 if (mode == VCLOCK_TSC) 43 return __cvdso_clock_gettime(clock, ts);
130 return (u64)rdtsc_ordered();
131
132 /*
133 * For any memory-mapped vclock type, we need to make sure that gcc
134 * doesn't cleverly hoist a load before the mode check. Otherwise we
135 * might end up touching the memory-mapped page even if the vclock in
136 * question isn't enabled, which will segfault. Hence the barriers.
137 */
138#ifdef CONFIG_PARAVIRT_CLOCK
139 if (mode == VCLOCK_PVCLOCK) {
140 barrier();
141 return vread_pvclock();
142 }
143#endif
144#ifdef CONFIG_HYPERV_TSCPAGE
145 if (mode == VCLOCK_HVCLOCK) {
146 barrier();
147 return vread_hvclock();
148 }
149#endif
150 return U64_MAX;
151} 44}
152 45
153notrace static int do_hres(clockid_t clk, struct timespec *ts) 46int clock_gettime(clockid_t, struct __kernel_timespec *)
154{ 47 __attribute__((weak, alias("__vdso_clock_gettime")));
155 struct vgtod_ts *base = &gtod->basetime[clk];
156 u64 cycles, last, sec, ns;
157 unsigned int seq;
158
159 do {
160 seq = gtod_read_begin(gtod);
161 cycles = vgetcyc(gtod->vclock_mode);
162 ns = base->nsec;
163 last = gtod->cycle_last;
164 if (unlikely((s64)cycles < 0))
165 return vdso_fallback_gettime(clk, ts);
166 if (cycles > last)
167 ns += (cycles - last) * gtod->mult;
168 ns >>= gtod->shift;
169 sec = base->sec;
170 } while (unlikely(gtod_read_retry(gtod, seq)));
171
172 /*
173 * Do this outside the loop: a race inside the loop could result
174 * in __iter_div_u64_rem() being extremely slow.
175 */
176 ts->tv_sec = sec + __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
177 ts->tv_nsec = ns;
178
179 return 0;
180}
181 48
182notrace static void do_coarse(clockid_t clk, struct timespec *ts) 49int __vdso_clock_getres(clockid_t clock,
50 struct __kernel_timespec *res)
183{ 51{
184 struct vgtod_ts *base = &gtod->basetime[clk]; 52 return __cvdso_clock_getres(clock, res);
185 unsigned int seq;
186
187 do {
188 seq = gtod_read_begin(gtod);
189 ts->tv_sec = base->sec;
190 ts->tv_nsec = base->nsec;
191 } while (unlikely(gtod_read_retry(gtod, seq)));
192} 53}
54int clock_getres(clockid_t, struct __kernel_timespec *)
55 __attribute__((weak, alias("__vdso_clock_getres")));
193 56
194notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) 57#else
58/* i386 only */
59extern int __vdso_clock_gettime(clockid_t clock, struct old_timespec32 *ts);
60extern int __vdso_clock_getres(clockid_t clock, struct old_timespec32 *res);
61
62int __vdso_clock_gettime(clockid_t clock, struct old_timespec32 *ts)
195{ 63{
196 unsigned int msk; 64 return __cvdso_clock_gettime32(clock, ts);
197
198 /* Sort out negative (CPU/FD) and invalid clocks */
199 if (unlikely((unsigned int) clock >= MAX_CLOCKS))
200 return vdso_fallback_gettime(clock, ts);
201
202 /*
203 * Convert the clockid to a bitmask and use it to check which
204 * clocks are handled in the VDSO directly.
205 */
206 msk = 1U << clock;
207 if (likely(msk & VGTOD_HRES)) {
208 return do_hres(clock, ts);
209 } else if (msk & VGTOD_COARSE) {
210 do_coarse(clock, ts);
211 return 0;
212 }
213 return vdso_fallback_gettime(clock, ts);
214} 65}
215 66
216int clock_gettime(clockid_t, struct timespec *) 67int clock_gettime(clockid_t, struct old_timespec32 *)
217 __attribute__((weak, alias("__vdso_clock_gettime"))); 68 __attribute__((weak, alias("__vdso_clock_gettime")));
218 69
219notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) 70int __vdso_clock_gettime64(clockid_t clock, struct __kernel_timespec *ts)
220{ 71{
221 if (likely(tv != NULL)) { 72 return __cvdso_clock_gettime(clock, ts);
222 struct timespec *ts = (struct timespec *) tv;
223
224 do_hres(CLOCK_REALTIME, ts);
225 tv->tv_usec /= 1000;
226 }
227 if (unlikely(tz != NULL)) {
228 tz->tz_minuteswest = gtod->tz_minuteswest;
229 tz->tz_dsttime = gtod->tz_dsttime;
230 }
231
232 return 0;
233} 73}
234int gettimeofday(struct timeval *, struct timezone *)
235 __attribute__((weak, alias("__vdso_gettimeofday")));
236 74
237/* 75int clock_gettime64(clockid_t, struct __kernel_timespec *)
238 * This will break when the xtime seconds get inaccurate, but that is 76 __attribute__((weak, alias("__vdso_clock_gettime64")));
239 * unlikely
240 */
241notrace time_t __vdso_time(time_t *t)
242{
243 /* This is atomic on x86 so we don't need any locks. */
244 time_t result = READ_ONCE(gtod->basetime[CLOCK_REALTIME].sec);
245 77
246 if (t) 78int __vdso_clock_getres(clockid_t clock, struct old_timespec32 *res)
247 *t = result; 79{
248 return result; 80 return __cvdso_clock_getres_time32(clock, res);
249} 81}
250time_t time(time_t *t) 82
251 __attribute__((weak, alias("__vdso_time"))); 83int clock_getres(clockid_t, struct old_timespec32 *)
84 __attribute__((weak, alias("__vdso_clock_getres")));
85#endif
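The converted vDSO functions above delegate to the generic __cvdso_*() helpers and export each entry point both under its __vdso_ name and under the plain name via a weak alias. A minimal hedged sketch of that export pattern only (the example symbols are invented, this is not kernel code): both names below resolve to the same implementation.

#include <stdio.h>

static int do_answer(void)
{
	return 42;
}

/* Strong symbol under the "internal" double-underscore name... */
int __example_answer(void)
{
	return do_answer();
}

/* ...re-exported under the plain name as a weak alias. */
int example_answer(void) __attribute__((weak, alias("__example_answer")));

int main(void)
{
	printf("%d %d\n", example_answer(), __example_answer());
	return 0;
}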
diff --git a/arch/x86/entry/vdso/vdso.lds.S b/arch/x86/entry/vdso/vdso.lds.S
index d3a2dce4cfa9..36b644e16272 100644
--- a/arch/x86/entry/vdso/vdso.lds.S
+++ b/arch/x86/entry/vdso/vdso.lds.S
@@ -25,6 +25,8 @@ VERSION {
25 __vdso_getcpu; 25 __vdso_getcpu;
26 time; 26 time;
27 __vdso_time; 27 __vdso_time;
28 clock_getres;
29 __vdso_clock_getres;
28 local: *; 30 local: *;
29 }; 31 };
30} 32}
diff --git a/arch/x86/entry/vdso/vdso32/vdso32.lds.S b/arch/x86/entry/vdso/vdso32/vdso32.lds.S
index 422764a81d32..c7720995ab1a 100644
--- a/arch/x86/entry/vdso/vdso32/vdso32.lds.S
+++ b/arch/x86/entry/vdso/vdso32/vdso32.lds.S
@@ -26,6 +26,8 @@ VERSION
26 __vdso_clock_gettime; 26 __vdso_clock_gettime;
27 __vdso_gettimeofday; 27 __vdso_gettimeofday;
28 __vdso_time; 28 __vdso_time;
29 __vdso_clock_getres;
30 __vdso_clock_gettime64;
29 }; 31 };
30 32
31 LINUX_2.5 { 33 LINUX_2.5 {
diff --git a/arch/x86/entry/vdso/vdsox32.lds.S b/arch/x86/entry/vdso/vdsox32.lds.S
index 05cd1c5c4a15..16a8050a4fb6 100644
--- a/arch/x86/entry/vdso/vdsox32.lds.S
+++ b/arch/x86/entry/vdso/vdsox32.lds.S
@@ -21,6 +21,7 @@ VERSION {
21 __vdso_gettimeofday; 21 __vdso_gettimeofday;
22 __vdso_getcpu; 22 __vdso_getcpu;
23 __vdso_time; 23 __vdso_time;
24 __vdso_clock_getres;
24 local: *; 25 local: *;
25 }; 26 };
26} 27}
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 8db1f594e8b1..349a61d8bf34 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -22,7 +22,7 @@
22#include <asm/page.h> 22#include <asm/page.h>
23#include <asm/desc.h> 23#include <asm/desc.h>
24#include <asm/cpufeature.h> 24#include <asm/cpufeature.h>
25#include <asm/mshyperv.h> 25#include <clocksource/hyperv_timer.h>
26 26
27#if defined(CONFIG_X86_64) 27#if defined(CONFIG_X86_64)
28unsigned int __read_mostly vdso64_enabled = 1; 28unsigned int __read_mostly vdso64_enabled = 1;
diff --git a/arch/x86/entry/vsyscall/Makefile b/arch/x86/entry/vsyscall/Makefile
index 1ac4dd116c26..93c1b3e949a7 100644
--- a/arch/x86/entry/vsyscall/Makefile
+++ b/arch/x86/entry/vsyscall/Makefile
@@ -2,7 +2,5 @@
2# 2#
3# Makefile for the x86 low level vsyscall code 3# Makefile for the x86 low level vsyscall code
4# 4#
5obj-y := vsyscall_gtod.o
6
7obj-$(CONFIG_X86_VSYSCALL_EMULATION) += vsyscall_64.o vsyscall_emu_64.o 5obj-$(CONFIG_X86_VSYSCALL_EMULATION) += vsyscall_64.o vsyscall_emu_64.o
8 6
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index d9d81ad7a400..07003f3f1bfc 100644
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -42,9 +42,11 @@
42#define CREATE_TRACE_POINTS 42#define CREATE_TRACE_POINTS
43#include "vsyscall_trace.h" 43#include "vsyscall_trace.h"
44 44
45static enum { EMULATE, NONE } vsyscall_mode = 45static enum { EMULATE, XONLY, NONE } vsyscall_mode __ro_after_init =
46#ifdef CONFIG_LEGACY_VSYSCALL_NONE 46#ifdef CONFIG_LEGACY_VSYSCALL_NONE
47 NONE; 47 NONE;
48#elif defined(CONFIG_LEGACY_VSYSCALL_XONLY)
49 XONLY;
48#else 50#else
49 EMULATE; 51 EMULATE;
50#endif 52#endif
@@ -54,6 +56,8 @@ static int __init vsyscall_setup(char *str)
54 if (str) { 56 if (str) {
55 if (!strcmp("emulate", str)) 57 if (!strcmp("emulate", str))
56 vsyscall_mode = EMULATE; 58 vsyscall_mode = EMULATE;
59 else if (!strcmp("xonly", str))
60 vsyscall_mode = XONLY;
57 else if (!strcmp("none", str)) 61 else if (!strcmp("none", str))
58 vsyscall_mode = NONE; 62 vsyscall_mode = NONE;
59 else 63 else
@@ -113,7 +117,8 @@ static bool write_ok_or_segv(unsigned long ptr, size_t size)
113 } 117 }
114} 118}
115 119
116bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) 120bool emulate_vsyscall(unsigned long error_code,
121 struct pt_regs *regs, unsigned long address)
117{ 122{
118 struct task_struct *tsk; 123 struct task_struct *tsk;
119 unsigned long caller; 124 unsigned long caller;
@@ -122,6 +127,22 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
122 long ret; 127 long ret;
123 unsigned long orig_dx; 128 unsigned long orig_dx;
124 129
130 /* Write faults or kernel-privilege faults never get fixed up. */
131 if ((error_code & (X86_PF_WRITE | X86_PF_USER)) != X86_PF_USER)
132 return false;
133
134 if (!(error_code & X86_PF_INSTR)) {
135 /* Failed vsyscall read */
136 if (vsyscall_mode == EMULATE)
137 return false;
138
139 /*
140 * User code tried and failed to read the vsyscall page.
141 */
142 warn_bad_vsyscall(KERN_INFO, regs, "vsyscall read attempt denied -- look up the vsyscall kernel parameter if you need a workaround");
143 return false;
144 }
145
125 /* 146 /*
126 * No point in checking CS -- the only way to get here is a user mode 147 * No point in checking CS -- the only way to get here is a user mode
127 * trap to a high address, which means that we're in 64-bit user code. 148 * trap to a high address, which means that we're in 64-bit user code.
@@ -284,7 +305,7 @@ static const char *gate_vma_name(struct vm_area_struct *vma)
284static const struct vm_operations_struct gate_vma_ops = { 305static const struct vm_operations_struct gate_vma_ops = {
285 .name = gate_vma_name, 306 .name = gate_vma_name,
286}; 307};
287static struct vm_area_struct gate_vma = { 308static struct vm_area_struct gate_vma __ro_after_init = {
288 .vm_start = VSYSCALL_ADDR, 309 .vm_start = VSYSCALL_ADDR,
289 .vm_end = VSYSCALL_ADDR + PAGE_SIZE, 310 .vm_end = VSYSCALL_ADDR + PAGE_SIZE,
290 .vm_page_prot = PAGE_READONLY_EXEC, 311 .vm_page_prot = PAGE_READONLY_EXEC,
@@ -357,12 +378,20 @@ void __init map_vsyscall(void)
357 extern char __vsyscall_page; 378 extern char __vsyscall_page;
358 unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page); 379 unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);
359 380
360 if (vsyscall_mode != NONE) { 381 /*
382 * For full emulation, the page needs to exist for real. In
383 * execute-only mode, there is no PTE at all backing the vsyscall
384 * page.
385 */
386 if (vsyscall_mode == EMULATE) {
361 __set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall, 387 __set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
362 PAGE_KERNEL_VVAR); 388 PAGE_KERNEL_VVAR);
363 set_vsyscall_pgtable_user_bits(swapper_pg_dir); 389 set_vsyscall_pgtable_user_bits(swapper_pg_dir);
364 } 390 }
365 391
392 if (vsyscall_mode == XONLY)
393 gate_vma.vm_flags = VM_EXEC;
394
366 BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) != 395 BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=
367 (unsigned long)VSYSCALL_ADDR); 396 (unsigned long)VSYSCALL_ADDR);
368} 397}
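A compact restatement, as a hedged sketch rather than the patch itself, of the new fault classification in emulate_vsyscall() above: only user-mode instruction fetches are candidates for emulation, failed user reads are refused in every mode, and write or kernel-privilege faults are never fixed up. The X86_PF_* values are the standard x86 page-fault error-code bits.

#define X86_PF_WRITE 0x02
#define X86_PF_USER  0x04
#define X86_PF_INSTR 0x10

/* Returns 1 if the fault may be emulated, 0 if it must fall through. */
static int vsyscall_fault_may_emulate(unsigned long error_code)
{
	if ((error_code & (X86_PF_WRITE | X86_PF_USER)) != X86_PF_USER)
		return 0;	/* write fault or kernel-privilege fault */
	if (!(error_code & X86_PF_INSTR))
		return 0;	/* failed read of the vsyscall page */
	return 1;		/* user-mode execution attempt: emulate */
}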
diff --git a/arch/x86/entry/vsyscall/vsyscall_gtod.c b/arch/x86/entry/vsyscall/vsyscall_gtod.c
deleted file mode 100644
index cfcdba082feb..000000000000
--- a/arch/x86/entry/vsyscall/vsyscall_gtod.c
+++ /dev/null
@@ -1,83 +0,0 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE
4 * Copyright 2003 Andi Kleen, SuSE Labs.
5 *
6 * Modified for x86 32 bit architecture by
7 * Stefani Seibold <stefani@seibold.net>
8 * sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany
9 *
10 * Thanks to hpa@transmeta.com for some useful hint.
11 * Special thanks to Ingo Molnar for his early experience with
12 * a different vsyscall implementation for Linux/IA32 and for the name.
13 *
14 */
15
16#include <linux/timekeeper_internal.h>
17#include <asm/vgtod.h>
18#include <asm/vvar.h>
19
20int vclocks_used __read_mostly;
21
22DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data);
23
24void update_vsyscall_tz(void)
25{
26 vsyscall_gtod_data.tz_minuteswest = sys_tz.tz_minuteswest;
27 vsyscall_gtod_data.tz_dsttime = sys_tz.tz_dsttime;
28}
29
30void update_vsyscall(struct timekeeper *tk)
31{
32 int vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode;
33 struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data;
34 struct vgtod_ts *base;
35 u64 nsec;
36
37 /* Mark the new vclock used. */
38 BUILD_BUG_ON(VCLOCK_MAX >= 32);
39 WRITE_ONCE(vclocks_used, READ_ONCE(vclocks_used) | (1 << vclock_mode));
40
41 gtod_write_begin(vdata);
42
43 /* copy vsyscall data */
44 vdata->vclock_mode = vclock_mode;
45 vdata->cycle_last = tk->tkr_mono.cycle_last;
46 vdata->mask = tk->tkr_mono.mask;
47 vdata->mult = tk->tkr_mono.mult;
48 vdata->shift = tk->tkr_mono.shift;
49
50 base = &vdata->basetime[CLOCK_REALTIME];
51 base->sec = tk->xtime_sec;
52 base->nsec = tk->tkr_mono.xtime_nsec;
53
54 base = &vdata->basetime[CLOCK_TAI];
55 base->sec = tk->xtime_sec + (s64)tk->tai_offset;
56 base->nsec = tk->tkr_mono.xtime_nsec;
57
58 base = &vdata->basetime[CLOCK_MONOTONIC];
59 base->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
60 nsec = tk->tkr_mono.xtime_nsec;
61 nsec += ((u64)tk->wall_to_monotonic.tv_nsec << tk->tkr_mono.shift);
62 while (nsec >= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
63 nsec -= ((u64)NSEC_PER_SEC) << tk->tkr_mono.shift;
64 base->sec++;
65 }
66 base->nsec = nsec;
67
68 base = &vdata->basetime[CLOCK_REALTIME_COARSE];
69 base->sec = tk->xtime_sec;
70 base->nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
71
72 base = &vdata->basetime[CLOCK_MONOTONIC_COARSE];
73 base->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
74 nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
75 nsec += tk->wall_to_monotonic.tv_nsec;
76 while (nsec >= NSEC_PER_SEC) {
77 nsec -= NSEC_PER_SEC;
78 base->sec++;
79 }
80 base->nsec = nsec;
81
82 gtod_write_end(vdata);
83}
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index f315425d8468..ceb712b0a1c6 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -561,14 +561,14 @@ int x86_pmu_hw_config(struct perf_event *event)
561 } 561 }
562 562
563 /* sample_regs_user never support XMM registers */ 563 /* sample_regs_user never support XMM registers */
564 if (unlikely(event->attr.sample_regs_user & PEBS_XMM_REGS)) 564 if (unlikely(event->attr.sample_regs_user & PERF_REG_EXTENDED_MASK))
565 return -EINVAL; 565 return -EINVAL;
566 /* 566 /*
567 * Besides the general purpose registers, XMM registers may 567 * Besides the general purpose registers, XMM registers may
568 * be collected in PEBS on some platforms, e.g. Icelake 568 * be collected in PEBS on some platforms, e.g. Icelake
569 */ 569 */
570 if (unlikely(event->attr.sample_regs_intr & PEBS_XMM_REGS)) { 570 if (unlikely(event->attr.sample_regs_intr & PERF_REG_EXTENDED_MASK)) {
571 if (x86_pmu.pebs_no_xmm_regs) 571 if (!(event->pmu->capabilities & PERF_PMU_CAP_EXTENDED_REGS))
572 return -EINVAL; 572 return -EINVAL;
573 573
574 if (!event->attr.precise_ip) 574 if (!event->attr.precise_ip)
@@ -2179,7 +2179,7 @@ static void x86_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm)
2179 * For now, this can't happen because all callers hold mmap_sem 2179 * For now, this can't happen because all callers hold mmap_sem
2180 * for write. If this changes, we'll need a different solution. 2180 * for write. If this changes, we'll need a different solution.
2181 */ 2181 */
2182 lockdep_assert_held_exclusive(&mm->mmap_sem); 2182 lockdep_assert_held_write(&mm->mmap_sem);
2183 2183
2184 if (atomic_inc_return(&mm->context.perf_rdpmc_allowed) == 1) 2184 if (atomic_inc_return(&mm->context.perf_rdpmc_allowed) == 1)
2185 on_each_cpu_mask(mm_cpumask(mm), refresh_pce, NULL, 1); 2185 on_each_cpu_mask(mm_cpumask(mm), refresh_pce, NULL, 1);
@@ -2402,13 +2402,13 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re
2402 return; 2402 return;
2403 } 2403 }
2404 2404
2405 if (perf_hw_regs(regs)) { 2405 if (perf_callchain_store(entry, regs->ip))
2406 if (perf_callchain_store(entry, regs->ip)) 2406 return;
2407 return; 2407
2408 if (perf_hw_regs(regs))
2408 unwind_start(&state, current, regs, NULL); 2409 unwind_start(&state, current, regs, NULL);
2409 } else { 2410 else
2410 unwind_start(&state, current, NULL, (void *)regs->sp); 2411 unwind_start(&state, current, NULL, (void *)regs->sp);
2411 }
2412 2412
2413 for (; !unwind_done(&state); unwind_next_frame(&state)) { 2413 for (; !unwind_done(&state); unwind_next_frame(&state)) {
2414 addr = unwind_get_return_address(&state); 2414 addr = unwind_get_return_address(&state);
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 7acc526b4ad2..505c73dc6a73 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -987,7 +987,7 @@ static u64 pebs_update_adaptive_cfg(struct perf_event *event)
987 pebs_data_cfg |= PEBS_DATACFG_GP; 987 pebs_data_cfg |= PEBS_DATACFG_GP;
988 988
989 if ((sample_type & PERF_SAMPLE_REGS_INTR) && 989 if ((sample_type & PERF_SAMPLE_REGS_INTR) &&
990 (attr->sample_regs_intr & PEBS_XMM_REGS)) 990 (attr->sample_regs_intr & PERF_REG_EXTENDED_MASK))
991 pebs_data_cfg |= PEBS_DATACFG_XMMS; 991 pebs_data_cfg |= PEBS_DATACFG_XMMS;
992 992
993 if (sample_type & PERF_SAMPLE_BRANCH_STACK) { 993 if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
@@ -1964,10 +1964,9 @@ void __init intel_ds_init(void)
1964 x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS); 1964 x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS);
1965 x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS); 1965 x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
1966 x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE; 1966 x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE;
1967 if (x86_pmu.version <= 4) { 1967 if (x86_pmu.version <= 4)
1968 x86_pmu.pebs_no_isolation = 1; 1968 x86_pmu.pebs_no_isolation = 1;
1969 x86_pmu.pebs_no_xmm_regs = 1; 1969
1970 }
1971 if (x86_pmu.pebs) { 1970 if (x86_pmu.pebs) {
1972 char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-'; 1971 char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-';
1973 char *pebs_qual = ""; 1972 char *pebs_qual = "";
@@ -2020,9 +2019,9 @@ void __init intel_ds_init(void)
2020 PERF_SAMPLE_TIME; 2019 PERF_SAMPLE_TIME;
2021 x86_pmu.flags |= PMU_FL_PEBS_ALL; 2020 x86_pmu.flags |= PMU_FL_PEBS_ALL;
2022 pebs_qual = "-baseline"; 2021 pebs_qual = "-baseline";
2022 x86_get_pmu()->capabilities |= PERF_PMU_CAP_EXTENDED_REGS;
2023 } else { 2023 } else {
2024 /* Only basic record supported */ 2024 /* Only basic record supported */
2025 x86_pmu.pebs_no_xmm_regs = 1;
2026 x86_pmu.large_pebs_flags &= 2025 x86_pmu.large_pebs_flags &=
2027 ~(PERF_SAMPLE_ADDR | 2026 ~(PERF_SAMPLE_ADDR |
2028 PERF_SAMPLE_TIME | 2027 PERF_SAMPLE_TIME |
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 9e3fbd47cb56..089bfcdf2f7f 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -1400,6 +1400,7 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = {
1400 X86_UNCORE_MODEL_MATCH(INTEL_FAM6_KABYLAKE_MOBILE, skl_uncore_init), 1400 X86_UNCORE_MODEL_MATCH(INTEL_FAM6_KABYLAKE_MOBILE, skl_uncore_init),
1401 X86_UNCORE_MODEL_MATCH(INTEL_FAM6_KABYLAKE_DESKTOP, skl_uncore_init), 1401 X86_UNCORE_MODEL_MATCH(INTEL_FAM6_KABYLAKE_DESKTOP, skl_uncore_init),
1402 X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE_MOBILE, icl_uncore_init), 1402 X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE_MOBILE, icl_uncore_init),
1403 X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE_NNPI, icl_uncore_init),
1403 {}, 1404 {},
1404}; 1405};
1405 1406
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index a6ac2f4f76fc..4e346856ee19 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -121,24 +121,6 @@ struct amd_nb {
121 (1ULL << PERF_REG_X86_R14) | \ 121 (1ULL << PERF_REG_X86_R14) | \
122 (1ULL << PERF_REG_X86_R15)) 122 (1ULL << PERF_REG_X86_R15))
123 123
124#define PEBS_XMM_REGS \
125 ((1ULL << PERF_REG_X86_XMM0) | \
126 (1ULL << PERF_REG_X86_XMM1) | \
127 (1ULL << PERF_REG_X86_XMM2) | \
128 (1ULL << PERF_REG_X86_XMM3) | \
129 (1ULL << PERF_REG_X86_XMM4) | \
130 (1ULL << PERF_REG_X86_XMM5) | \
131 (1ULL << PERF_REG_X86_XMM6) | \
132 (1ULL << PERF_REG_X86_XMM7) | \
133 (1ULL << PERF_REG_X86_XMM8) | \
134 (1ULL << PERF_REG_X86_XMM9) | \
135 (1ULL << PERF_REG_X86_XMM10) | \
136 (1ULL << PERF_REG_X86_XMM11) | \
137 (1ULL << PERF_REG_X86_XMM12) | \
138 (1ULL << PERF_REG_X86_XMM13) | \
139 (1ULL << PERF_REG_X86_XMM14) | \
140 (1ULL << PERF_REG_X86_XMM15))
141
142/* 124/*
143 * Per register state. 125 * Per register state.
144 */ 126 */
@@ -668,8 +650,7 @@ struct x86_pmu {
668 pebs_broken :1, 650 pebs_broken :1,
669 pebs_prec_dist :1, 651 pebs_prec_dist :1,
670 pebs_no_tlb :1, 652 pebs_no_tlb :1,
671 pebs_no_isolation :1, 653 pebs_no_isolation :1;
672 pebs_no_xmm_regs :1;
673 int pebs_record_size; 654 int pebs_record_size;
674 int pebs_buffer_size; 655 int pebs_buffer_size;
675 int max_pebs_events; 656 int max_pebs_events;
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 1608050e9df9..0e033ef11a9f 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -17,64 +17,13 @@
17#include <linux/version.h> 17#include <linux/version.h>
18#include <linux/vmalloc.h> 18#include <linux/vmalloc.h>
19#include <linux/mm.h> 19#include <linux/mm.h>
20#include <linux/clockchips.h>
21#include <linux/hyperv.h> 20#include <linux/hyperv.h>
22#include <linux/slab.h> 21#include <linux/slab.h>
23#include <linux/cpuhotplug.h> 22#include <linux/cpuhotplug.h>
24 23#include <clocksource/hyperv_timer.h>
25#ifdef CONFIG_HYPERV_TSCPAGE
26
27static struct ms_hyperv_tsc_page *tsc_pg;
28
29struct ms_hyperv_tsc_page *hv_get_tsc_page(void)
30{
31 return tsc_pg;
32}
33EXPORT_SYMBOL_GPL(hv_get_tsc_page);
34
35static u64 read_hv_clock_tsc(struct clocksource *arg)
36{
37 u64 current_tick = hv_read_tsc_page(tsc_pg);
38
39 if (current_tick == U64_MAX)
40 rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick);
41
42 return current_tick;
43}
44
45static struct clocksource hyperv_cs_tsc = {
46 .name = "hyperv_clocksource_tsc_page",
47 .rating = 400,
48 .read = read_hv_clock_tsc,
49 .mask = CLOCKSOURCE_MASK(64),
50 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
51};
52#endif
53
54static u64 read_hv_clock_msr(struct clocksource *arg)
55{
56 u64 current_tick;
57 /*
58 * Read the partition counter to get the current tick count. This count
59 * is set to 0 when the partition is created and is incremented in
60 * 100 nanosecond units.
61 */
62 rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick);
63 return current_tick;
64}
65
66static struct clocksource hyperv_cs_msr = {
67 .name = "hyperv_clocksource_msr",
68 .rating = 400,
69 .read = read_hv_clock_msr,
70 .mask = CLOCKSOURCE_MASK(64),
71 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
72};
73 24
74void *hv_hypercall_pg; 25void *hv_hypercall_pg;
75EXPORT_SYMBOL_GPL(hv_hypercall_pg); 26EXPORT_SYMBOL_GPL(hv_hypercall_pg);
76struct clocksource *hyperv_cs;
77EXPORT_SYMBOL_GPL(hyperv_cs);
78 27
79u32 *hv_vp_index; 28u32 *hv_vp_index;
80EXPORT_SYMBOL_GPL(hv_vp_index); 29EXPORT_SYMBOL_GPL(hv_vp_index);
@@ -343,42 +292,8 @@ void __init hyperv_init(void)
343 292
344 x86_init.pci.arch_init = hv_pci_init; 293 x86_init.pci.arch_init = hv_pci_init;
345 294
346 /* 295 /* Register Hyper-V specific clocksource */
347 * Register Hyper-V specific clocksource. 296 hv_init_clocksource();
348 */
349#ifdef CONFIG_HYPERV_TSCPAGE
350 if (ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE) {
351 union hv_x64_msr_hypercall_contents tsc_msr;
352
353 tsc_pg = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL);
354 if (!tsc_pg)
355 goto register_msr_cs;
356
357 hyperv_cs = &hyperv_cs_tsc;
358
359 rdmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64);
360
361 tsc_msr.enable = 1;
362 tsc_msr.guest_physical_address = vmalloc_to_pfn(tsc_pg);
363
364 wrmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64);
365
366 hyperv_cs_tsc.archdata.vclock_mode = VCLOCK_HVCLOCK;
367
368 clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100);
369 return;
370 }
371register_msr_cs:
372#endif
373 /*
374 * For 32 bit guests just use the MSR based mechanism for reading
375 * the partition counter.
376 */
377
378 hyperv_cs = &hyperv_cs_msr;
379 if (ms_hyperv.features & HV_MSR_TIME_REF_COUNT_AVAILABLE)
380 clocksource_register_hz(&hyperv_cs_msr, NSEC_PER_SEC/100);
381
382 return; 297 return;
383 298
384remove_cpuhp_state: 299remove_cpuhp_state:
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 1340fa53b575..050e5f9ebf81 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -53,7 +53,7 @@ extern unsigned int apic_verbosity;
53extern int local_apic_timer_c2_ok; 53extern int local_apic_timer_c2_ok;
54 54
55extern int disable_apic; 55extern int disable_apic;
56extern unsigned int lapic_timer_frequency; 56extern unsigned int lapic_timer_period;
57 57
58extern enum apic_intr_mode_id apic_intr_mode; 58extern enum apic_intr_mode_id apic_intr_mode;
59enum apic_intr_mode_id { 59enum apic_intr_mode_id {
@@ -155,7 +155,6 @@ static inline int apic_force_enable(unsigned long addr)
155extern int apic_force_enable(unsigned long addr); 155extern int apic_force_enable(unsigned long addr);
156#endif 156#endif
157 157
158extern void apic_bsp_setup(bool upmode);
159extern void apic_ap_setup(void); 158extern void apic_ap_setup(void);
160 159
161/* 160/*
@@ -175,6 +174,7 @@ extern void lapic_assign_system_vectors(void);
175extern void lapic_assign_legacy_vector(unsigned int isairq, bool replace); 174extern void lapic_assign_legacy_vector(unsigned int isairq, bool replace);
176extern void lapic_online(void); 175extern void lapic_online(void);
177extern void lapic_offline(void); 176extern void lapic_offline(void);
177extern bool apic_needs_pit(void);
178 178
179#else /* !CONFIG_X86_LOCAL_APIC */ 179#else /* !CONFIG_X86_LOCAL_APIC */
180static inline void lapic_shutdown(void) { } 180static inline void lapic_shutdown(void) { }
@@ -188,6 +188,7 @@ static inline void init_bsp_APIC(void) { }
188static inline void apic_intr_mode_init(void) { } 188static inline void apic_intr_mode_init(void) { }
189static inline void lapic_assign_system_vectors(void) { } 189static inline void lapic_assign_system_vectors(void) { }
190static inline void lapic_assign_legacy_vector(unsigned int i, bool r) { } 190static inline void lapic_assign_legacy_vector(unsigned int i, bool r) { }
191static inline bool apic_needs_pit(void) { return true; }
191#endif /* !CONFIG_X86_LOCAL_APIC */ 192#endif /* !CONFIG_X86_LOCAL_APIC */
192 193
193#ifdef CONFIG_X86_X2APIC 194#ifdef CONFIG_X86_X2APIC
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index ea3d95275b43..115127c7ad28 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -54,7 +54,7 @@ static __always_inline void arch_atomic_add(int i, atomic_t *v)
54{ 54{
55 asm volatile(LOCK_PREFIX "addl %1,%0" 55 asm volatile(LOCK_PREFIX "addl %1,%0"
56 : "+m" (v->counter) 56 : "+m" (v->counter)
57 : "ir" (i)); 57 : "ir" (i) : "memory");
58} 58}
59 59
60/** 60/**
@@ -68,7 +68,7 @@ static __always_inline void arch_atomic_sub(int i, atomic_t *v)
68{ 68{
69 asm volatile(LOCK_PREFIX "subl %1,%0" 69 asm volatile(LOCK_PREFIX "subl %1,%0"
70 : "+m" (v->counter) 70 : "+m" (v->counter)
71 : "ir" (i)); 71 : "ir" (i) : "memory");
72} 72}
73 73
74/** 74/**
@@ -95,7 +95,7 @@ static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v)
95static __always_inline void arch_atomic_inc(atomic_t *v) 95static __always_inline void arch_atomic_inc(atomic_t *v)
96{ 96{
97 asm volatile(LOCK_PREFIX "incl %0" 97 asm volatile(LOCK_PREFIX "incl %0"
98 : "+m" (v->counter)); 98 : "+m" (v->counter) :: "memory");
99} 99}
100#define arch_atomic_inc arch_atomic_inc 100#define arch_atomic_inc arch_atomic_inc
101 101
@@ -108,7 +108,7 @@ static __always_inline void arch_atomic_inc(atomic_t *v)
108static __always_inline void arch_atomic_dec(atomic_t *v) 108static __always_inline void arch_atomic_dec(atomic_t *v)
109{ 109{
110 asm volatile(LOCK_PREFIX "decl %0" 110 asm volatile(LOCK_PREFIX "decl %0"
111 : "+m" (v->counter)); 111 : "+m" (v->counter) :: "memory");
112} 112}
113#define arch_atomic_dec arch_atomic_dec 113#define arch_atomic_dec arch_atomic_dec
114 114
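The hunks above add a "memory" clobber to the non-value-returning atomics. As a hedged standalone sketch (not from the patch) of what that clobber provides at the compiler level: the asm statement becomes a compiler barrier, so surrounding plain accesses cannot be cached in registers or reordered across the atomic operation.

/* x86 only: uses the lock-prefixed incl instruction. */
static int payload;
static int published;

static inline void refcount_inc_barrier(int *v)
{
	/* The "memory" clobber is what makes this a compiler barrier. */
	asm volatile("lock incl %0" : "+m" (*v) :: "memory");
}

void produce(int value, int *refcount)
{
	payload = value;		/* must be complete before the asm   */
	refcount_inc_barrier(refcount);
	published = 1;			/* cannot be hoisted above the asm   */
}

int main(void)
{
	int refcount = 0;

	produce(7, &refcount);
	return published - 1;		/* 0 on success */
}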
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index 6a5b0ec460da..52cfaecb13f9 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -9,7 +9,7 @@
9/* An 64bit atomic type */ 9/* An 64bit atomic type */
10 10
11typedef struct { 11typedef struct {
12 u64 __aligned(8) counter; 12 s64 __aligned(8) counter;
13} atomic64_t; 13} atomic64_t;
14 14
15#define ATOMIC64_INIT(val) { (val) } 15#define ATOMIC64_INIT(val) { (val) }
@@ -71,8 +71,7 @@ ATOMIC64_DECL(add_unless);
71 * the old value. 71 * the old value.
72 */ 72 */
73 73
74static inline long long arch_atomic64_cmpxchg(atomic64_t *v, long long o, 74static inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n)
75 long long n)
76{ 75{
77 return arch_cmpxchg64(&v->counter, o, n); 76 return arch_cmpxchg64(&v->counter, o, n);
78} 77}
@@ -85,9 +84,9 @@ static inline long long arch_atomic64_cmpxchg(atomic64_t *v, long long o,
85 * Atomically xchgs the value of @v to @n and returns 84 * Atomically xchgs the value of @v to @n and returns
86 * the old value. 85 * the old value.
87 */ 86 */
88static inline long long arch_atomic64_xchg(atomic64_t *v, long long n) 87static inline s64 arch_atomic64_xchg(atomic64_t *v, s64 n)
89{ 88{
90 long long o; 89 s64 o;
91 unsigned high = (unsigned)(n >> 32); 90 unsigned high = (unsigned)(n >> 32);
92 unsigned low = (unsigned)n; 91 unsigned low = (unsigned)n;
93 alternative_atomic64(xchg, "=&A" (o), 92 alternative_atomic64(xchg, "=&A" (o),
@@ -103,7 +102,7 @@ static inline long long arch_atomic64_xchg(atomic64_t *v, long long n)
103 * 102 *
104 * Atomically sets the value of @v to @n. 103 * Atomically sets the value of @v to @n.
105 */ 104 */
106static inline void arch_atomic64_set(atomic64_t *v, long long i) 105static inline void arch_atomic64_set(atomic64_t *v, s64 i)
107{ 106{
108 unsigned high = (unsigned)(i >> 32); 107 unsigned high = (unsigned)(i >> 32);
109 unsigned low = (unsigned)i; 108 unsigned low = (unsigned)i;
@@ -118,9 +117,9 @@ static inline void arch_atomic64_set(atomic64_t *v, long long i)
118 * 117 *
119 * Atomically reads the value of @v and returns it. 118 * Atomically reads the value of @v and returns it.
120 */ 119 */
121static inline long long arch_atomic64_read(const atomic64_t *v) 120static inline s64 arch_atomic64_read(const atomic64_t *v)
122{ 121{
123 long long r; 122 s64 r;
124 alternative_atomic64(read, "=&A" (r), "c" (v) : "memory"); 123 alternative_atomic64(read, "=&A" (r), "c" (v) : "memory");
125 return r; 124 return r;
126} 125}
@@ -132,7 +131,7 @@ static inline long long arch_atomic64_read(const atomic64_t *v)
132 * 131 *
133 * Atomically adds @i to @v and returns @i + *@v 132 * Atomically adds @i to @v and returns @i + *@v
134 */ 133 */
135static inline long long arch_atomic64_add_return(long long i, atomic64_t *v) 134static inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v)
136{ 135{
137 alternative_atomic64(add_return, 136 alternative_atomic64(add_return,
138 ASM_OUTPUT2("+A" (i), "+c" (v)), 137 ASM_OUTPUT2("+A" (i), "+c" (v)),
@@ -143,7 +142,7 @@ static inline long long arch_atomic64_add_return(long long i, atomic64_t *v)
143/* 142/*
144 * Other variants with different arithmetic operators: 143 * Other variants with different arithmetic operators:
145 */ 144 */
146static inline long long arch_atomic64_sub_return(long long i, atomic64_t *v) 145static inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v)
147{ 146{
148 alternative_atomic64(sub_return, 147 alternative_atomic64(sub_return,
149 ASM_OUTPUT2("+A" (i), "+c" (v)), 148 ASM_OUTPUT2("+A" (i), "+c" (v)),
@@ -151,18 +150,18 @@ static inline long long arch_atomic64_sub_return(long long i, atomic64_t *v)
151 return i; 150 return i;
152} 151}
153 152
154static inline long long arch_atomic64_inc_return(atomic64_t *v) 153static inline s64 arch_atomic64_inc_return(atomic64_t *v)
155{ 154{
156 long long a; 155 s64 a;
157 alternative_atomic64(inc_return, "=&A" (a), 156 alternative_atomic64(inc_return, "=&A" (a),
158 "S" (v) : "memory", "ecx"); 157 "S" (v) : "memory", "ecx");
159 return a; 158 return a;
160} 159}
161#define arch_atomic64_inc_return arch_atomic64_inc_return 160#define arch_atomic64_inc_return arch_atomic64_inc_return
162 161
163static inline long long arch_atomic64_dec_return(atomic64_t *v) 162static inline s64 arch_atomic64_dec_return(atomic64_t *v)
164{ 163{
165 long long a; 164 s64 a;
166 alternative_atomic64(dec_return, "=&A" (a), 165 alternative_atomic64(dec_return, "=&A" (a),
167 "S" (v) : "memory", "ecx"); 166 "S" (v) : "memory", "ecx");
168 return a; 167 return a;
@@ -176,7 +175,7 @@ static inline long long arch_atomic64_dec_return(atomic64_t *v)
176 * 175 *
177 * Atomically adds @i to @v. 176 * Atomically adds @i to @v.
178 */ 177 */
179static inline long long arch_atomic64_add(long long i, atomic64_t *v) 178static inline s64 arch_atomic64_add(s64 i, atomic64_t *v)
180{ 179{
181 __alternative_atomic64(add, add_return, 180 __alternative_atomic64(add, add_return,
182 ASM_OUTPUT2("+A" (i), "+c" (v)), 181 ASM_OUTPUT2("+A" (i), "+c" (v)),
@@ -191,7 +190,7 @@ static inline long long arch_atomic64_add(long long i, atomic64_t *v)
191 * 190 *
192 * Atomically subtracts @i from @v. 191 * Atomically subtracts @i from @v.
193 */ 192 */
194static inline long long arch_atomic64_sub(long long i, atomic64_t *v) 193static inline s64 arch_atomic64_sub(s64 i, atomic64_t *v)
195{ 194{
196 __alternative_atomic64(sub, sub_return, 195 __alternative_atomic64(sub, sub_return,
197 ASM_OUTPUT2("+A" (i), "+c" (v)), 196 ASM_OUTPUT2("+A" (i), "+c" (v)),
@@ -234,8 +233,7 @@ static inline void arch_atomic64_dec(atomic64_t *v)
234 * Atomically adds @a to @v, so long as it was not @u. 233 * Atomically adds @a to @v, so long as it was not @u.
235 * Returns non-zero if the add was done, zero otherwise. 234 * Returns non-zero if the add was done, zero otherwise.
236 */ 235 */
237static inline int arch_atomic64_add_unless(atomic64_t *v, long long a, 236static inline int arch_atomic64_add_unless(atomic64_t *v, s64 a, s64 u)
238 long long u)
239{ 237{
240 unsigned low = (unsigned)u; 238 unsigned low = (unsigned)u;
241 unsigned high = (unsigned)(u >> 32); 239 unsigned high = (unsigned)(u >> 32);
@@ -254,9 +252,9 @@ static inline int arch_atomic64_inc_not_zero(atomic64_t *v)
254} 252}
255#define arch_atomic64_inc_not_zero arch_atomic64_inc_not_zero 253#define arch_atomic64_inc_not_zero arch_atomic64_inc_not_zero
256 254
257static inline long long arch_atomic64_dec_if_positive(atomic64_t *v) 255static inline s64 arch_atomic64_dec_if_positive(atomic64_t *v)
258{ 256{
259 long long r; 257 s64 r;
260 alternative_atomic64(dec_if_positive, "=&A" (r), 258 alternative_atomic64(dec_if_positive, "=&A" (r),
261 "S" (v) : "ecx", "memory"); 259 "S" (v) : "ecx", "memory");
262 return r; 260 return r;
@@ -266,17 +264,17 @@ static inline long long arch_atomic64_dec_if_positive(atomic64_t *v)
266#undef alternative_atomic64 264#undef alternative_atomic64
267#undef __alternative_atomic64 265#undef __alternative_atomic64
268 266
269static inline void arch_atomic64_and(long long i, atomic64_t *v) 267static inline void arch_atomic64_and(s64 i, atomic64_t *v)
270{ 268{
271 long long old, c = 0; 269 s64 old, c = 0;
272 270
273 while ((old = arch_atomic64_cmpxchg(v, c, c & i)) != c) 271 while ((old = arch_atomic64_cmpxchg(v, c, c & i)) != c)
274 c = old; 272 c = old;
275} 273}
276 274
277static inline long long arch_atomic64_fetch_and(long long i, atomic64_t *v) 275static inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v)
278{ 276{
279 long long old, c = 0; 277 s64 old, c = 0;
280 278
281 while ((old = arch_atomic64_cmpxchg(v, c, c & i)) != c) 279 while ((old = arch_atomic64_cmpxchg(v, c, c & i)) != c)
282 c = old; 280 c = old;
@@ -284,17 +282,17 @@ static inline long long arch_atomic64_fetch_and(long long i, atomic64_t *v)
284 return old; 282 return old;
285} 283}
286 284
287static inline void arch_atomic64_or(long long i, atomic64_t *v) 285static inline void arch_atomic64_or(s64 i, atomic64_t *v)
288{ 286{
289 long long old, c = 0; 287 s64 old, c = 0;
290 288
291 while ((old = arch_atomic64_cmpxchg(v, c, c | i)) != c) 289 while ((old = arch_atomic64_cmpxchg(v, c, c | i)) != c)
292 c = old; 290 c = old;
293} 291}
294 292
295static inline long long arch_atomic64_fetch_or(long long i, atomic64_t *v) 293static inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v)
296{ 294{
297 long long old, c = 0; 295 s64 old, c = 0;
298 296
299 while ((old = arch_atomic64_cmpxchg(v, c, c | i)) != c) 297 while ((old = arch_atomic64_cmpxchg(v, c, c | i)) != c)
300 c = old; 298 c = old;
@@ -302,17 +300,17 @@ static inline long long arch_atomic64_fetch_or(long long i, atomic64_t *v)
302 return old; 300 return old;
303} 301}
304 302
305static inline void arch_atomic64_xor(long long i, atomic64_t *v) 303static inline void arch_atomic64_xor(s64 i, atomic64_t *v)
306{ 304{
307 long long old, c = 0; 305 s64 old, c = 0;
308 306
309 while ((old = arch_atomic64_cmpxchg(v, c, c ^ i)) != c) 307 while ((old = arch_atomic64_cmpxchg(v, c, c ^ i)) != c)
310 c = old; 308 c = old;
311} 309}
312 310
313static inline long long arch_atomic64_fetch_xor(long long i, atomic64_t *v) 311static inline s64 arch_atomic64_fetch_xor(s64 i, atomic64_t *v)
314{ 312{
315 long long old, c = 0; 313 s64 old, c = 0;
316 314
317 while ((old = arch_atomic64_cmpxchg(v, c, c ^ i)) != c) 315 while ((old = arch_atomic64_cmpxchg(v, c, c ^ i)) != c)
318 c = old; 316 c = old;
@@ -320,9 +318,9 @@ static inline long long arch_atomic64_fetch_xor(long long i, atomic64_t *v)
320 return old; 318 return old;
321} 319}
322 320
323static inline long long arch_atomic64_fetch_add(long long i, atomic64_t *v) 321static inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v)
324{ 322{
325 long long old, c = 0; 323 s64 old, c = 0;
326 324
327 while ((old = arch_atomic64_cmpxchg(v, c, c + i)) != c) 325 while ((old = arch_atomic64_cmpxchg(v, c, c + i)) != c)
328 c = old; 326 c = old;
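Beyond the long long to s64 cleanup, the 32-bit fallbacks above all share one cmpxchg retry loop. A minimal sketch of that pattern using a GCC builtin in place of alternative_atomic64(), for illustration only:

#include <stdint.h>

static int64_t fetch_and_loop(int64_t *v, int64_t i)
{
        int64_t old, c = 0;

        /* retry until the compare-and-swap sees the value that c was based on */
        while ((old = __sync_val_compare_and_swap(v, c, c & i)) != c)
                c = old;

        return old;                  /* value observed before the AND */
}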
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index dadc20adba21..95c6ceac66b9 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -17,7 +17,7 @@
17 * Atomically reads the value of @v. 17 * Atomically reads the value of @v.
18 * Doesn't imply a read memory barrier. 18 * Doesn't imply a read memory barrier.
19 */ 19 */
20static inline long arch_atomic64_read(const atomic64_t *v) 20static inline s64 arch_atomic64_read(const atomic64_t *v)
21{ 21{
22 return READ_ONCE((v)->counter); 22 return READ_ONCE((v)->counter);
23} 23}
@@ -29,7 +29,7 @@ static inline long arch_atomic64_read(const atomic64_t *v)
29 * 29 *
30 * Atomically sets the value of @v to @i. 30 * Atomically sets the value of @v to @i.
31 */ 31 */
32static inline void arch_atomic64_set(atomic64_t *v, long i) 32static inline void arch_atomic64_set(atomic64_t *v, s64 i)
33{ 33{
34 WRITE_ONCE(v->counter, i); 34 WRITE_ONCE(v->counter, i);
35} 35}
@@ -41,11 +41,11 @@ static inline void arch_atomic64_set(atomic64_t *v, long i)
41 * 41 *
42 * Atomically adds @i to @v. 42 * Atomically adds @i to @v.
43 */ 43 */
44static __always_inline void arch_atomic64_add(long i, atomic64_t *v) 44static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v)
45{ 45{
46 asm volatile(LOCK_PREFIX "addq %1,%0" 46 asm volatile(LOCK_PREFIX "addq %1,%0"
47 : "=m" (v->counter) 47 : "=m" (v->counter)
48 : "er" (i), "m" (v->counter)); 48 : "er" (i), "m" (v->counter) : "memory");
49} 49}
50 50
51/** 51/**
@@ -55,11 +55,11 @@ static __always_inline void arch_atomic64_add(long i, atomic64_t *v)
55 * 55 *
56 * Atomically subtracts @i from @v. 56 * Atomically subtracts @i from @v.
57 */ 57 */
58static inline void arch_atomic64_sub(long i, atomic64_t *v) 58static inline void arch_atomic64_sub(s64 i, atomic64_t *v)
59{ 59{
60 asm volatile(LOCK_PREFIX "subq %1,%0" 60 asm volatile(LOCK_PREFIX "subq %1,%0"
61 : "=m" (v->counter) 61 : "=m" (v->counter)
62 : "er" (i), "m" (v->counter)); 62 : "er" (i), "m" (v->counter) : "memory");
63} 63}
64 64
65/** 65/**
@@ -71,7 +71,7 @@ static inline void arch_atomic64_sub(long i, atomic64_t *v)
71 * true if the result is zero, or false for all 71 * true if the result is zero, or false for all
72 * other cases. 72 * other cases.
73 */ 73 */
74static inline bool arch_atomic64_sub_and_test(long i, atomic64_t *v) 74static inline bool arch_atomic64_sub_and_test(s64 i, atomic64_t *v)
75{ 75{
76 return GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, e, "er", i); 76 return GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, e, "er", i);
77} 77}
@@ -87,7 +87,7 @@ static __always_inline void arch_atomic64_inc(atomic64_t *v)
87{ 87{
88 asm volatile(LOCK_PREFIX "incq %0" 88 asm volatile(LOCK_PREFIX "incq %0"
89 : "=m" (v->counter) 89 : "=m" (v->counter)
90 : "m" (v->counter)); 90 : "m" (v->counter) : "memory");
91} 91}
92#define arch_atomic64_inc arch_atomic64_inc 92#define arch_atomic64_inc arch_atomic64_inc
93 93
@@ -101,7 +101,7 @@ static __always_inline void arch_atomic64_dec(atomic64_t *v)
101{ 101{
102 asm volatile(LOCK_PREFIX "decq %0" 102 asm volatile(LOCK_PREFIX "decq %0"
103 : "=m" (v->counter) 103 : "=m" (v->counter)
104 : "m" (v->counter)); 104 : "m" (v->counter) : "memory");
105} 105}
106#define arch_atomic64_dec arch_atomic64_dec 106#define arch_atomic64_dec arch_atomic64_dec
107 107
@@ -142,7 +142,7 @@ static inline bool arch_atomic64_inc_and_test(atomic64_t *v)
142 * if the result is negative, or false when 142 * if the result is negative, or false when
143 * result is greater than or equal to zero. 143 * result is greater than or equal to zero.
144 */ 144 */
145static inline bool arch_atomic64_add_negative(long i, atomic64_t *v) 145static inline bool arch_atomic64_add_negative(s64 i, atomic64_t *v)
146{ 146{
147 return GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, s, "er", i); 147 return GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, s, "er", i);
148} 148}
@@ -155,43 +155,43 @@ static inline bool arch_atomic64_add_negative(long i, atomic64_t *v)
155 * 155 *
156 * Atomically adds @i to @v and returns @i + @v 156 * Atomically adds @i to @v and returns @i + @v
157 */ 157 */
158static __always_inline long arch_atomic64_add_return(long i, atomic64_t *v) 158static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v)
159{ 159{
160 return i + xadd(&v->counter, i); 160 return i + xadd(&v->counter, i);
161} 161}
162 162
163static inline long arch_atomic64_sub_return(long i, atomic64_t *v) 163static inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v)
164{ 164{
165 return arch_atomic64_add_return(-i, v); 165 return arch_atomic64_add_return(-i, v);
166} 166}
167 167
168static inline long arch_atomic64_fetch_add(long i, atomic64_t *v) 168static inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v)
169{ 169{
170 return xadd(&v->counter, i); 170 return xadd(&v->counter, i);
171} 171}
172 172
173static inline long arch_atomic64_fetch_sub(long i, atomic64_t *v) 173static inline s64 arch_atomic64_fetch_sub(s64 i, atomic64_t *v)
174{ 174{
175 return xadd(&v->counter, -i); 175 return xadd(&v->counter, -i);
176} 176}
177 177
178static inline long arch_atomic64_cmpxchg(atomic64_t *v, long old, long new) 178static inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
179{ 179{
180 return arch_cmpxchg(&v->counter, old, new); 180 return arch_cmpxchg(&v->counter, old, new);
181} 181}
182 182
183#define arch_atomic64_try_cmpxchg arch_atomic64_try_cmpxchg 183#define arch_atomic64_try_cmpxchg arch_atomic64_try_cmpxchg
184static __always_inline bool arch_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, long new) 184static __always_inline bool arch_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new)
185{ 185{
186 return try_cmpxchg(&v->counter, old, new); 186 return try_cmpxchg(&v->counter, old, new);
187} 187}
188 188
189static inline long arch_atomic64_xchg(atomic64_t *v, long new) 189static inline s64 arch_atomic64_xchg(atomic64_t *v, s64 new)
190{ 190{
191 return arch_xchg(&v->counter, new); 191 return arch_xchg(&v->counter, new);
192} 192}
193 193
194static inline void arch_atomic64_and(long i, atomic64_t *v) 194static inline void arch_atomic64_and(s64 i, atomic64_t *v)
195{ 195{
196 asm volatile(LOCK_PREFIX "andq %1,%0" 196 asm volatile(LOCK_PREFIX "andq %1,%0"
197 : "+m" (v->counter) 197 : "+m" (v->counter)
@@ -199,7 +199,7 @@ static inline void arch_atomic64_and(long i, atomic64_t *v)
199 : "memory"); 199 : "memory");
200} 200}
201 201
202static inline long arch_atomic64_fetch_and(long i, atomic64_t *v) 202static inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v)
203{ 203{
204 s64 val = arch_atomic64_read(v); 204 s64 val = arch_atomic64_read(v);
205 205
@@ -208,7 +208,7 @@ static inline long arch_atomic64_fetch_and(long i, atomic64_t *v)
208 return val; 208 return val;
209} 209}
210 210
211static inline void arch_atomic64_or(long i, atomic64_t *v) 211static inline void arch_atomic64_or(s64 i, atomic64_t *v)
212{ 212{
213 asm volatile(LOCK_PREFIX "orq %1,%0" 213 asm volatile(LOCK_PREFIX "orq %1,%0"
214 : "+m" (v->counter) 214 : "+m" (v->counter)
@@ -216,7 +216,7 @@ static inline void arch_atomic64_or(long i, atomic64_t *v)
216 : "memory"); 216 : "memory");
217} 217}
218 218
219static inline long arch_atomic64_fetch_or(long i, atomic64_t *v) 219static inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v)
220{ 220{
221 s64 val = arch_atomic64_read(v); 221 s64 val = arch_atomic64_read(v);
222 222
@@ -225,7 +225,7 @@ static inline long arch_atomic64_fetch_or(long i, atomic64_t *v)
225 return val; 225 return val;
226} 226}
227 227
228static inline void arch_atomic64_xor(long i, atomic64_t *v) 228static inline void arch_atomic64_xor(s64 i, atomic64_t *v)
229{ 229{
230 asm volatile(LOCK_PREFIX "xorq %1,%0" 230 asm volatile(LOCK_PREFIX "xorq %1,%0"
231 : "+m" (v->counter) 231 : "+m" (v->counter)
@@ -233,7 +233,7 @@ static inline void arch_atomic64_xor(long i, atomic64_t *v)
233 : "memory"); 233 : "memory");
234} 234}
235 235
236static inline long arch_atomic64_fetch_xor(long i, atomic64_t *v) 236static inline s64 arch_atomic64_fetch_xor(s64 i, atomic64_t *v)
237{ 237{
238 s64 val = arch_atomic64_read(v); 238 s64 val = arch_atomic64_read(v);
239 239
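On 64-bit, the value-returning variants above lean on xadd, which leaves the old value in the register operand, so add_return is simply the old value plus the increment. A standalone sketch of that relationship (illustrative, not the kernel's xadd() macro):

static inline long fetch_add64(long *v, long i)
{
        /* after xadd, the register operand holds the pre-add value of *v */
        asm volatile("lock xaddq %0, %1" : "+r" (i), "+m" (*v) :: "memory");
        return i;
}

static inline long add_return64(long *v, long i)
{
        return i + fetch_add64(v, i);    /* old value plus the increment */
}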
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index 14de0432d288..84f848c2541a 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -80,8 +80,8 @@ do { \
80}) 80})
81 81
82/* Atomic operations are already serializing on x86 */ 82/* Atomic operations are already serializing on x86 */
83#define __smp_mb__before_atomic() barrier() 83#define __smp_mb__before_atomic() do { } while (0)
84#define __smp_mb__after_atomic() barrier() 84#define __smp_mb__after_atomic() do { } while (0)
85 85
86#include <asm-generic/barrier.h> 86#include <asm-generic/barrier.h>
87 87
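Dropping barrier() here is only safe because the atomics above now carry their own "memory" clobber. The difference between the two forms, as a short sketch:

/* compiler-only fence: emits no instruction but blocks compiler reordering */
#define my_barrier()                 asm volatile("" ::: "memory")

/* truly empty statement: neither an instruction nor a compiler fence */
#define my_smp_mb__before_atomic()   do { } while (0)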
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 1d337c51f7e6..58acda503817 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -22,8 +22,8 @@ enum cpuid_leafs
22 CPUID_LNX_3, 22 CPUID_LNX_3,
23 CPUID_7_0_EBX, 23 CPUID_7_0_EBX,
24 CPUID_D_1_EAX, 24 CPUID_D_1_EAX,
25 CPUID_F_0_EDX, 25 CPUID_LNX_4,
26 CPUID_F_1_EDX, 26 CPUID_7_1_EAX,
27 CPUID_8000_0008_EBX, 27 CPUID_8000_0008_EBX,
28 CPUID_6_EAX, 28 CPUID_6_EAX,
29 CPUID_8000_000A_EDX, 29 CPUID_8000_000A_EDX,
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 75f27ee2c263..998c2cc08363 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -239,12 +239,14 @@
239#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */ 239#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */
240#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */ 240#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */
241#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */ 241#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */
242#define X86_FEATURE_FDP_EXCPTN_ONLY ( 9*32+ 6) /* "" FPU data pointer updated only on x87 exceptions */
242#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */ 243#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */
243#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */ 244#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */
244#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB instructions */ 245#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB instructions */
245#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */ 246#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */
246#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */ 247#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */
247#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */ 248#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */
249#define X86_FEATURE_ZERO_FCS_FDS ( 9*32+13) /* "" Zero out FPU CS and FPU DS */
248#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */ 250#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */
249#define X86_FEATURE_RDT_A ( 9*32+15) /* Resource Director Technology Allocation */ 251#define X86_FEATURE_RDT_A ( 9*32+15) /* Resource Director Technology Allocation */
250#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */ 252#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */
@@ -269,13 +271,19 @@
269#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 instruction */ 271#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 instruction */
270#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS instructions */ 272#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS instructions */
271 273
272/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (EDX), word 11 */ 274/*
273#define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */ 275 * Extended auxiliary flags: Linux defined - for features scattered in various
276 * CPUID levels like 0xf, etc.
277 *
278 * Reuse free bits when adding new feature flags!
279 */
280#define X86_FEATURE_CQM_LLC (11*32+ 0) /* LLC QoS if 1 */
281#define X86_FEATURE_CQM_OCCUP_LLC (11*32+ 1) /* LLC occupancy monitoring */
282#define X86_FEATURE_CQM_MBM_TOTAL (11*32+ 2) /* LLC Total MBM monitoring */
283#define X86_FEATURE_CQM_MBM_LOCAL (11*32+ 3) /* LLC Local MBM monitoring */
274 284
275/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (EDX), word 12 */ 285/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
276#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring */ 286#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */
277#define X86_FEATURE_CQM_MBM_TOTAL (12*32+ 1) /* LLC Total MBM monitoring */
278#define X86_FEATURE_CQM_MBM_LOCAL (12*32+ 2) /* LLC Local MBM monitoring */
279 287
280/* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ 288/* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
281#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ 289#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */
@@ -322,6 +330,7 @@
322#define X86_FEATURE_UMIP (16*32+ 2) /* User Mode Instruction Protection */ 330#define X86_FEATURE_UMIP (16*32+ 2) /* User Mode Instruction Protection */
323#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */ 331#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */
324#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ 332#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */
333#define X86_FEATURE_WAITPKG (16*32+ 5) /* UMONITOR/UMWAIT/TPAUSE Instructions */
325#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */ 334#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */
326#define X86_FEATURE_GFNI (16*32+ 8) /* Galois Field New Instructions */ 335#define X86_FEATURE_GFNI (16*32+ 8) /* Galois Field New Instructions */
327#define X86_FEATURE_VAES (16*32+ 9) /* Vector AES */ 336#define X86_FEATURE_VAES (16*32+ 9) /* Vector AES */
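The CQM flags move from the old leaf words 11 and 12 into a single Linux-defined word 11, freeing word 12 for CPUID 0x7:1 (EAX) features such as AVX512_BF16. Each X86_FEATURE_* value packs a 32-bit word index and a bit position; a small sketch of the encoding:

#include <stdio.h>

#define X86_FEATURE_CQM_LLC     (11*32 + 0)   /* values from the hunk above */
#define X86_FEATURE_AVX512_BF16 (12*32 + 5)

int main(void)
{
        printf("CQM_LLC:     word %d, bit %d\n",
               X86_FEATURE_CQM_LLC / 32, X86_FEATURE_CQM_LLC % 32);
        printf("AVX512_BF16: word %d, bit %d\n",
               X86_FEATURE_AVX512_BF16 / 32, X86_FEATURE_AVX512_BF16 % 32);
        return 0;
}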
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index 7e42b285c856..c6136d79f8c0 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -47,7 +47,6 @@ extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
47extern void __init update_regset_xstate_info(unsigned int size, 47extern void __init update_regset_xstate_info(unsigned int size,
48 u64 xstate_mask); 48 u64 xstate_mask);
49 49
50void fpu__xstate_clear_all_cpu_caps(void);
51void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr); 50void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr);
52const void *get_xsave_field_ptr(int xfeature_nr); 51const void *get_xsave_field_ptr(int xfeature_nr);
53int using_compacted_format(void); 52int using_compacted_format(void);
diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h
index 67385d56d4f4..6352dee37cda 100644
--- a/arch/x86/include/asm/hpet.h
+++ b/arch/x86/include/asm/hpet.h
@@ -75,16 +75,15 @@ extern unsigned int hpet_readl(unsigned int a);
75extern void force_hpet_resume(void); 75extern void force_hpet_resume(void);
76 76
77struct irq_data; 77struct irq_data;
78struct hpet_dev; 78struct hpet_channel;
79struct irq_domain; 79struct irq_domain;
80 80
81extern void hpet_msi_unmask(struct irq_data *data); 81extern void hpet_msi_unmask(struct irq_data *data);
82extern void hpet_msi_mask(struct irq_data *data); 82extern void hpet_msi_mask(struct irq_data *data);
83extern void hpet_msi_write(struct hpet_dev *hdev, struct msi_msg *msg); 83extern void hpet_msi_write(struct hpet_channel *hc, struct msi_msg *msg);
84extern void hpet_msi_read(struct hpet_dev *hdev, struct msi_msg *msg);
85extern struct irq_domain *hpet_create_irq_domain(int hpet_id); 84extern struct irq_domain *hpet_create_irq_domain(int hpet_id);
86extern int hpet_assign_irq(struct irq_domain *domain, 85extern int hpet_assign_irq(struct irq_domain *domain,
87 struct hpet_dev *dev, int dev_num); 86 struct hpet_channel *hc, int dev_num);
88 87
89#ifdef CONFIG_HPET_EMULATE_RTC 88#ifdef CONFIG_HPET_EMULATE_RTC
90 89
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 32e666e1231e..cbd97e22d2f3 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -150,8 +150,11 @@ extern char irq_entries_start[];
150#define trace_irq_entries_start irq_entries_start 150#define trace_irq_entries_start irq_entries_start
151#endif 151#endif
152 152
153extern char spurious_entries_start[];
154
153#define VECTOR_UNUSED NULL 155#define VECTOR_UNUSED NULL
154#define VECTOR_RETRIGGERED ((void *)~0UL) 156#define VECTOR_SHUTDOWN ((void *)~0UL)
157#define VECTOR_RETRIGGERED ((void *)~1UL)
155 158
156typedef struct irq_desc* vector_irq_t[NR_VECTORS]; 159typedef struct irq_desc* vector_irq_t[NR_VECTORS];
157DECLARE_PER_CPU(vector_irq_t, vector_irq); 160DECLARE_PER_CPU(vector_irq_t, vector_irq);
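VECTOR_SHUTDOWN and VECTOR_RETRIGGERED are sentinel pointer values (~0UL and ~1UL cast to void *) that can never alias a real irq_desc, so a vector slot can encode its state without an extra field. A sketch of how such sentinels are typically tested; the helper name is made up:

#include <stddef.h>

#define VECTOR_UNUSED      NULL
#define VECTOR_SHUTDOWN    ((void *)~0UL)
#define VECTOR_RETRIGGERED ((void *)~1UL)

static int vector_has_real_desc(void *slot)      /* hypothetical helper */
{
        return slot != VECTOR_UNUSED &&
               slot != VECTOR_SHUTDOWN &&
               slot != VECTOR_RETRIGGERED;
}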
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index cdf44aa9a501..af78cd72b8f3 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -401,6 +401,12 @@ enum HV_GENERIC_SET_FORMAT {
401#define HV_STATUS_INVALID_CONNECTION_ID 18 401#define HV_STATUS_INVALID_CONNECTION_ID 18
402#define HV_STATUS_INSUFFICIENT_BUFFERS 19 402#define HV_STATUS_INSUFFICIENT_BUFFERS 19
403 403
404/*
405 * The Hyper-V TimeRefCount register and the TSC
406 * page provide a guest VM clock with 100ns tick rate
407 */
408#define HV_CLOCK_HZ (NSEC_PER_SEC/100)
409
404typedef struct _HV_REFERENCE_TSC_PAGE { 410typedef struct _HV_REFERENCE_TSC_PAGE {
405 __u32 tsc_sequence; 411 __u32 tsc_sequence;
406 __u32 res1; 412 __u32 res1;
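HV_CLOCK_HZ encodes the 100 ns tick of the Hyper-V reference counter, i.e. 10 MHz. A trivial sketch of converting such ticks to nanoseconds:

#include <stdint.h>

#define NSEC_PER_SEC 1000000000ULL
#define HV_CLOCK_HZ  (NSEC_PER_SEC / 100)        /* 10,000,000 ticks per second */

static uint64_t hv_ticks_to_ns(uint64_t ticks)
{
        return ticks * (NSEC_PER_SEC / HV_CLOCK_HZ);   /* 100 ns per tick */
}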
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index 310118805f57..0278aa66ef62 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -56,6 +56,7 @@
56#define INTEL_FAM6_ICELAKE_XEON_D 0x6C 56#define INTEL_FAM6_ICELAKE_XEON_D 0x6C
57#define INTEL_FAM6_ICELAKE_DESKTOP 0x7D 57#define INTEL_FAM6_ICELAKE_DESKTOP 0x7D
58#define INTEL_FAM6_ICELAKE_MOBILE 0x7E 58#define INTEL_FAM6_ICELAKE_MOBILE 0x7E
59#define INTEL_FAM6_ICELAKE_NNPI 0x9D
59 60
60/* "Small Core" Processors (Atom) */ 61/* "Small Core" Processors (Atom) */
61 62
@@ -76,6 +77,7 @@
76#define INTEL_FAM6_ATOM_GOLDMONT 0x5C /* Apollo Lake */ 77#define INTEL_FAM6_ATOM_GOLDMONT 0x5C /* Apollo Lake */
77#define INTEL_FAM6_ATOM_GOLDMONT_X 0x5F /* Denverton */ 78#define INTEL_FAM6_ATOM_GOLDMONT_X 0x5F /* Denverton */
78#define INTEL_FAM6_ATOM_GOLDMONT_PLUS 0x7A /* Gemini Lake */ 79#define INTEL_FAM6_ATOM_GOLDMONT_PLUS 0x7A /* Gemini Lake */
80
79#define INTEL_FAM6_ATOM_TREMONT_X 0x86 /* Jacobsville */ 81#define INTEL_FAM6_ATOM_TREMONT_X 0x86 /* Jacobsville */
80 82
81/* Xeon Phi */ 83/* Xeon Phi */
diff --git a/arch/x86/include/asm/irq_regs.h b/arch/x86/include/asm/irq_regs.h
index 8f3bee821e6c..187ce59aea28 100644
--- a/arch/x86/include/asm/irq_regs.h
+++ b/arch/x86/include/asm/irq_regs.h
@@ -16,7 +16,7 @@ DECLARE_PER_CPU(struct pt_regs *, irq_regs);
16 16
17static inline struct pt_regs *get_irq_regs(void) 17static inline struct pt_regs *get_irq_regs(void)
18{ 18{
19 return this_cpu_read(irq_regs); 19 return __this_cpu_read(irq_regs);
20} 20}
21 21
22static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs) 22static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
@@ -24,7 +24,7 @@ static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
24 struct pt_regs *old_regs; 24 struct pt_regs *old_regs;
25 25
26 old_regs = get_irq_regs(); 26 old_regs = get_irq_regs();
27 this_cpu_write(irq_regs, new_regs); 27 __this_cpu_write(irq_regs, new_regs);
28 28
29 return old_regs; 29 return old_regs;
30} 30}
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index 65191ce8e1cf..06c3cc22a058 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -2,6 +2,8 @@
2#ifndef _ASM_X86_JUMP_LABEL_H 2#ifndef _ASM_X86_JUMP_LABEL_H
3#define _ASM_X86_JUMP_LABEL_H 3#define _ASM_X86_JUMP_LABEL_H
4 4
5#define HAVE_JUMP_LABEL_BATCH
6
5#define JUMP_LABEL_NOP_SIZE 5 7#define JUMP_LABEL_NOP_SIZE 5
6 8
7#ifdef CONFIG_X86_64 9#ifdef CONFIG_X86_64
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index cc60e617931c..f4fa8a9d5d0b 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -105,6 +105,17 @@ static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type)
105#define hv_get_crash_ctl(val) \ 105#define hv_get_crash_ctl(val) \
106 rdmsrl(HV_X64_MSR_CRASH_CTL, val) 106 rdmsrl(HV_X64_MSR_CRASH_CTL, val)
107 107
108#define hv_get_time_ref_count(val) \
109 rdmsrl(HV_X64_MSR_TIME_REF_COUNT, val)
110
111#define hv_get_reference_tsc(val) \
112 rdmsrl(HV_X64_MSR_REFERENCE_TSC, val)
113#define hv_set_reference_tsc(val) \
114 wrmsrl(HV_X64_MSR_REFERENCE_TSC, val)
115#define hv_set_clocksource_vdso(val) \
116 ((val).archdata.vclock_mode = VCLOCK_HVCLOCK)
117#define hv_get_raw_timer() rdtsc_ordered()
118
108void hyperv_callback_vector(void); 119void hyperv_callback_vector(void);
109void hyperv_reenlightenment_vector(void); 120void hyperv_reenlightenment_vector(void);
110#ifdef CONFIG_TRACING 121#ifdef CONFIG_TRACING
@@ -133,7 +144,6 @@ static inline void hv_disable_stimer0_percpu_irq(int irq) {}
133 144
134 145
135#if IS_ENABLED(CONFIG_HYPERV) 146#if IS_ENABLED(CONFIG_HYPERV)
136extern struct clocksource *hyperv_cs;
137extern void *hv_hypercall_pg; 147extern void *hv_hypercall_pg;
138extern void __percpu **hyperv_pcpu_input_arg; 148extern void __percpu **hyperv_pcpu_input_arg;
139 149
@@ -387,73 +397,4 @@ static inline int hyperv_flush_guest_mapping_range(u64 as,
387} 397}
388#endif /* CONFIG_HYPERV */ 398#endif /* CONFIG_HYPERV */
389 399
390#ifdef CONFIG_HYPERV_TSCPAGE
391struct ms_hyperv_tsc_page *hv_get_tsc_page(void);
392static inline u64 hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg,
393 u64 *cur_tsc)
394{
395 u64 scale, offset;
396 u32 sequence;
397
398 /*
399 * The protocol for reading Hyper-V TSC page is specified in Hypervisor
400 * Top-Level Functional Specification ver. 3.0 and above. To get the
401 * reference time we must do the following:
402 * - READ ReferenceTscSequence
403 * A special '0' value indicates the time source is unreliable and we
404 * need to use something else. The currently published specification
405 * versions (up to 4.0b) contain a mistake and wrongly claim '-1'
406 * instead of '0' as the special value, see commit c35b82ef0294.
407 * - ReferenceTime =
408 * ((RDTSC() * ReferenceTscScale) >> 64) + ReferenceTscOffset
409 * - READ ReferenceTscSequence again. In case its value has changed
410 * since our first reading we need to discard ReferenceTime and repeat
411 * the whole sequence as the hypervisor was updating the page in
412 * between.
413 */
414 do {
415 sequence = READ_ONCE(tsc_pg->tsc_sequence);
416 if (!sequence)
417 return U64_MAX;
418 /*
419 * Make sure we read sequence before we read other values from
420 * TSC page.
421 */
422 smp_rmb();
423
424 scale = READ_ONCE(tsc_pg->tsc_scale);
425 offset = READ_ONCE(tsc_pg->tsc_offset);
426 *cur_tsc = rdtsc_ordered();
427
428 /*
429 * Make sure we read sequence after we read all other values
430 * from TSC page.
431 */
432 smp_rmb();
433
434 } while (READ_ONCE(tsc_pg->tsc_sequence) != sequence);
435
436 return mul_u64_u64_shr(*cur_tsc, scale, 64) + offset;
437}
438
439static inline u64 hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg)
440{
441 u64 cur_tsc;
442
443 return hv_read_tsc_page_tsc(tsc_pg, &cur_tsc);
444}
445
446#else
447static inline struct ms_hyperv_tsc_page *hv_get_tsc_page(void)
448{
449 return NULL;
450}
451
452static inline u64 hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg,
453 u64 *cur_tsc)
454{
455 BUG();
456 return U64_MAX;
457}
458#endif
459#endif 400#endif
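The TSC-page reader removed above (it now lives with the generic Hyper-V clocksource code) is a classic sequence-counter retry loop. A simplified sketch of that structure, with the read barriers omitted for brevity:

#include <stdint.h>

struct tsc_page {
        volatile uint32_t seq;
        volatile uint64_t scale;
        volatile uint64_t offset;
};

static uint64_t read_ref_time(const struct tsc_page *p, uint64_t (*read_tsc)(void))
{
        uint64_t scale, offset, tsc;
        uint32_t seq;

        do {
                seq = p->seq;
                if (!seq)                    /* 0: page invalid, use another clocksource */
                        return UINT64_MAX;
                scale  = p->scale;
                offset = p->offset;
                tsc    = read_tsc();
        } while (p->seq != seq);             /* page changed under us: retry */

        /* ReferenceTime = ((tsc * scale) >> 64) + offset */
        return (uint64_t)(((unsigned __int128)tsc * scale) >> 64) + offset;
}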
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 979ef971cc78..6b4fc2788078 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -61,6 +61,15 @@
61#define MSR_PLATFORM_INFO_CPUID_FAULT_BIT 31 61#define MSR_PLATFORM_INFO_CPUID_FAULT_BIT 31
62#define MSR_PLATFORM_INFO_CPUID_FAULT BIT_ULL(MSR_PLATFORM_INFO_CPUID_FAULT_BIT) 62#define MSR_PLATFORM_INFO_CPUID_FAULT BIT_ULL(MSR_PLATFORM_INFO_CPUID_FAULT_BIT)
63 63
64#define MSR_IA32_UMWAIT_CONTROL 0xe1
65#define MSR_IA32_UMWAIT_CONTROL_C02_DISABLE BIT(0)
66#define MSR_IA32_UMWAIT_CONTROL_RESERVED BIT(1)
67/*
68 * The time field is bit[31:2], but representing a 32bit value with
69 * bit[1:0] zero.
70 */
71#define MSR_IA32_UMWAIT_CONTROL_TIME_MASK (~0x03U)
72
64#define MSR_PKG_CST_CONFIG_CONTROL 0x000000e2 73#define MSR_PKG_CST_CONFIG_CONTROL 0x000000e2
65#define NHM_C3_AUTO_DEMOTE (1UL << 25) 74#define NHM_C3_AUTO_DEMOTE (1UL << 25)
66#define NHM_C1_AUTO_DEMOTE (1UL << 26) 75#define NHM_C1_AUTO_DEMOTE (1UL << 26)
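MSR_IA32_UMWAIT_CONTROL_TIME_MASK keeps bits [31:2] for the maximum wait time, bit 0 disables C0.2, and bit 1 is reserved. A small sketch of composing the control value; the helper name is invented:

#include <stdint.h>

#define UMWAIT_CTRL_TIME_MASK    (~0x03U)        /* bits [31:2] */
#define UMWAIT_CTRL_C02_DISABLE  0x01U           /* bit 0 */

static uint32_t umwait_control(uint32_t max_time, int disable_c02)
{
        return (max_time & UMWAIT_CTRL_TIME_MASK) |
               (disable_c02 ? UMWAIT_CTRL_C02_DISABLE : 0);
}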
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index eb0f80ce8524..e28f8b723b5c 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -86,9 +86,9 @@ static inline void __mwaitx(unsigned long eax, unsigned long ebx,
86 86
87static inline void __sti_mwait(unsigned long eax, unsigned long ecx) 87static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
88{ 88{
89 mds_idle_clear_cpu_buffers();
90
91 trace_hardirqs_on(); 89 trace_hardirqs_on();
90
91 mds_idle_clear_cpu_buffers();
92 /* "mwait %eax, %ecx;" */ 92 /* "mwait %eax, %ecx;" */
93 asm volatile("sti; .byte 0x0f, 0x01, 0xc9;" 93 asm volatile("sti; .byte 0x0f, 0x01, 0xc9;"
94 :: "a" (eax), "c" (ecx)); 94 :: "a" (eax), "c" (ecx));
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 1a19d11cfbbd..2278797c769d 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -87,7 +87,7 @@
87 * don't give an lvalue though). */ 87 * don't give an lvalue though). */
88extern void __bad_percpu_size(void); 88extern void __bad_percpu_size(void);
89 89
90#define percpu_to_op(op, var, val) \ 90#define percpu_to_op(qual, op, var, val) \
91do { \ 91do { \
92 typedef typeof(var) pto_T__; \ 92 typedef typeof(var) pto_T__; \
93 if (0) { \ 93 if (0) { \
@@ -97,22 +97,22 @@ do { \
97 } \ 97 } \
98 switch (sizeof(var)) { \ 98 switch (sizeof(var)) { \
99 case 1: \ 99 case 1: \
100 asm(op "b %1,"__percpu_arg(0) \ 100 asm qual (op "b %1,"__percpu_arg(0) \
101 : "+m" (var) \ 101 : "+m" (var) \
102 : "qi" ((pto_T__)(val))); \ 102 : "qi" ((pto_T__)(val))); \
103 break; \ 103 break; \
104 case 2: \ 104 case 2: \
105 asm(op "w %1,"__percpu_arg(0) \ 105 asm qual (op "w %1,"__percpu_arg(0) \
106 : "+m" (var) \ 106 : "+m" (var) \
107 : "ri" ((pto_T__)(val))); \ 107 : "ri" ((pto_T__)(val))); \
108 break; \ 108 break; \
109 case 4: \ 109 case 4: \
110 asm(op "l %1,"__percpu_arg(0) \ 110 asm qual (op "l %1,"__percpu_arg(0) \
111 : "+m" (var) \ 111 : "+m" (var) \
112 : "ri" ((pto_T__)(val))); \ 112 : "ri" ((pto_T__)(val))); \
113 break; \ 113 break; \
114 case 8: \ 114 case 8: \
115 asm(op "q %1,"__percpu_arg(0) \ 115 asm qual (op "q %1,"__percpu_arg(0) \
116 : "+m" (var) \ 116 : "+m" (var) \
117 : "re" ((pto_T__)(val))); \ 117 : "re" ((pto_T__)(val))); \
118 break; \ 118 break; \
@@ -124,7 +124,7 @@ do { \
124 * Generate a percpu add to memory instruction and optimize code 124 * Generate a percpu add to memory instruction and optimize code
125 * if one is added or subtracted. 125 * if one is added or subtracted.
126 */ 126 */
127#define percpu_add_op(var, val) \ 127#define percpu_add_op(qual, var, val) \
128do { \ 128do { \
129 typedef typeof(var) pao_T__; \ 129 typedef typeof(var) pao_T__; \
130 const int pao_ID__ = (__builtin_constant_p(val) && \ 130 const int pao_ID__ = (__builtin_constant_p(val) && \
@@ -138,41 +138,41 @@ do { \
138 switch (sizeof(var)) { \ 138 switch (sizeof(var)) { \
139 case 1: \ 139 case 1: \
140 if (pao_ID__ == 1) \ 140 if (pao_ID__ == 1) \
141 asm("incb "__percpu_arg(0) : "+m" (var)); \ 141 asm qual ("incb "__percpu_arg(0) : "+m" (var)); \
142 else if (pao_ID__ == -1) \ 142 else if (pao_ID__ == -1) \
143 asm("decb "__percpu_arg(0) : "+m" (var)); \ 143 asm qual ("decb "__percpu_arg(0) : "+m" (var)); \
144 else \ 144 else \
145 asm("addb %1, "__percpu_arg(0) \ 145 asm qual ("addb %1, "__percpu_arg(0) \
146 : "+m" (var) \ 146 : "+m" (var) \
147 : "qi" ((pao_T__)(val))); \ 147 : "qi" ((pao_T__)(val))); \
148 break; \ 148 break; \
149 case 2: \ 149 case 2: \
150 if (pao_ID__ == 1) \ 150 if (pao_ID__ == 1) \
151 asm("incw "__percpu_arg(0) : "+m" (var)); \ 151 asm qual ("incw "__percpu_arg(0) : "+m" (var)); \
152 else if (pao_ID__ == -1) \ 152 else if (pao_ID__ == -1) \
153 asm("decw "__percpu_arg(0) : "+m" (var)); \ 153 asm qual ("decw "__percpu_arg(0) : "+m" (var)); \
154 else \ 154 else \
155 asm("addw %1, "__percpu_arg(0) \ 155 asm qual ("addw %1, "__percpu_arg(0) \
156 : "+m" (var) \ 156 : "+m" (var) \
157 : "ri" ((pao_T__)(val))); \ 157 : "ri" ((pao_T__)(val))); \
158 break; \ 158 break; \
159 case 4: \ 159 case 4: \
160 if (pao_ID__ == 1) \ 160 if (pao_ID__ == 1) \
161 asm("incl "__percpu_arg(0) : "+m" (var)); \ 161 asm qual ("incl "__percpu_arg(0) : "+m" (var)); \
162 else if (pao_ID__ == -1) \ 162 else if (pao_ID__ == -1) \
163 asm("decl "__percpu_arg(0) : "+m" (var)); \ 163 asm qual ("decl "__percpu_arg(0) : "+m" (var)); \
164 else \ 164 else \
165 asm("addl %1, "__percpu_arg(0) \ 165 asm qual ("addl %1, "__percpu_arg(0) \
166 : "+m" (var) \ 166 : "+m" (var) \
167 : "ri" ((pao_T__)(val))); \ 167 : "ri" ((pao_T__)(val))); \
168 break; \ 168 break; \
169 case 8: \ 169 case 8: \
170 if (pao_ID__ == 1) \ 170 if (pao_ID__ == 1) \
171 asm("incq "__percpu_arg(0) : "+m" (var)); \ 171 asm qual ("incq "__percpu_arg(0) : "+m" (var)); \
172 else if (pao_ID__ == -1) \ 172 else if (pao_ID__ == -1) \
173 asm("decq "__percpu_arg(0) : "+m" (var)); \ 173 asm qual ("decq "__percpu_arg(0) : "+m" (var)); \
174 else \ 174 else \
175 asm("addq %1, "__percpu_arg(0) \ 175 asm qual ("addq %1, "__percpu_arg(0) \
176 : "+m" (var) \ 176 : "+m" (var) \
177 : "re" ((pao_T__)(val))); \ 177 : "re" ((pao_T__)(val))); \
178 break; \ 178 break; \
@@ -180,27 +180,27 @@ do { \
180 } \ 180 } \
181} while (0) 181} while (0)
182 182
183#define percpu_from_op(op, var) \ 183#define percpu_from_op(qual, op, var) \
184({ \ 184({ \
185 typeof(var) pfo_ret__; \ 185 typeof(var) pfo_ret__; \
186 switch (sizeof(var)) { \ 186 switch (sizeof(var)) { \
187 case 1: \ 187 case 1: \
188 asm volatile(op "b "__percpu_arg(1)",%0"\ 188 asm qual (op "b "__percpu_arg(1)",%0" \
189 : "=q" (pfo_ret__) \ 189 : "=q" (pfo_ret__) \
190 : "m" (var)); \ 190 : "m" (var)); \
191 break; \ 191 break; \
192 case 2: \ 192 case 2: \
193 asm volatile(op "w "__percpu_arg(1)",%0"\ 193 asm qual (op "w "__percpu_arg(1)",%0" \
194 : "=r" (pfo_ret__) \ 194 : "=r" (pfo_ret__) \
195 : "m" (var)); \ 195 : "m" (var)); \
196 break; \ 196 break; \
197 case 4: \ 197 case 4: \
198 asm volatile(op "l "__percpu_arg(1)",%0"\ 198 asm qual (op "l "__percpu_arg(1)",%0" \
199 : "=r" (pfo_ret__) \ 199 : "=r" (pfo_ret__) \
200 : "m" (var)); \ 200 : "m" (var)); \
201 break; \ 201 break; \
202 case 8: \ 202 case 8: \
203 asm volatile(op "q "__percpu_arg(1)",%0"\ 203 asm qual (op "q "__percpu_arg(1)",%0" \
204 : "=r" (pfo_ret__) \ 204 : "=r" (pfo_ret__) \
205 : "m" (var)); \ 205 : "m" (var)); \
206 break; \ 206 break; \
@@ -238,23 +238,23 @@ do { \
238 pfo_ret__; \ 238 pfo_ret__; \
239}) 239})
240 240
241#define percpu_unary_op(op, var) \ 241#define percpu_unary_op(qual, op, var) \
242({ \ 242({ \
243 switch (sizeof(var)) { \ 243 switch (sizeof(var)) { \
244 case 1: \ 244 case 1: \
245 asm(op "b "__percpu_arg(0) \ 245 asm qual (op "b "__percpu_arg(0) \
246 : "+m" (var)); \ 246 : "+m" (var)); \
247 break; \ 247 break; \
248 case 2: \ 248 case 2: \
249 asm(op "w "__percpu_arg(0) \ 249 asm qual (op "w "__percpu_arg(0) \
250 : "+m" (var)); \ 250 : "+m" (var)); \
251 break; \ 251 break; \
252 case 4: \ 252 case 4: \
253 asm(op "l "__percpu_arg(0) \ 253 asm qual (op "l "__percpu_arg(0) \
254 : "+m" (var)); \ 254 : "+m" (var)); \
255 break; \ 255 break; \
256 case 8: \ 256 case 8: \
257 asm(op "q "__percpu_arg(0) \ 257 asm qual (op "q "__percpu_arg(0) \
258 : "+m" (var)); \ 258 : "+m" (var)); \
259 break; \ 259 break; \
260 default: __bad_percpu_size(); \ 260 default: __bad_percpu_size(); \
@@ -264,27 +264,27 @@ do { \
264/* 264/*
265 * Add return operation 265 * Add return operation
266 */ 266 */
267#define percpu_add_return_op(var, val) \ 267#define percpu_add_return_op(qual, var, val) \
268({ \ 268({ \
269 typeof(var) paro_ret__ = val; \ 269 typeof(var) paro_ret__ = val; \
270 switch (sizeof(var)) { \ 270 switch (sizeof(var)) { \
271 case 1: \ 271 case 1: \
272 asm("xaddb %0, "__percpu_arg(1) \ 272 asm qual ("xaddb %0, "__percpu_arg(1) \
273 : "+q" (paro_ret__), "+m" (var) \ 273 : "+q" (paro_ret__), "+m" (var) \
274 : : "memory"); \ 274 : : "memory"); \
275 break; \ 275 break; \
276 case 2: \ 276 case 2: \
277 asm("xaddw %0, "__percpu_arg(1) \ 277 asm qual ("xaddw %0, "__percpu_arg(1) \
278 : "+r" (paro_ret__), "+m" (var) \ 278 : "+r" (paro_ret__), "+m" (var) \
279 : : "memory"); \ 279 : : "memory"); \
280 break; \ 280 break; \
281 case 4: \ 281 case 4: \
282 asm("xaddl %0, "__percpu_arg(1) \ 282 asm qual ("xaddl %0, "__percpu_arg(1) \
283 : "+r" (paro_ret__), "+m" (var) \ 283 : "+r" (paro_ret__), "+m" (var) \
284 : : "memory"); \ 284 : : "memory"); \
285 break; \ 285 break; \
286 case 8: \ 286 case 8: \
287 asm("xaddq %0, "__percpu_arg(1) \ 287 asm qual ("xaddq %0, "__percpu_arg(1) \
288 : "+re" (paro_ret__), "+m" (var) \ 288 : "+re" (paro_ret__), "+m" (var) \
289 : : "memory"); \ 289 : : "memory"); \
290 break; \ 290 break; \
@@ -299,13 +299,13 @@ do { \
299 * expensive due to the implied lock prefix. The processor cannot prefetch 299 * expensive due to the implied lock prefix. The processor cannot prefetch
300 * cachelines if xchg is used. 300 * cachelines if xchg is used.
301 */ 301 */
302#define percpu_xchg_op(var, nval) \ 302#define percpu_xchg_op(qual, var, nval) \
303({ \ 303({ \
304 typeof(var) pxo_ret__; \ 304 typeof(var) pxo_ret__; \
305 typeof(var) pxo_new__ = (nval); \ 305 typeof(var) pxo_new__ = (nval); \
306 switch (sizeof(var)) { \ 306 switch (sizeof(var)) { \
307 case 1: \ 307 case 1: \
308 asm("\n\tmov "__percpu_arg(1)",%%al" \ 308 asm qual ("\n\tmov "__percpu_arg(1)",%%al" \
309 "\n1:\tcmpxchgb %2, "__percpu_arg(1) \ 309 "\n1:\tcmpxchgb %2, "__percpu_arg(1) \
310 "\n\tjnz 1b" \ 310 "\n\tjnz 1b" \
311 : "=&a" (pxo_ret__), "+m" (var) \ 311 : "=&a" (pxo_ret__), "+m" (var) \
@@ -313,7 +313,7 @@ do { \
313 : "memory"); \ 313 : "memory"); \
314 break; \ 314 break; \
315 case 2: \ 315 case 2: \
316 asm("\n\tmov "__percpu_arg(1)",%%ax" \ 316 asm qual ("\n\tmov "__percpu_arg(1)",%%ax" \
317 "\n1:\tcmpxchgw %2, "__percpu_arg(1) \ 317 "\n1:\tcmpxchgw %2, "__percpu_arg(1) \
318 "\n\tjnz 1b" \ 318 "\n\tjnz 1b" \
319 : "=&a" (pxo_ret__), "+m" (var) \ 319 : "=&a" (pxo_ret__), "+m" (var) \
@@ -321,7 +321,7 @@ do { \
321 : "memory"); \ 321 : "memory"); \
322 break; \ 322 break; \
323 case 4: \ 323 case 4: \
324 asm("\n\tmov "__percpu_arg(1)",%%eax" \ 324 asm qual ("\n\tmov "__percpu_arg(1)",%%eax" \
325 "\n1:\tcmpxchgl %2, "__percpu_arg(1) \ 325 "\n1:\tcmpxchgl %2, "__percpu_arg(1) \
326 "\n\tjnz 1b" \ 326 "\n\tjnz 1b" \
327 : "=&a" (pxo_ret__), "+m" (var) \ 327 : "=&a" (pxo_ret__), "+m" (var) \
@@ -329,7 +329,7 @@ do { \
329 : "memory"); \ 329 : "memory"); \
330 break; \ 330 break; \
331 case 8: \ 331 case 8: \
332 asm("\n\tmov "__percpu_arg(1)",%%rax" \ 332 asm qual ("\n\tmov "__percpu_arg(1)",%%rax" \
333 "\n1:\tcmpxchgq %2, "__percpu_arg(1) \ 333 "\n1:\tcmpxchgq %2, "__percpu_arg(1) \
334 "\n\tjnz 1b" \ 334 "\n\tjnz 1b" \
335 : "=&a" (pxo_ret__), "+m" (var) \ 335 : "=&a" (pxo_ret__), "+m" (var) \
@@ -345,32 +345,32 @@ do { \
345 * cmpxchg has no such implied lock semantics as a result it is much 345 * cmpxchg has no such implied lock semantics as a result it is much
346 * more efficient for cpu local operations. 346 * more efficient for cpu local operations.
347 */ 347 */
348#define percpu_cmpxchg_op(var, oval, nval) \ 348#define percpu_cmpxchg_op(qual, var, oval, nval) \
349({ \ 349({ \
350 typeof(var) pco_ret__; \ 350 typeof(var) pco_ret__; \
351 typeof(var) pco_old__ = (oval); \ 351 typeof(var) pco_old__ = (oval); \
352 typeof(var) pco_new__ = (nval); \ 352 typeof(var) pco_new__ = (nval); \
353 switch (sizeof(var)) { \ 353 switch (sizeof(var)) { \
354 case 1: \ 354 case 1: \
355 asm("cmpxchgb %2, "__percpu_arg(1) \ 355 asm qual ("cmpxchgb %2, "__percpu_arg(1) \
356 : "=a" (pco_ret__), "+m" (var) \ 356 : "=a" (pco_ret__), "+m" (var) \
357 : "q" (pco_new__), "0" (pco_old__) \ 357 : "q" (pco_new__), "0" (pco_old__) \
358 : "memory"); \ 358 : "memory"); \
359 break; \ 359 break; \
360 case 2: \ 360 case 2: \
361 asm("cmpxchgw %2, "__percpu_arg(1) \ 361 asm qual ("cmpxchgw %2, "__percpu_arg(1) \
362 : "=a" (pco_ret__), "+m" (var) \ 362 : "=a" (pco_ret__), "+m" (var) \
363 : "r" (pco_new__), "0" (pco_old__) \ 363 : "r" (pco_new__), "0" (pco_old__) \
364 : "memory"); \ 364 : "memory"); \
365 break; \ 365 break; \
366 case 4: \ 366 case 4: \
367 asm("cmpxchgl %2, "__percpu_arg(1) \ 367 asm qual ("cmpxchgl %2, "__percpu_arg(1) \
368 : "=a" (pco_ret__), "+m" (var) \ 368 : "=a" (pco_ret__), "+m" (var) \
369 : "r" (pco_new__), "0" (pco_old__) \ 369 : "r" (pco_new__), "0" (pco_old__) \
370 : "memory"); \ 370 : "memory"); \
371 break; \ 371 break; \
372 case 8: \ 372 case 8: \
373 asm("cmpxchgq %2, "__percpu_arg(1) \ 373 asm qual ("cmpxchgq %2, "__percpu_arg(1) \
374 : "=a" (pco_ret__), "+m" (var) \ 374 : "=a" (pco_ret__), "+m" (var) \
375 : "r" (pco_new__), "0" (pco_old__) \ 375 : "r" (pco_new__), "0" (pco_old__) \
376 : "memory"); \ 376 : "memory"); \
@@ -391,58 +391,70 @@ do { \
391 */ 391 */
392#define this_cpu_read_stable(var) percpu_stable_op("mov", var) 392#define this_cpu_read_stable(var) percpu_stable_op("mov", var)
393 393
394#define raw_cpu_read_1(pcp) percpu_from_op("mov", pcp) 394#define raw_cpu_read_1(pcp) percpu_from_op(, "mov", pcp)
395#define raw_cpu_read_2(pcp) percpu_from_op("mov", pcp) 395#define raw_cpu_read_2(pcp) percpu_from_op(, "mov", pcp)
396#define raw_cpu_read_4(pcp) percpu_from_op("mov", pcp) 396#define raw_cpu_read_4(pcp) percpu_from_op(, "mov", pcp)
397 397
398#define raw_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val) 398#define raw_cpu_write_1(pcp, val) percpu_to_op(, "mov", (pcp), val)
399#define raw_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val) 399#define raw_cpu_write_2(pcp, val) percpu_to_op(, "mov", (pcp), val)
400#define raw_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val) 400#define raw_cpu_write_4(pcp, val) percpu_to_op(, "mov", (pcp), val)
401#define raw_cpu_add_1(pcp, val) percpu_add_op((pcp), val) 401#define raw_cpu_add_1(pcp, val) percpu_add_op(, (pcp), val)
402#define raw_cpu_add_2(pcp, val) percpu_add_op((pcp), val) 402#define raw_cpu_add_2(pcp, val) percpu_add_op(, (pcp), val)
403#define raw_cpu_add_4(pcp, val) percpu_add_op((pcp), val) 403#define raw_cpu_add_4(pcp, val) percpu_add_op(, (pcp), val)
404#define raw_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val) 404#define raw_cpu_and_1(pcp, val) percpu_to_op(, "and", (pcp), val)
405#define raw_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val) 405#define raw_cpu_and_2(pcp, val) percpu_to_op(, "and", (pcp), val)
406#define raw_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val) 406#define raw_cpu_and_4(pcp, val) percpu_to_op(, "and", (pcp), val)
407#define raw_cpu_or_1(pcp, val) percpu_to_op("or", (pcp), val) 407#define raw_cpu_or_1(pcp, val) percpu_to_op(, "or", (pcp), val)
408#define raw_cpu_or_2(pcp, val) percpu_to_op("or", (pcp), val) 408#define raw_cpu_or_2(pcp, val) percpu_to_op(, "or", (pcp), val)
409#define raw_cpu_or_4(pcp, val) percpu_to_op("or", (pcp), val) 409#define raw_cpu_or_4(pcp, val) percpu_to_op(, "or", (pcp), val)
410#define raw_cpu_xchg_1(pcp, val) percpu_xchg_op(pcp, val) 410
411#define raw_cpu_xchg_2(pcp, val) percpu_xchg_op(pcp, val) 411/*
412#define raw_cpu_xchg_4(pcp, val) percpu_xchg_op(pcp, val) 412 * raw_cpu_xchg() can use a load-store since it is not required to be
413 413 * IRQ-safe.
414#define this_cpu_read_1(pcp) percpu_from_op("mov", pcp) 414 */
415#define this_cpu_read_2(pcp) percpu_from_op("mov", pcp) 415#define raw_percpu_xchg_op(var, nval) \
416#define this_cpu_read_4(pcp) percpu_from_op("mov", pcp) 416({ \
417#define this_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val) 417 typeof(var) pxo_ret__ = raw_cpu_read(var); \
418#define this_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val) 418 raw_cpu_write(var, (nval)); \
419#define this_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val) 419 pxo_ret__; \
420#define this_cpu_add_1(pcp, val) percpu_add_op((pcp), val) 420})
421#define this_cpu_add_2(pcp, val) percpu_add_op((pcp), val) 421
422#define this_cpu_add_4(pcp, val) percpu_add_op((pcp), val) 422#define raw_cpu_xchg_1(pcp, val) raw_percpu_xchg_op(pcp, val)
423#define this_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val) 423#define raw_cpu_xchg_2(pcp, val) raw_percpu_xchg_op(pcp, val)
424#define this_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val) 424#define raw_cpu_xchg_4(pcp, val) raw_percpu_xchg_op(pcp, val)
425#define this_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val) 425
426#define this_cpu_or_1(pcp, val) percpu_to_op("or", (pcp), val) 426#define this_cpu_read_1(pcp) percpu_from_op(volatile, "mov", pcp)
427#define this_cpu_or_2(pcp, val) percpu_to_op("or", (pcp), val) 427#define this_cpu_read_2(pcp) percpu_from_op(volatile, "mov", pcp)
428#define this_cpu_or_4(pcp, val) percpu_to_op("or", (pcp), val) 428#define this_cpu_read_4(pcp) percpu_from_op(volatile, "mov", pcp)
429#define this_cpu_xchg_1(pcp, nval) percpu_xchg_op(pcp, nval) 429#define this_cpu_write_1(pcp, val) percpu_to_op(volatile, "mov", (pcp), val)
430#define this_cpu_xchg_2(pcp, nval) percpu_xchg_op(pcp, nval) 430#define this_cpu_write_2(pcp, val) percpu_to_op(volatile, "mov", (pcp), val)
431#define this_cpu_xchg_4(pcp, nval) percpu_xchg_op(pcp, nval) 431#define this_cpu_write_4(pcp, val) percpu_to_op(volatile, "mov", (pcp), val)
432 432#define this_cpu_add_1(pcp, val) percpu_add_op(volatile, (pcp), val)
433#define raw_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val) 433#define this_cpu_add_2(pcp, val) percpu_add_op(volatile, (pcp), val)
434#define raw_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val) 434#define this_cpu_add_4(pcp, val) percpu_add_op(volatile, (pcp), val)
435#define raw_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val) 435#define this_cpu_and_1(pcp, val) percpu_to_op(volatile, "and", (pcp), val)
436#define raw_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) 436#define this_cpu_and_2(pcp, val) percpu_to_op(volatile, "and", (pcp), val)
437#define raw_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) 437#define this_cpu_and_4(pcp, val) percpu_to_op(volatile, "and", (pcp), val)
438#define raw_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) 438#define this_cpu_or_1(pcp, val) percpu_to_op(volatile, "or", (pcp), val)
439 439#define this_cpu_or_2(pcp, val) percpu_to_op(volatile, "or", (pcp), val)
440#define this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val) 440#define this_cpu_or_4(pcp, val) percpu_to_op(volatile, "or", (pcp), val)
441#define this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val) 441#define this_cpu_xchg_1(pcp, nval) percpu_xchg_op(volatile, pcp, nval)
442#define this_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val) 442#define this_cpu_xchg_2(pcp, nval) percpu_xchg_op(volatile, pcp, nval)
443#define this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) 443#define this_cpu_xchg_4(pcp, nval) percpu_xchg_op(volatile, pcp, nval)
444#define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) 444
445#define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) 445#define raw_cpu_add_return_1(pcp, val) percpu_add_return_op(, pcp, val)
446#define raw_cpu_add_return_2(pcp, val) percpu_add_return_op(, pcp, val)
447#define raw_cpu_add_return_4(pcp, val) percpu_add_return_op(, pcp, val)
448#define raw_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(, pcp, oval, nval)
449#define raw_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(, pcp, oval, nval)
450#define raw_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(, pcp, oval, nval)
451
452#define this_cpu_add_return_1(pcp, val) percpu_add_return_op(volatile, pcp, val)
453#define this_cpu_add_return_2(pcp, val) percpu_add_return_op(volatile, pcp, val)
454#define this_cpu_add_return_4(pcp, val) percpu_add_return_op(volatile, pcp, val)
455#define this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(volatile, pcp, oval, nval)
456#define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(volatile, pcp, oval, nval)
457#define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(volatile, pcp, oval, nval)
446 458
447#ifdef CONFIG_X86_CMPXCHG64 459#ifdef CONFIG_X86_CMPXCHG64
448#define percpu_cmpxchg8b_double(pcp1, pcp2, o1, o2, n1, n2) \ 460#define percpu_cmpxchg8b_double(pcp1, pcp2, o1, o2, n1, n2) \
@@ -466,23 +478,23 @@ do { \
466 * 32 bit must fall back to generic operations. 478 * 32 bit must fall back to generic operations.
467 */ 479 */
468#ifdef CONFIG_X86_64 480#ifdef CONFIG_X86_64
469#define raw_cpu_read_8(pcp) percpu_from_op("mov", pcp) 481#define raw_cpu_read_8(pcp) percpu_from_op(, "mov", pcp)
470#define raw_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) 482#define raw_cpu_write_8(pcp, val) percpu_to_op(, "mov", (pcp), val)
471#define raw_cpu_add_8(pcp, val) percpu_add_op((pcp), val) 483#define raw_cpu_add_8(pcp, val) percpu_add_op(, (pcp), val)
472#define raw_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) 484#define raw_cpu_and_8(pcp, val) percpu_to_op(, "and", (pcp), val)
473#define raw_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) 485#define raw_cpu_or_8(pcp, val) percpu_to_op(, "or", (pcp), val)
474#define raw_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val) 486#define raw_cpu_add_return_8(pcp, val) percpu_add_return_op(, pcp, val)
475#define raw_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) 487#define raw_cpu_xchg_8(pcp, nval) raw_percpu_xchg_op(pcp, nval)
476#define raw_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) 488#define raw_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(, pcp, oval, nval)
477 489
478#define this_cpu_read_8(pcp) percpu_from_op("mov", pcp) 490#define this_cpu_read_8(pcp) percpu_from_op(volatile, "mov", pcp)
479#define this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) 491#define this_cpu_write_8(pcp, val) percpu_to_op(volatile, "mov", (pcp), val)
480#define this_cpu_add_8(pcp, val) percpu_add_op((pcp), val) 492#define this_cpu_add_8(pcp, val) percpu_add_op(volatile, (pcp), val)
481#define this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) 493#define this_cpu_and_8(pcp, val) percpu_to_op(volatile, "and", (pcp), val)
482#define this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) 494#define this_cpu_or_8(pcp, val) percpu_to_op(volatile, "or", (pcp), val)
483#define this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val) 495#define this_cpu_add_return_8(pcp, val) percpu_add_return_op(volatile, pcp, val)
484#define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) 496#define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(volatile, pcp, nval)
485#define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) 497#define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(volatile, pcp, oval, nval)
486 498
487/* 499/*
488 * Pretty complex macro to generate cmpxchg16 instruction. The instruction 500 * Pretty complex macro to generate cmpxchg16 instruction. The instruction
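The change above threads a qualifier argument through the per-cpu asm macros: the raw_cpu_* wrappers pass an empty token, the this_cpu_* wrappers pass "volatile". A minimal user-space sketch of the same token-splicing idea (not the kernel macros, which additionally apply the per-cpu segment prefix):

    /* demo.c - x86, GNU C; illustrative only */
    #include <stdio.h>

    /* "qual" lands directly in the asm statement: empty for the raw
     * flavour, "volatile" for the this_cpu flavour. */
    #define demo_add_return(qual, var, val)                         \
    ({                                                              \
            __typeof__(var) ret__ = (val);                          \
            asm qual ("xaddl %0, %1"                                \
                      : "+r" (ret__), "+m" (var));                  \
            ret__ += (val);                                         \
            ret__;                                                  \
    })

    int main(void)
    {
            int counter = 10;

            int a = demo_add_return(, counter, 5);          /* raw flavour   */
            int b = demo_add_return(volatile, counter, 5);  /* forced access */

            printf("a=%d b=%d counter=%d\n", a, b, counter); /* a=15 b=20 counter=20 */
            return 0;
    }

The only behavioural difference is what the compiler may do around the statement: the empty-qualifier form can be reordered or dropped if its result is unused, the volatile form cannot.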
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index c34a35c78618..e57d2ca2ed87 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -144,7 +144,8 @@ enum cpuid_regs_idx {
144#define X86_VENDOR_TRANSMETA 7 144#define X86_VENDOR_TRANSMETA 7
145#define X86_VENDOR_NSC 8 145#define X86_VENDOR_NSC 8
146#define X86_VENDOR_HYGON 9 146#define X86_VENDOR_HYGON 9
147#define X86_VENDOR_NUM 10 147#define X86_VENDOR_ZHAOXIN 10
148#define X86_VENDOR_NUM 11
148 149
149#define X86_VENDOR_UNKNOWN 0xff 150#define X86_VENDOR_UNKNOWN 0xff
150 151
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index b6033680d458..19b695ff2c68 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -2,7 +2,7 @@
2#ifndef _ASM_X86_PVCLOCK_H 2#ifndef _ASM_X86_PVCLOCK_H
3#define _ASM_X86_PVCLOCK_H 3#define _ASM_X86_PVCLOCK_H
4 4
5#include <linux/clocksource.h> 5#include <asm/clocksource.h>
6#include <asm/pvclock-abi.h> 6#include <asm/pvclock-abi.h>
7 7
8/* some helper functions for xen and kvm pv clock sources */ 8/* some helper functions for xen and kvm pv clock sources */
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index da545df207b2..0d3fe060a44f 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -162,7 +162,8 @@ __visible void smp_call_function_single_interrupt(struct pt_regs *r);
162 * from the initial startup. We map APIC_BASE very early in page_setup(), 162 * from the initial startup. We map APIC_BASE very early in page_setup(),
163 * so this is correct in the x86 case. 163 * so this is correct in the x86 case.
164 */ 164 */
165#define raw_smp_processor_id() (this_cpu_read(cpu_number)) 165#define raw_smp_processor_id() this_cpu_read(cpu_number)
166#define __smp_processor_id() __this_cpu_read(cpu_number)
166 167
167#ifdef CONFIG_X86_32 168#ifdef CONFIG_X86_32
168extern int safe_smp_processor_id(void); 169extern int safe_smp_processor_id(void);
diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h
index 880b5515b1d6..d83e9f771d86 100644
--- a/arch/x86/include/asm/text-patching.h
+++ b/arch/x86/include/asm/text-patching.h
@@ -18,6 +18,20 @@ static inline void apply_paravirt(struct paravirt_patch_site *start,
18#define __parainstructions_end NULL 18#define __parainstructions_end NULL
19#endif 19#endif
20 20
21/*
22 * Currently, the max observed size in the kernel code is
 23 * JUMP_LABEL_NOP_SIZE/RELATIVEJUMP_SIZE, both of which are 5.

24 * Raise it if needed.
25 */
26#define POKE_MAX_OPCODE_SIZE 5
27
28struct text_poke_loc {
29 void *detour;
30 void *addr;
31 size_t len;
32 const char opcode[POKE_MAX_OPCODE_SIZE];
33};
34
21extern void text_poke_early(void *addr, const void *opcode, size_t len); 35extern void text_poke_early(void *addr, const void *opcode, size_t len);
22 36
23/* 37/*
@@ -38,6 +52,7 @@ extern void *text_poke(void *addr, const void *opcode, size_t len);
38extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len); 52extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len);
39extern int poke_int3_handler(struct pt_regs *regs); 53extern int poke_int3_handler(struct pt_regs *regs);
40extern void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler); 54extern void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler);
55extern void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries);
41extern int after_bootmem; 56extern int after_bootmem;
42extern __ro_after_init struct mm_struct *poking_mm; 57extern __ro_after_init struct mm_struct *poking_mm;
43extern __ro_after_init unsigned long poking_addr; 58extern __ro_after_init unsigned long poking_addr;
diff --git a/arch/x86/include/asm/time.h b/arch/x86/include/asm/time.h
index cef818b16045..8ac563abb567 100644
--- a/arch/x86/include/asm/time.h
+++ b/arch/x86/include/asm/time.h
@@ -7,6 +7,7 @@
7 7
8extern void hpet_time_init(void); 8extern void hpet_time_init(void);
9extern void time_init(void); 9extern void time_init(void);
10extern bool pit_timer_init(void);
10 11
11extern struct clock_event_device *global_clock_event; 12extern struct clock_event_device *global_clock_event;
12 13
diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h
new file mode 100644
index 000000000000..ae91429129a6
--- /dev/null
+++ b/arch/x86/include/asm/vdso/gettimeofday.h
@@ -0,0 +1,261 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2/*
3 * Fast user context implementation of clock_gettime, gettimeofday, and time.
4 *
5 * Copyright (C) 2019 ARM Limited.
6 * Copyright 2006 Andi Kleen, SUSE Labs.
7 * 32 Bit compat layer by Stefani Seibold <stefani@seibold.net>
8 * sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany
9 */
10#ifndef __ASM_VDSO_GETTIMEOFDAY_H
11#define __ASM_VDSO_GETTIMEOFDAY_H
12
13#ifndef __ASSEMBLY__
14
15#include <uapi/linux/time.h>
16#include <asm/vgtod.h>
17#include <asm/vvar.h>
18#include <asm/unistd.h>
19#include <asm/msr.h>
20#include <asm/pvclock.h>
21#include <clocksource/hyperv_timer.h>
22
23#define __vdso_data (VVAR(_vdso_data))
24
25#define VDSO_HAS_TIME 1
26
27#define VDSO_HAS_CLOCK_GETRES 1
28
29/*
30 * Declare the memory-mapped vclock data pages. These come from hypervisors.
31 * If we ever reintroduce something like direct access to an MMIO clock like
32 * the HPET again, it will go here as well.
33 *
34 * A load from any of these pages will segfault if the clock in question is
35 * disabled, so appropriate compiler barriers and checks need to be used
36 * to prevent stray loads.
37 *
38 * These declarations MUST NOT be const. The compiler will assume that
39 * an extern const variable has genuinely constant contents, and the
40 * resulting code won't work, since the whole point is that these pages
41 * change over time, possibly while we're accessing them.
42 */
43
44#ifdef CONFIG_PARAVIRT_CLOCK
45/*
46 * This is the vCPU 0 pvclock page. We only use pvclock from the vDSO
47 * if the hypervisor tells us that all vCPUs can get valid data from the
48 * vCPU 0 page.
49 */
50extern struct pvclock_vsyscall_time_info pvclock_page
51 __attribute__((visibility("hidden")));
52#endif
53
54#ifdef CONFIG_HYPERV_TSCPAGE
55extern struct ms_hyperv_tsc_page hvclock_page
56 __attribute__((visibility("hidden")));
57#endif
58
59#ifndef BUILD_VDSO32
60
61static __always_inline
62long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
63{
64 long ret;
65
66 asm ("syscall" : "=a" (ret), "=m" (*_ts) :
67 "0" (__NR_clock_gettime), "D" (_clkid), "S" (_ts) :
68 "rcx", "r11");
69
70 return ret;
71}
72
73static __always_inline
74long gettimeofday_fallback(struct __kernel_old_timeval *_tv,
75 struct timezone *_tz)
76{
77 long ret;
78
79 asm("syscall" : "=a" (ret) :
80 "0" (__NR_gettimeofday), "D" (_tv), "S" (_tz) : "memory");
81
82 return ret;
83}
84
85static __always_inline
86long clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
87{
88 long ret;
89
90 asm ("syscall" : "=a" (ret), "=m" (*_ts) :
91 "0" (__NR_clock_getres), "D" (_clkid), "S" (_ts) :
92 "rcx", "r11");
93
94 return ret;
95}
96
97#else
98
99static __always_inline
100long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
101{
102 long ret;
103
104 asm (
105 "mov %%ebx, %%edx \n"
106 "mov %[clock], %%ebx \n"
107 "call __kernel_vsyscall \n"
108 "mov %%edx, %%ebx \n"
109 : "=a" (ret), "=m" (*_ts)
110 : "0" (__NR_clock_gettime64), [clock] "g" (_clkid), "c" (_ts)
111 : "edx");
112
113 return ret;
114}
115
116static __always_inline
117long gettimeofday_fallback(struct __kernel_old_timeval *_tv,
118 struct timezone *_tz)
119{
120 long ret;
121
122 asm(
123 "mov %%ebx, %%edx \n"
124 "mov %2, %%ebx \n"
125 "call __kernel_vsyscall \n"
126 "mov %%edx, %%ebx \n"
127 : "=a" (ret)
128 : "0" (__NR_gettimeofday), "g" (_tv), "c" (_tz)
129 : "memory", "edx");
130
131 return ret;
132}
133
134static __always_inline long
135clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
136{
137 long ret;
138
139 asm (
140 "mov %%ebx, %%edx \n"
141 "mov %[clock], %%ebx \n"
142 "call __kernel_vsyscall \n"
143 "mov %%edx, %%ebx \n"
144 : "=a" (ret), "=m" (*_ts)
145 : "0" (__NR_clock_getres_time64), [clock] "g" (_clkid), "c" (_ts)
146 : "edx");
147
148 return ret;
149}
150
151#endif
152
153#ifdef CONFIG_PARAVIRT_CLOCK
154static u64 vread_pvclock(void)
155{
156 const struct pvclock_vcpu_time_info *pvti = &pvclock_page.pvti;
157 u32 version;
158 u64 ret;
159
160 /*
161 * Note: The kernel and hypervisor must guarantee that cpu ID
162 * number maps 1:1 to per-CPU pvclock time info.
163 *
164 * Because the hypervisor is entirely unaware of guest userspace
165 * preemption, it cannot guarantee that per-CPU pvclock time
 166 * info is updated if the underlying CPU changes or that the
 167 * version is increased whenever the underlying CPU changes.
168 *
169 * On KVM, we are guaranteed that pvti updates for any vCPU are
170 * atomic as seen by *all* vCPUs. This is an even stronger
171 * guarantee than we get with a normal seqlock.
172 *
173 * On Xen, we don't appear to have that guarantee, but Xen still
174 * supplies a valid seqlock using the version field.
175 *
176 * We only do pvclock vdso timing at all if
177 * PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to
178 * mean that all vCPUs have matching pvti and that the TSC is
179 * synced, so we can just look at vCPU 0's pvti.
180 */
181
182 do {
183 version = pvclock_read_begin(pvti);
184
185 if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT)))
186 return U64_MAX;
187
188 ret = __pvclock_read_cycles(pvti, rdtsc_ordered());
189 } while (pvclock_read_retry(pvti, version));
190
191 return ret;
192}
193#endif
194
195#ifdef CONFIG_HYPERV_TSCPAGE
196static u64 vread_hvclock(void)
197{
198 return hv_read_tsc_page(&hvclock_page);
199}
200#endif
201
202static inline u64 __arch_get_hw_counter(s32 clock_mode)
203{
204 if (clock_mode == VCLOCK_TSC)
205 return (u64)rdtsc_ordered();
206 /*
207 * For any memory-mapped vclock type, we need to make sure that gcc
208 * doesn't cleverly hoist a load before the mode check. Otherwise we
209 * might end up touching the memory-mapped page even if the vclock in
210 * question isn't enabled, which will segfault. Hence the barriers.
211 */
212#ifdef CONFIG_PARAVIRT_CLOCK
213 if (clock_mode == VCLOCK_PVCLOCK) {
214 barrier();
215 return vread_pvclock();
216 }
217#endif
218#ifdef CONFIG_HYPERV_TSCPAGE
219 if (clock_mode == VCLOCK_HVCLOCK) {
220 barrier();
221 return vread_hvclock();
222 }
223#endif
224 return U64_MAX;
225}
226
227static __always_inline const struct vdso_data *__arch_get_vdso_data(void)
228{
229 return __vdso_data;
230}
231
232/*
233 * x86 specific delta calculation.
234 *
235 * The regular implementation assumes that clocksource reads are globally
236 * monotonic. The TSC can be slightly off across sockets which can cause
237 * the regular delta calculation (@cycles - @last) to return a huge time
238 * jump.
239 *
240 * Therefore it needs to be verified that @cycles are greater than
241 * @last. If not then use @last, which is the base time of the current
242 * conversion period.
243 *
244 * This variant also removes the masking of the subtraction because the
245 * clocksource mask of all VDSO capable clocksources on x86 is U64_MAX
246 * which would result in a pointless operation. The compiler cannot
247 * optimize it away as the mask comes from the vdso data and is not compile
248 * time constant.
249 */
250static __always_inline
251u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
252{
253 if (cycles > last)
254 return (cycles - last) * mult;
255 return 0;
256}
257#define vdso_calc_delta vdso_calc_delta
258
259#endif /* !__ASSEMBLY__ */
260
261#endif /* __ASM_VDSO_GETTIMEOFDAY_H */
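The vdso_calc_delta() comment above amounts to clamping a TSC read that lands slightly behind cycle_last (as can happen across sockets) instead of letting the subtraction wrap. A standalone user-space sketch contrasting the generic masked delta with the guarded x86 variant; the numbers are made up:

    #include <stdio.h>
    #include <stdint.h>
    #include <inttypes.h>

    static uint64_t generic_delta(uint64_t cycles, uint64_t last,
                                  uint64_t mask, uint32_t mult)
    {
            return ((cycles - last) & mask) * mult;
    }

    static uint64_t x86_delta(uint64_t cycles, uint64_t last, uint32_t mult)
    {
            if (cycles > last)
                    return (cycles - last) * mult;
            return 0;               /* stay on the base time of this period */
    }

    int main(void)
    {
            uint64_t last = 1000000, cycles = 999990;   /* reader 10 cycles behind */

            printf("generic: %" PRIu64 "\n",
                   generic_delta(cycles, last, UINT64_MAX, 3));
            printf("x86:     %" PRIu64 "\n", x86_delta(cycles, last, 3));
            return 0;
    }

The generic version returns an enormous value (the wrapped difference times mult); the guarded version returns 0, which the caller then adds to the unchanged base time.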
diff --git a/arch/x86/include/asm/vdso/vsyscall.h b/arch/x86/include/asm/vdso/vsyscall.h
new file mode 100644
index 000000000000..0026ab2123ce
--- /dev/null
+++ b/arch/x86/include/asm/vdso/vsyscall.h
@@ -0,0 +1,44 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2#ifndef __ASM_VDSO_VSYSCALL_H
3#define __ASM_VDSO_VSYSCALL_H
4
5#ifndef __ASSEMBLY__
6
7#include <linux/hrtimer.h>
8#include <linux/timekeeper_internal.h>
9#include <vdso/datapage.h>
10#include <asm/vgtod.h>
11#include <asm/vvar.h>
12
13int vclocks_used __read_mostly;
14
15DEFINE_VVAR(struct vdso_data, _vdso_data);
16/*
17 * Update the vDSO data page to keep in sync with kernel timekeeping.
18 */
19static __always_inline
20struct vdso_data *__x86_get_k_vdso_data(void)
21{
22 return _vdso_data;
23}
24#define __arch_get_k_vdso_data __x86_get_k_vdso_data
25
26static __always_inline
27int __x86_get_clock_mode(struct timekeeper *tk)
28{
29 int vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode;
30
31 /* Mark the new vclock used. */
32 BUILD_BUG_ON(VCLOCK_MAX >= 32);
33 WRITE_ONCE(vclocks_used, READ_ONCE(vclocks_used) | (1 << vclock_mode));
34
35 return vclock_mode;
36}
37#define __arch_get_clock_mode __x86_get_clock_mode
38
39/* The asm-generic header needs to be included after the definitions above */
40#include <asm-generic/vdso/vsyscall.h>
41
42#endif /* !__ASSEMBLY__ */
43
44#endif /* __ASM_VDSO_VSYSCALL_H */
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index 913a133f8e6f..a2638c6124ed 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -3,7 +3,9 @@
3#define _ASM_X86_VGTOD_H 3#define _ASM_X86_VGTOD_H
4 4
5#include <linux/compiler.h> 5#include <linux/compiler.h>
6#include <linux/clocksource.h> 6#include <asm/clocksource.h>
7#include <vdso/datapage.h>
8#include <vdso/helpers.h>
7 9
8#include <uapi/linux/time.h> 10#include <uapi/linux/time.h>
9 11
@@ -13,81 +15,10 @@ typedef u64 gtod_long_t;
13typedef unsigned long gtod_long_t; 15typedef unsigned long gtod_long_t;
14#endif 16#endif
15 17
16/*
17 * There is one of these objects in the vvar page for each
18 * vDSO-accelerated clockid. For high-resolution clocks, this encodes
19 * the time corresponding to vsyscall_gtod_data.cycle_last. For coarse
20 * clocks, this encodes the actual time.
21 *
22 * To confuse the reader, for high-resolution clocks, nsec is left-shifted
23 * by vsyscall_gtod_data.shift.
24 */
25struct vgtod_ts {
26 u64 sec;
27 u64 nsec;
28};
29
30#define VGTOD_BASES (CLOCK_TAI + 1)
31#define VGTOD_HRES (BIT(CLOCK_REALTIME) | BIT(CLOCK_MONOTONIC) | BIT(CLOCK_TAI))
32#define VGTOD_COARSE (BIT(CLOCK_REALTIME_COARSE) | BIT(CLOCK_MONOTONIC_COARSE))
33
34/*
35 * vsyscall_gtod_data will be accessed by 32 and 64 bit code at the same time
36 * so be carefull by modifying this structure.
37 */
38struct vsyscall_gtod_data {
39 unsigned int seq;
40
41 int vclock_mode;
42 u64 cycle_last;
43 u64 mask;
44 u32 mult;
45 u32 shift;
46
47 struct vgtod_ts basetime[VGTOD_BASES];
48
49 int tz_minuteswest;
50 int tz_dsttime;
51};
52extern struct vsyscall_gtod_data vsyscall_gtod_data;
53
54extern int vclocks_used; 18extern int vclocks_used;
55static inline bool vclock_was_used(int vclock) 19static inline bool vclock_was_used(int vclock)
56{ 20{
57 return READ_ONCE(vclocks_used) & (1 << vclock); 21 return READ_ONCE(vclocks_used) & (1 << vclock);
58} 22}
59 23
60static inline unsigned int gtod_read_begin(const struct vsyscall_gtod_data *s)
61{
62 unsigned int ret;
63
64repeat:
65 ret = READ_ONCE(s->seq);
66 if (unlikely(ret & 1)) {
67 cpu_relax();
68 goto repeat;
69 }
70 smp_rmb();
71 return ret;
72}
73
74static inline int gtod_read_retry(const struct vsyscall_gtod_data *s,
75 unsigned int start)
76{
77 smp_rmb();
78 return unlikely(s->seq != start);
79}
80
81static inline void gtod_write_begin(struct vsyscall_gtod_data *s)
82{
83 ++s->seq;
84 smp_wmb();
85}
86
87static inline void gtod_write_end(struct vsyscall_gtod_data *s)
88{
89 smp_wmb();
90 ++s->seq;
91}
92
93#endif /* _ASM_X86_VGTOD_H */ 24#endif /* _ASM_X86_VGTOD_H */
diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h
index b986b2ca688a..ab60a71a8dcb 100644
--- a/arch/x86/include/asm/vsyscall.h
+++ b/arch/x86/include/asm/vsyscall.h
@@ -13,10 +13,12 @@ extern void set_vsyscall_pgtable_user_bits(pgd_t *root);
13 * Called on instruction fetch fault in vsyscall page. 13 * Called on instruction fetch fault in vsyscall page.
14 * Returns true if handled. 14 * Returns true if handled.
15 */ 15 */
16extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address); 16extern bool emulate_vsyscall(unsigned long error_code,
17 struct pt_regs *regs, unsigned long address);
17#else 18#else
18static inline void map_vsyscall(void) {} 19static inline void map_vsyscall(void) {}
19static inline bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) 20static inline bool emulate_vsyscall(unsigned long error_code,
21 struct pt_regs *regs, unsigned long address)
20{ 22{
21 return false; 23 return false;
22} 24}
diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h
index e474f5c6e387..32f5d9a0b90e 100644
--- a/arch/x86/include/asm/vvar.h
+++ b/arch/x86/include/asm/vvar.h
@@ -32,19 +32,20 @@
32extern char __vvar_page; 32extern char __vvar_page;
33 33
34#define DECLARE_VVAR(offset, type, name) \ 34#define DECLARE_VVAR(offset, type, name) \
35 extern type vvar_ ## name __attribute__((visibility("hidden"))); 35 extern type vvar_ ## name[CS_BASES] \
36 __attribute__((visibility("hidden")));
36 37
37#define VVAR(name) (vvar_ ## name) 38#define VVAR(name) (vvar_ ## name)
38 39
39#define DEFINE_VVAR(type, name) \ 40#define DEFINE_VVAR(type, name) \
40 type name \ 41 type name[CS_BASES] \
41 __attribute__((section(".vvar_" #name), aligned(16))) __visible 42 __attribute__((section(".vvar_" #name), aligned(16))) __visible
42 43
43#endif 44#endif
44 45
45/* DECLARE_VVAR(offset, type, name) */ 46/* DECLARE_VVAR(offset, type, name) */
46 47
47DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data) 48DECLARE_VVAR(128, struct vdso_data, _vdso_data)
48 49
49#undef DECLARE_VVAR 50#undef DECLARE_VVAR
50 51
diff --git a/arch/x86/include/uapi/asm/perf_regs.h b/arch/x86/include/uapi/asm/perf_regs.h
index ac67bbea10ca..7c9d2bb3833b 100644
--- a/arch/x86/include/uapi/asm/perf_regs.h
+++ b/arch/x86/include/uapi/asm/perf_regs.h
@@ -52,4 +52,7 @@ enum perf_event_x86_regs {
52 /* These include both GPRs and XMMX registers */ 52 /* These include both GPRs and XMMX registers */
53 PERF_REG_X86_XMM_MAX = PERF_REG_X86_XMM15 + 2, 53 PERF_REG_X86_XMM_MAX = PERF_REG_X86_XMM15 + 2,
54}; 54};
55
56#define PERF_REG_EXTENDED_MASK (~((1ULL << PERF_REG_X86_XMM0) - 1))
57
55#endif /* _ASM_X86_PERF_REGS_H */ 58#endif /* _ASM_X86_PERF_REGS_H */
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index a5e5484988fd..caf2edccbad2 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -64,6 +64,21 @@ void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags,
64 c->x86_stepping >= 0x0e)) 64 c->x86_stepping >= 0x0e))
65 flags->bm_check = 1; 65 flags->bm_check = 1;
66 } 66 }
67
68 if (c->x86_vendor == X86_VENDOR_ZHAOXIN) {
69 /*
70 * All Zhaoxin CPUs that support C3 share cache.
71 * And caches should not be flushed by software while
72 * entering C3 type state.
73 */
74 flags->bm_check = 1;
75 /*
76 * On all recent Zhaoxin platforms, ARB_DISABLE is a nop.
77 * So, set bm_control to zero to indicate that ARB_DISABLE
78 * is not required while entering C3 type state.
79 */
80 flags->bm_control = 0;
81 }
67} 82}
68EXPORT_SYMBOL(acpi_processor_power_init_bm_check); 83EXPORT_SYMBOL(acpi_processor_power_init_bm_check);
69 84
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 390596b761e3..bd542f9b0953 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -14,6 +14,7 @@
14#include <linux/kdebug.h> 14#include <linux/kdebug.h>
15#include <linux/kprobes.h> 15#include <linux/kprobes.h>
16#include <linux/mmu_context.h> 16#include <linux/mmu_context.h>
17#include <linux/bsearch.h>
17#include <asm/text-patching.h> 18#include <asm/text-patching.h>
18#include <asm/alternative.h> 19#include <asm/alternative.h>
19#include <asm/sections.h> 20#include <asm/sections.h>
@@ -848,81 +849,133 @@ static void do_sync_core(void *info)
848 sync_core(); 849 sync_core();
849} 850}
850 851
851static bool bp_patching_in_progress; 852static struct bp_patching_desc {
852static void *bp_int3_handler, *bp_int3_addr; 853 struct text_poke_loc *vec;
854 int nr_entries;
855} bp_patching;
856
857static int patch_cmp(const void *key, const void *elt)
858{
859 struct text_poke_loc *tp = (struct text_poke_loc *) elt;
860
861 if (key < tp->addr)
862 return -1;
863 if (key > tp->addr)
864 return 1;
865 return 0;
866}
867NOKPROBE_SYMBOL(patch_cmp);
853 868
854int poke_int3_handler(struct pt_regs *regs) 869int poke_int3_handler(struct pt_regs *regs)
855{ 870{
871 struct text_poke_loc *tp;
872 unsigned char int3 = 0xcc;
873 void *ip;
874
856 /* 875 /*
857 * Having observed our INT3 instruction, we now must observe 876 * Having observed our INT3 instruction, we now must observe
858 * bp_patching_in_progress. 877 * bp_patching.nr_entries.
859 * 878 *
860 * in_progress = TRUE INT3 879 * nr_entries != 0 INT3
861 * WMB RMB 880 * WMB RMB
862 * write INT3 if (in_progress) 881 * write INT3 if (nr_entries)
863 * 882 *
864 * Idem for bp_int3_handler. 883 * Idem for other elements in bp_patching.
865 */ 884 */
866 smp_rmb(); 885 smp_rmb();
867 886
868 if (likely(!bp_patching_in_progress)) 887 if (likely(!bp_patching.nr_entries))
869 return 0; 888 return 0;
870 889
871 if (user_mode(regs) || regs->ip != (unsigned long)bp_int3_addr) 890 if (user_mode(regs))
872 return 0; 891 return 0;
873 892
874 /* set up the specified breakpoint handler */ 893 /*
875 regs->ip = (unsigned long) bp_int3_handler; 894 * Discount the sizeof(int3). See text_poke_bp_batch().
895 */
896 ip = (void *) regs->ip - sizeof(int3);
897
898 /*
899 * Skip the binary search if there is a single member in the vector.
900 */
901 if (unlikely(bp_patching.nr_entries > 1)) {
902 tp = bsearch(ip, bp_patching.vec, bp_patching.nr_entries,
903 sizeof(struct text_poke_loc),
904 patch_cmp);
905 if (!tp)
906 return 0;
907 } else {
908 tp = bp_patching.vec;
909 if (tp->addr != ip)
910 return 0;
911 }
912
913 /* set up the specified breakpoint detour */
914 regs->ip = (unsigned long) tp->detour;
876 915
877 return 1; 916 return 1;
878} 917}
879NOKPROBE_SYMBOL(poke_int3_handler); 918NOKPROBE_SYMBOL(poke_int3_handler);
880 919
881/** 920/**
882 * text_poke_bp() -- update instructions on live kernel on SMP 921 * text_poke_bp_batch() -- update instructions on live kernel on SMP
883 * @addr: address to patch 922 * @tp: vector of instructions to patch
884 * @opcode: opcode of new instruction 923 * @nr_entries: number of entries in the vector
885 * @len: length to copy
886 * @handler: address to jump to when the temporary breakpoint is hit
887 * 924 *
888 * Modify multi-byte instruction by using int3 breakpoint on SMP. 925 * Modify multi-byte instruction by using int3 breakpoint on SMP.
889 * We completely avoid stop_machine() here, and achieve the 926 * We completely avoid stop_machine() here, and achieve the
890 * synchronization using int3 breakpoint. 927 * synchronization using int3 breakpoint.
891 * 928 *
892 * The way it is done: 929 * The way it is done:
893 * - add a int3 trap to the address that will be patched 930 * - For each entry in the vector:
 931 * - add an int3 trap to the address that will be patched
894 * - sync cores 932 * - sync cores
895 * - update all but the first byte of the patched range 933 * - For each entry in the vector:
934 * - update all but the first byte of the patched range
896 * - sync cores 935 * - sync cores
897 * - replace the first byte (int3) by the first byte of 936 * - For each entry in the vector:
898 * replacing opcode 937 * - replace the first byte (int3) by the first byte of
938 * replacing opcode
899 * - sync cores 939 * - sync cores
900 */ 940 */
901void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler) 941void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
902{ 942{
943 int patched_all_but_first = 0;
903 unsigned char int3 = 0xcc; 944 unsigned char int3 = 0xcc;
904 945 unsigned int i;
905 bp_int3_handler = handler;
906 bp_int3_addr = (u8 *)addr + sizeof(int3);
907 bp_patching_in_progress = true;
908 946
909 lockdep_assert_held(&text_mutex); 947 lockdep_assert_held(&text_mutex);
910 948
949 bp_patching.vec = tp;
950 bp_patching.nr_entries = nr_entries;
951
911 /* 952 /*
912 * Corresponding read barrier in int3 notifier for making sure the 953 * Corresponding read barrier in int3 notifier for making sure the
913 * in_progress and handler are correctly ordered wrt. patching. 954 * nr_entries and handler are correctly ordered wrt. patching.
914 */ 955 */
915 smp_wmb(); 956 smp_wmb();
916 957
917 text_poke(addr, &int3, sizeof(int3)); 958 /*
 959 * First step: add an int3 trap to the address that will be patched.
960 */
961 for (i = 0; i < nr_entries; i++)
962 text_poke(tp[i].addr, &int3, sizeof(int3));
918 963
919 on_each_cpu(do_sync_core, NULL, 1); 964 on_each_cpu(do_sync_core, NULL, 1);
920 965
921 if (len - sizeof(int3) > 0) { 966 /*
922 /* patch all but the first byte */ 967 * Second step: update all but the first byte of the patched range.
923 text_poke((char *)addr + sizeof(int3), 968 */
924 (const char *) opcode + sizeof(int3), 969 for (i = 0; i < nr_entries; i++) {
925 len - sizeof(int3)); 970 if (tp[i].len - sizeof(int3) > 0) {
971 text_poke((char *)tp[i].addr + sizeof(int3),
972 (const char *)tp[i].opcode + sizeof(int3),
973 tp[i].len - sizeof(int3));
974 patched_all_but_first++;
975 }
976 }
977
978 if (patched_all_but_first) {
926 /* 979 /*
927 * According to Intel, this core syncing is very likely 980 * According to Intel, this core syncing is very likely
928 * not necessary and we'd be safe even without it. But 981 * not necessary and we'd be safe even without it. But
@@ -931,14 +984,47 @@ void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler)
931 on_each_cpu(do_sync_core, NULL, 1); 984 on_each_cpu(do_sync_core, NULL, 1);
932 } 985 }
933 986
934 /* patch the first byte */ 987 /*
935 text_poke(addr, opcode, sizeof(int3)); 988 * Third step: replace the first byte (int3) by the first byte of
989 * replacing opcode.
990 */
991 for (i = 0; i < nr_entries; i++)
992 text_poke(tp[i].addr, tp[i].opcode, sizeof(int3));
936 993
937 on_each_cpu(do_sync_core, NULL, 1); 994 on_each_cpu(do_sync_core, NULL, 1);
938 /* 995 /*
939 * sync_core() implies an smp_mb() and orders this store against 996 * sync_core() implies an smp_mb() and orders this store against
940 * the writing of the new instruction. 997 * the writing of the new instruction.
941 */ 998 */
942 bp_patching_in_progress = false; 999 bp_patching.vec = NULL;
1000 bp_patching.nr_entries = 0;
943} 1001}
944 1002
1003/**
1004 * text_poke_bp() -- update instructions on live kernel on SMP
1005 * @addr: address to patch
1006 * @opcode: opcode of new instruction
1007 * @len: length to copy
1008 * @handler: address to jump to when the temporary breakpoint is hit
1009 *
1010 * Update a single instruction with the vector in the stack, avoiding
1011 * dynamically allocated memory. This function should be used when it is
1012 * not possible to allocate memory.
1013 */
1014void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler)
1015{
1016 struct text_poke_loc tp = {
1017 .detour = handler,
1018 .addr = addr,
1019 .len = len,
1020 };
1021
1022 if (len > POKE_MAX_OPCODE_SIZE) {
1023 WARN_ONCE(1, "len is larger than %d\n", POKE_MAX_OPCODE_SIZE);
1024 return;
1025 }
1026
1027 memcpy((void *)tp.opcode, opcode, len);
1028
1029 text_poke_bp_batch(&tp, 1);
1030}
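On the handler side, the interesting piece is the lookup: with more than one pending patch site, poke_int3_handler() does a bsearch() over the vector, which assumes the caller keeps it sorted by address. A small user-space sketch of that lookup; the struct only loosely mirrors text_poke_loc:

    #include <stdio.h>
    #include <stdlib.h>

    struct demo_loc {
            const char *detour;     /* stands in for the handler address */
            void *addr;
    };

    /* same shape as patch_cmp(): key is the faulting address */
    static int demo_cmp(const void *key, const void *elt)
    {
            const struct demo_loc *tp = elt;

            if (key < tp->addr)
                    return -1;
            if (key > tp->addr)
                    return 1;
            return 0;
    }

    int main(void)
    {
            static char text[16];               /* stand-in for patched kernel text */
            struct demo_loc vec[] = {           /* must stay sorted by addr */
                    { "detour_a", &text[2]  },
                    { "detour_b", &text[7]  },
                    { "detour_c", &text[12] },
            };
            void *trap_ip = &text[7];           /* where the int3 fired */
            struct demo_loc *tp;

            tp = bsearch(trap_ip, vec, 3, sizeof(*vec), demo_cmp);
            printf("detour = %s\n", tp ? tp->detour : "none");
            return 0;
    }

text_poke_bp() is now just the single-entry case of the same machinery, with the text_poke_loc kept on the stack.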
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 177aa8ef2afa..1bd91cb7b320 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -195,7 +195,7 @@ static struct resource lapic_resource = {
195 .flags = IORESOURCE_MEM | IORESOURCE_BUSY, 195 .flags = IORESOURCE_MEM | IORESOURCE_BUSY,
196}; 196};
197 197
198unsigned int lapic_timer_frequency = 0; 198unsigned int lapic_timer_period = 0;
199 199
200static void apic_pm_activate(void); 200static void apic_pm_activate(void);
201 201
@@ -501,7 +501,7 @@ lapic_timer_set_periodic_oneshot(struct clock_event_device *evt, bool oneshot)
501 if (evt->features & CLOCK_EVT_FEAT_DUMMY) 501 if (evt->features & CLOCK_EVT_FEAT_DUMMY)
502 return 0; 502 return 0;
503 503
504 __setup_APIC_LVTT(lapic_timer_frequency, oneshot, 1); 504 __setup_APIC_LVTT(lapic_timer_period, oneshot, 1);
505 return 0; 505 return 0;
506} 506}
507 507
@@ -805,11 +805,11 @@ calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc)
805 805
806static int __init lapic_init_clockevent(void) 806static int __init lapic_init_clockevent(void)
807{ 807{
808 if (!lapic_timer_frequency) 808 if (!lapic_timer_period)
809 return -1; 809 return -1;
810 810
811 /* Calculate the scaled math multiplication factor */ 811 /* Calculate the scaled math multiplication factor */
812 lapic_clockevent.mult = div_sc(lapic_timer_frequency/APIC_DIVISOR, 812 lapic_clockevent.mult = div_sc(lapic_timer_period/APIC_DIVISOR,
813 TICK_NSEC, lapic_clockevent.shift); 813 TICK_NSEC, lapic_clockevent.shift);
814 lapic_clockevent.max_delta_ns = 814 lapic_clockevent.max_delta_ns =
815 clockevent_delta2ns(0x7FFFFFFF, &lapic_clockevent); 815 clockevent_delta2ns(0x7FFFFFFF, &lapic_clockevent);
@@ -821,6 +821,33 @@ static int __init lapic_init_clockevent(void)
821 return 0; 821 return 0;
822} 822}
823 823
824bool __init apic_needs_pit(void)
825{
826 /*
827 * If the frequencies are not known, PIT is required for both TSC
828 * and apic timer calibration.
829 */
830 if (!tsc_khz || !cpu_khz)
831 return true;
832
833 /* Is there an APIC at all? */
834 if (!boot_cpu_has(X86_FEATURE_APIC))
835 return true;
836
837 /* Deadline timer is based on TSC so no further PIT action required */
838 if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
839 return false;
840
841 /* APIC timer disabled? */
842 if (disable_apic_timer)
843 return true;
844 /*
845 * The APIC timer frequency is known already, no PIT calibration
846 * required. If unknown, let the PIT be initialized.
847 */
848 return lapic_timer_period == 0;
849}
850
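apic_needs_pit() is a pure predicate; together with the pit_timer_init() declaration added to asm/time.h earlier in this series, the intended consumer is the x86 timer setup path. A hedged sketch of such a call site (not the literal kernel code):

    /* kernel-context sketch; return-value handling of pit_timer_init()
     * is assumed, not taken from this series */
    static void __init sketch_pit_setup(void)
    {
            /* Only bring up the PIT clockevent when TSC/APIC timer
             * calibration still needs it. */
            if (apic_needs_pit())
                    pit_timer_init();
    }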
824static int __init calibrate_APIC_clock(void) 851static int __init calibrate_APIC_clock(void)
825{ 852{
826 struct clock_event_device *levt = this_cpu_ptr(&lapic_events); 853 struct clock_event_device *levt = this_cpu_ptr(&lapic_events);
@@ -839,7 +866,7 @@ static int __init calibrate_APIC_clock(void)
839 */ 866 */
840 if (!lapic_init_clockevent()) { 867 if (!lapic_init_clockevent()) {
841 apic_printk(APIC_VERBOSE, "lapic timer already calibrated %d\n", 868 apic_printk(APIC_VERBOSE, "lapic timer already calibrated %d\n",
842 lapic_timer_frequency); 869 lapic_timer_period);
843 /* 870 /*
844 * Direct calibration methods must have an always running 871 * Direct calibration methods must have an always running
845 * local APIC timer, no need for broadcast timer. 872 * local APIC timer, no need for broadcast timer.
@@ -884,13 +911,13 @@ static int __init calibrate_APIC_clock(void)
884 pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1, 911 pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1,
885 &delta, &deltatsc); 912 &delta, &deltatsc);
886 913
887 lapic_timer_frequency = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS; 914 lapic_timer_period = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS;
888 lapic_init_clockevent(); 915 lapic_init_clockevent();
889 916
890 apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta); 917 apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta);
891 apic_printk(APIC_VERBOSE, "..... mult: %u\n", lapic_clockevent.mult); 918 apic_printk(APIC_VERBOSE, "..... mult: %u\n", lapic_clockevent.mult);
892 apic_printk(APIC_VERBOSE, "..... calibration result: %u\n", 919 apic_printk(APIC_VERBOSE, "..... calibration result: %u\n",
893 lapic_timer_frequency); 920 lapic_timer_period);
894 921
895 if (boot_cpu_has(X86_FEATURE_TSC)) { 922 if (boot_cpu_has(X86_FEATURE_TSC)) {
896 apic_printk(APIC_VERBOSE, "..... CPU clock speed is " 923 apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
@@ -901,13 +928,13 @@ static int __init calibrate_APIC_clock(void)
901 928
902 apic_printk(APIC_VERBOSE, "..... host bus clock speed is " 929 apic_printk(APIC_VERBOSE, "..... host bus clock speed is "
903 "%u.%04u MHz.\n", 930 "%u.%04u MHz.\n",
904 lapic_timer_frequency / (1000000 / HZ), 931 lapic_timer_period / (1000000 / HZ),
905 lapic_timer_frequency % (1000000 / HZ)); 932 lapic_timer_period % (1000000 / HZ));
906 933
907 /* 934 /*
908 * Do a sanity check on the APIC calibration result 935 * Do a sanity check on the APIC calibration result
909 */ 936 */
910 if (lapic_timer_frequency < (1000000 / HZ)) { 937 if (lapic_timer_period < (1000000 / HZ)) {
911 local_irq_enable(); 938 local_irq_enable();
912 pr_warning("APIC frequency too slow, disabling apic timer\n"); 939 pr_warning("APIC frequency too slow, disabling apic timer\n");
913 return -1; 940 return -1;
@@ -1351,6 +1378,8 @@ void __init init_bsp_APIC(void)
1351 apic_write(APIC_LVT1, value); 1378 apic_write(APIC_LVT1, value);
1352} 1379}
1353 1380
1381static void __init apic_bsp_setup(bool upmode);
1382
1354/* Init the interrupt delivery mode for the BSP */ 1383/* Init the interrupt delivery mode for the BSP */
1355void __init apic_intr_mode_init(void) 1384void __init apic_intr_mode_init(void)
1356{ 1385{
@@ -1464,7 +1493,8 @@ static void apic_pending_intr_clear(void)
1464 if (queued) { 1493 if (queued) {
1465 if (boot_cpu_has(X86_FEATURE_TSC) && cpu_khz) { 1494 if (boot_cpu_has(X86_FEATURE_TSC) && cpu_khz) {
1466 ntsc = rdtsc(); 1495 ntsc = rdtsc();
1467 max_loops = (cpu_khz << 10) - (ntsc - tsc); 1496 max_loops = (long long)cpu_khz << 10;
1497 max_loops -= ntsc - tsc;
1468 } else { 1498 } else {
1469 max_loops--; 1499 max_loops--;
1470 } 1500 }
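The cast matters because cpu_khz is a 32-bit unsigned int in the kernel, so the shift would be evaluated in 32 bits and wrap for CPUs above roughly 4.2 GHz before the result ever reaches the 64-bit max_loops. A quick user-space illustration with a made-up frequency:

    #include <stdio.h>

    int main(void)
    {
            unsigned int cpu_khz = 4500000;              /* ~4.5 GHz */
            long long wrong = cpu_khz << 10;             /* shifted as 32 bits, wraps */
            long long right = (long long)cpu_khz << 10;  /* widened before the shift */

            printf("wrong: %lld\nright: %lld\n", wrong, right);
            return 0;
    }

This prints 313032704 for the wrapped variant and 4608000000 for the widened one.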
@@ -2040,21 +2070,32 @@ __visible void __irq_entry smp_spurious_interrupt(struct pt_regs *regs)
2040 entering_irq(); 2070 entering_irq();
2041 trace_spurious_apic_entry(vector); 2071 trace_spurious_apic_entry(vector);
2042 2072
2073 inc_irq_stat(irq_spurious_count);
2074
2075 /*
2076 * If this is a spurious interrupt then do not acknowledge
2077 */
2078 if (vector == SPURIOUS_APIC_VECTOR) {
2079 /* See SDM vol 3 */
2080 pr_info("Spurious APIC interrupt (vector 0xFF) on CPU#%d, should never happen.\n",
2081 smp_processor_id());
2082 goto out;
2083 }
2084
2043 /* 2085 /*
2044 * Check if this really is a spurious interrupt and ACK it 2086 * If it is a vectored one, verify it's set in the ISR. If set,
2045 * if it is a vectored one. Just in case... 2087 * acknowledge it.
2046 * Spurious interrupts should not be ACKed.
2047 */ 2088 */
2048 v = apic_read(APIC_ISR + ((vector & ~0x1f) >> 1)); 2089 v = apic_read(APIC_ISR + ((vector & ~0x1f) >> 1));
2049 if (v & (1 << (vector & 0x1f))) 2090 if (v & (1 << (vector & 0x1f))) {
2091 pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Acked\n",
2092 vector, smp_processor_id());
2050 ack_APIC_irq(); 2093 ack_APIC_irq();
2051 2094 } else {
2052 inc_irq_stat(irq_spurious_count); 2095 pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Not pending!\n",
2053 2096 vector, smp_processor_id());
2054 /* see sw-dev-man vol 3, chapter 7.4.13.5 */ 2097 }
2055 pr_info("spurious APIC interrupt through vector %02x on CPU#%d, " 2098out:
2056 "should never happen.\n", vector, smp_processor_id());
2057
2058 trace_spurious_apic_exit(vector); 2099 trace_spurious_apic_exit(vector);
2059 exiting_irq(); 2100 exiting_irq();
2060} 2101}
@@ -2415,11 +2456,8 @@ static void __init apic_bsp_up_setup(void)
2415/** 2456/**
2416 * apic_bsp_setup - Setup function for local apic and io-apic 2457 * apic_bsp_setup - Setup function for local apic and io-apic
2417 * @upmode: Force UP mode (for APIC_init_uniprocessor) 2458 * @upmode: Force UP mode (for APIC_init_uniprocessor)
2418 *
2419 * Returns:
2420 * apic_id of BSP APIC
2421 */ 2459 */
2422void __init apic_bsp_setup(bool upmode) 2460static void __init apic_bsp_setup(bool upmode)
2423{ 2461{
2424 connect_bsp_APIC(); 2462 connect_bsp_APIC();
2425 if (upmode) 2463 if (upmode)
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index bf083c3f1d73..bbdca603f94a 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -78,7 +78,7 @@ flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector)
78 int cpu = smp_processor_id(); 78 int cpu = smp_processor_id();
79 79
80 if (cpu < BITS_PER_LONG) 80 if (cpu < BITS_PER_LONG)
81 clear_bit(cpu, &mask); 81 __clear_bit(cpu, &mask);
82 82
83 _flat_send_IPI_mask(mask, vector); 83 _flat_send_IPI_mask(mask, vector);
84} 84}
@@ -92,7 +92,7 @@ static void flat_send_IPI_allbutself(int vector)
92 unsigned long mask = cpumask_bits(cpu_online_mask)[0]; 92 unsigned long mask = cpumask_bits(cpu_online_mask)[0];
93 93
94 if (cpu < BITS_PER_LONG) 94 if (cpu < BITS_PER_LONG)
95 clear_bit(cpu, &mask); 95 __clear_bit(cpu, &mask);
96 96
97 _flat_send_IPI_mask(mask, vector); 97 _flat_send_IPI_mask(mask, vector);
98 } 98 }
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 53aa234a6803..c7bb6c69f21c 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -58,6 +58,7 @@
58#include <asm/acpi.h> 58#include <asm/acpi.h>
59#include <asm/dma.h> 59#include <asm/dma.h>
60#include <asm/timer.h> 60#include <asm/timer.h>
61#include <asm/time.h>
61#include <asm/i8259.h> 62#include <asm/i8259.h>
62#include <asm/setup.h> 63#include <asm/setup.h>
63#include <asm/irq_remapping.h> 64#include <asm/irq_remapping.h>
@@ -1893,6 +1894,50 @@ static int ioapic_set_affinity(struct irq_data *irq_data,
1893 return ret; 1894 return ret;
1894} 1895}
1895 1896
1897/*
 1898 * Interrupt shutdown masks the ioapic pin, but the interrupt might already
 1899 * be in flight and not yet serviced by the target CPU. That means
1900 * __synchronize_hardirq() would return and claim that everything is calmed
1901 * down. So free_irq() would proceed and deactivate the interrupt and free
1902 * resources.
1903 *
 1904 * Once the target CPU comes around to service it, it will find a cleared
1905 * vector and complain. While the spurious interrupt is harmless, the full
1906 * release of resources might prevent the interrupt from being acknowledged
1907 * which keeps the hardware in a weird state.
1908 *
1909 * Verify that the corresponding Remote-IRR bits are clear.
1910 */
1911static int ioapic_irq_get_chip_state(struct irq_data *irqd,
1912 enum irqchip_irq_state which,
1913 bool *state)
1914{
1915 struct mp_chip_data *mcd = irqd->chip_data;
1916 struct IO_APIC_route_entry rentry;
1917 struct irq_pin_list *p;
1918
1919 if (which != IRQCHIP_STATE_ACTIVE)
1920 return -EINVAL;
1921
1922 *state = false;
1923 raw_spin_lock(&ioapic_lock);
1924 for_each_irq_pin(p, mcd->irq_2_pin) {
1925 rentry = __ioapic_read_entry(p->apic, p->pin);
1926 /*
 1927 * The remote IRR is only valid in level trigger mode. Its
1928 * meaning is undefined for edge triggered interrupts and
1929 * irrelevant because the IO-APIC treats them as fire and
1930 * forget.
1931 */
1932 if (rentry.irr && rentry.trigger) {
1933 *state = true;
1934 break;
1935 }
1936 }
1937 raw_spin_unlock(&ioapic_lock);
1938 return 0;
1939}
1940
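The new callback is exposed through the generic irq_get_irqchip_state() API. A hedged kernel-context sketch of how a teardown path could poll it until an in-flight level-triggered interrupt has really been serviced; the actual consumer is the __synchronize_hardirq()/free_irq() path, which differs in detail:

    /* sketch only */
    static void wait_for_ioapic_quiet(unsigned int irq)
    {
            bool active;

            do {
                    active = false;
                    if (irq_get_irqchip_state(irq, IRQCHIP_STATE_ACTIVE, &active))
                            break;          /* chip cannot report the state */
                    if (active)
                            cpu_relax();
            } while (active);
    }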
1896static struct irq_chip ioapic_chip __read_mostly = { 1941static struct irq_chip ioapic_chip __read_mostly = {
1897 .name = "IO-APIC", 1942 .name = "IO-APIC",
1898 .irq_startup = startup_ioapic_irq, 1943 .irq_startup = startup_ioapic_irq,
@@ -1902,6 +1947,7 @@ static struct irq_chip ioapic_chip __read_mostly = {
1902 .irq_eoi = ioapic_ack_level, 1947 .irq_eoi = ioapic_ack_level,
1903 .irq_set_affinity = ioapic_set_affinity, 1948 .irq_set_affinity = ioapic_set_affinity,
1904 .irq_retrigger = irq_chip_retrigger_hierarchy, 1949 .irq_retrigger = irq_chip_retrigger_hierarchy,
1950 .irq_get_irqchip_state = ioapic_irq_get_chip_state,
1905 .flags = IRQCHIP_SKIP_SET_WAKE, 1951 .flags = IRQCHIP_SKIP_SET_WAKE,
1906}; 1952};
1907 1953
@@ -1914,6 +1960,7 @@ static struct irq_chip ioapic_ir_chip __read_mostly = {
1914 .irq_eoi = ioapic_ir_ack_level, 1960 .irq_eoi = ioapic_ir_ack_level,
1915 .irq_set_affinity = ioapic_set_affinity, 1961 .irq_set_affinity = ioapic_set_affinity,
1916 .irq_retrigger = irq_chip_retrigger_hierarchy, 1962 .irq_retrigger = irq_chip_retrigger_hierarchy,
1963 .irq_get_irqchip_state = ioapic_irq_get_chip_state,
1917 .flags = IRQCHIP_SKIP_SET_WAKE, 1964 .flags = IRQCHIP_SKIP_SET_WAKE,
1918}; 1965};
1919 1966
@@ -2083,6 +2130,9 @@ static inline void __init check_timer(void)
2083 unsigned long flags; 2130 unsigned long flags;
2084 int no_pin1 = 0; 2131 int no_pin1 = 0;
2085 2132
2133 if (!global_clock_event)
2134 return;
2135
2086 local_irq_save(flags); 2136 local_irq_save(flags);
2087 2137
2088 /* 2138 /*
diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c
index dad0dd759de2..7f7533462474 100644
--- a/arch/x86/kernel/apic/msi.c
+++ b/arch/x86/kernel/apic/msi.c
@@ -370,14 +370,14 @@ struct irq_domain *hpet_create_irq_domain(int hpet_id)
370 return d; 370 return d;
371} 371}
372 372
373int hpet_assign_irq(struct irq_domain *domain, struct hpet_dev *dev, 373int hpet_assign_irq(struct irq_domain *domain, struct hpet_channel *hc,
374 int dev_num) 374 int dev_num)
375{ 375{
376 struct irq_alloc_info info; 376 struct irq_alloc_info info;
377 377
378 init_irq_alloc_info(&info, NULL); 378 init_irq_alloc_info(&info, NULL);
379 info.type = X86_IRQ_ALLOC_TYPE_HPET; 379 info.type = X86_IRQ_ALLOC_TYPE_HPET;
380 info.hpet_data = dev; 380 info.hpet_data = hc;
381 info.hpet_id = hpet_dev_id(domain); 381 info.hpet_id = hpet_dev_id(domain);
382 info.hpet_index = dev_num; 382 info.hpet_index = dev_num;
383 383
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index e7cb78aed644..fdacb864c3dd 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -340,7 +340,7 @@ static void clear_irq_vector(struct irq_data *irqd)
340 trace_vector_clear(irqd->irq, vector, apicd->cpu, apicd->prev_vector, 340 trace_vector_clear(irqd->irq, vector, apicd->cpu, apicd->prev_vector,
341 apicd->prev_cpu); 341 apicd->prev_cpu);
342 342
343 per_cpu(vector_irq, apicd->cpu)[vector] = VECTOR_UNUSED; 343 per_cpu(vector_irq, apicd->cpu)[vector] = VECTOR_SHUTDOWN;
344 irq_matrix_free(vector_matrix, apicd->cpu, vector, managed); 344 irq_matrix_free(vector_matrix, apicd->cpu, vector, managed);
345 apicd->vector = 0; 345 apicd->vector = 0;
346 346
@@ -349,7 +349,7 @@ static void clear_irq_vector(struct irq_data *irqd)
349 if (!vector) 349 if (!vector)
350 return; 350 return;
351 351
352 per_cpu(vector_irq, apicd->prev_cpu)[vector] = VECTOR_UNUSED; 352 per_cpu(vector_irq, apicd->prev_cpu)[vector] = VECTOR_SHUTDOWN;
353 irq_matrix_free(vector_matrix, apicd->prev_cpu, vector, managed); 353 irq_matrix_free(vector_matrix, apicd->prev_cpu, vector, managed);
354 apicd->prev_vector = 0; 354 apicd->prev_vector = 0;
355 apicd->move_in_progress = 0; 355 apicd->move_in_progress = 0;
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 7685444a106b..609e499387a1 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -50,7 +50,7 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest)
50 cpumask_copy(tmpmsk, mask); 50 cpumask_copy(tmpmsk, mask);
51 /* If IPI should not be sent to self, clear current CPU */ 51 /* If IPI should not be sent to self, clear current CPU */
52 if (apic_dest != APIC_DEST_ALLINC) 52 if (apic_dest != APIC_DEST_ALLINC)
53 cpumask_clear_cpu(smp_processor_id(), tmpmsk); 53 __cpumask_clear_cpu(smp_processor_id(), tmpmsk);
54 54
55 /* Collapse cpus in a cluster so a single IPI per cluster is sent */ 55 /* Collapse cpus in a cluster so a single IPI per cluster is sent */
56 for_each_cpu(cpu, tmpmsk) { 56 for_each_cpu(cpu, tmpmsk) {
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 5102bf7c8192..4b4eb06e117c 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -24,6 +24,7 @@ obj-y += match.o
24obj-y += bugs.o 24obj-y += bugs.o
25obj-y += aperfmperf.o 25obj-y += aperfmperf.o
26obj-y += cpuid-deps.o 26obj-y += cpuid-deps.o
27obj-y += umwait.o
27 28
28obj-$(CONFIG_PROC_FS) += proc.o 29obj-$(CONFIG_PROC_FS) += proc.o
29obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o 30obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o
@@ -38,6 +39,7 @@ obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o
38obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o 39obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o
39obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o 40obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o
40obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o 41obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o
42obj-$(CONFIG_CPU_SUP_ZHAOXIN) += zhaoxin.o
41 43
42obj-$(CONFIG_X86_MCE) += mce/ 44obj-$(CONFIG_X86_MCE) += mce/
43obj-$(CONFIG_MTRR) += mtrr/ 45obj-$(CONFIG_MTRR) += mtrr/
diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c
index e71a6ff8a67e..e2f319dc992d 100644
--- a/arch/x86/kernel/cpu/aperfmperf.c
+++ b/arch/x86/kernel/cpu/aperfmperf.c
@@ -13,6 +13,7 @@
13#include <linux/percpu.h> 13#include <linux/percpu.h>
14#include <linux/cpufreq.h> 14#include <linux/cpufreq.h>
15#include <linux/smp.h> 15#include <linux/smp.h>
16#include <linux/sched/isolation.h>
16 17
17#include "cpu.h" 18#include "cpu.h"
18 19
@@ -85,6 +86,9 @@ unsigned int aperfmperf_get_khz(int cpu)
85 if (!boot_cpu_has(X86_FEATURE_APERFMPERF)) 86 if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
86 return 0; 87 return 0;
87 88
89 if (!housekeeping_cpu(cpu, HK_FLAG_MISC))
90 return 0;
91
88 aperfmperf_snapshot_cpu(cpu, ktime_get(), true); 92 aperfmperf_snapshot_cpu(cpu, ktime_get(), true);
89 return per_cpu(samples.khz, cpu); 93 return per_cpu(samples.khz, cpu);
90} 94}
@@ -101,9 +105,12 @@ void arch_freq_prepare_all(void)
101 if (!boot_cpu_has(X86_FEATURE_APERFMPERF)) 105 if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
102 return; 106 return;
103 107
104 for_each_online_cpu(cpu) 108 for_each_online_cpu(cpu) {
109 if (!housekeeping_cpu(cpu, HK_FLAG_MISC))
110 continue;
105 if (!aperfmperf_snapshot_cpu(cpu, now, false)) 111 if (!aperfmperf_snapshot_cpu(cpu, now, false))
106 wait = true; 112 wait = true;
113 }
107 114
108 if (wait) 115 if (wait)
109 msleep(APERFMPERF_REFRESH_DELAY_MS); 116 msleep(APERFMPERF_REFRESH_DELAY_MS);
@@ -117,6 +124,9 @@ unsigned int arch_freq_get_on_cpu(int cpu)
117 if (!boot_cpu_has(X86_FEATURE_APERFMPERF)) 124 if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
118 return 0; 125 return 0;
119 126
127 if (!housekeeping_cpu(cpu, HK_FLAG_MISC))
128 return 0;
129
120 if (aperfmperf_snapshot_cpu(cpu, ktime_get(), true)) 130 if (aperfmperf_snapshot_cpu(cpu, ktime_get(), true))
121 return per_cpu(samples.khz, cpu); 131 return per_cpu(samples.khz, cpu);
122 132
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 03b4cc0ec3a7..66ca906aa790 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -836,6 +836,16 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void)
836 } 836 }
837 837
838 /* 838 /*
839 * If SSBD is controlled by the SPEC_CTRL MSR, then set the proper
840 * bit in the mask to allow guests to use the mitigation even in the
841 * case where the host does not enable it.
842 */
843 if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
844 static_cpu_has(X86_FEATURE_AMD_SSBD)) {
845 x86_spec_ctrl_mask |= SPEC_CTRL_SSBD;
846 }
847
848 /*
839 * We have three CPU feature flags that are in play here: 849 * We have three CPU feature flags that are in play here:
840 * - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible. 850 * - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible.
841 * - X86_FEATURE_SSBD - CPU is able to turn off speculative store bypass 851 * - X86_FEATURE_SSBD - CPU is able to turn off speculative store bypass
@@ -852,7 +862,6 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void)
852 x86_amd_ssb_disable(); 862 x86_amd_ssb_disable();
853 } else { 863 } else {
854 x86_spec_ctrl_base |= SPEC_CTRL_SSBD; 864 x86_spec_ctrl_base |= SPEC_CTRL_SSBD;
855 x86_spec_ctrl_mask |= SPEC_CTRL_SSBD;
856 wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); 865 wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
857 } 866 }
858 } 867 }
diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c
index 395d46f78582..c7503be92f35 100644
--- a/arch/x86/kernel/cpu/cacheinfo.c
+++ b/arch/x86/kernel/cpu/cacheinfo.c
@@ -658,8 +658,7 @@ void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id)
658 if (c->x86 < 0x17) { 658 if (c->x86 < 0x17) {
659 /* LLC is at the node level. */ 659 /* LLC is at the node level. */
660 per_cpu(cpu_llc_id, cpu) = node_id; 660 per_cpu(cpu_llc_id, cpu) = node_id;
661 } else if (c->x86 == 0x17 && 661 } else if (c->x86 == 0x17 && c->x86_model <= 0x1F) {
662 c->x86_model >= 0 && c->x86_model <= 0x1F) {
663 /* 662 /*
664 * LLC is at the core complex level. 663 * LLC is at the core complex level.
665 * Core complex ID is ApicId[3] for these processors. 664 * Core complex ID is ApicId[3] for these processors.
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 2c57fffebf9b..dad20bc891d5 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -801,6 +801,30 @@ static void init_speculation_control(struct cpuinfo_x86 *c)
801 } 801 }
802} 802}
803 803
804static void init_cqm(struct cpuinfo_x86 *c)
805{
806 if (!cpu_has(c, X86_FEATURE_CQM_LLC)) {
807 c->x86_cache_max_rmid = -1;
808 c->x86_cache_occ_scale = -1;
809 return;
810 }
811
812 /* will be overridden if occupancy monitoring exists */
813 c->x86_cache_max_rmid = cpuid_ebx(0xf);
814
815 if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC) ||
816 cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL) ||
817 cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)) {
818 u32 eax, ebx, ecx, edx;
819
820 /* QoS sub-leaf, EAX=0Fh, ECX=1 */
821 cpuid_count(0xf, 1, &eax, &ebx, &ecx, &edx);
822
823 c->x86_cache_max_rmid = ecx;
824 c->x86_cache_occ_scale = ebx;
825 }
826}
827
804void get_cpu_cap(struct cpuinfo_x86 *c) 828void get_cpu_cap(struct cpuinfo_x86 *c)
805{ 829{
806 u32 eax, ebx, ecx, edx; 830 u32 eax, ebx, ecx, edx;
@@ -823,6 +847,12 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
823 c->x86_capability[CPUID_7_0_EBX] = ebx; 847 c->x86_capability[CPUID_7_0_EBX] = ebx;
824 c->x86_capability[CPUID_7_ECX] = ecx; 848 c->x86_capability[CPUID_7_ECX] = ecx;
825 c->x86_capability[CPUID_7_EDX] = edx; 849 c->x86_capability[CPUID_7_EDX] = edx;
850
851 /* Check valid sub-leaf index before accessing it */
852 if (eax >= 1) {
853 cpuid_count(0x00000007, 1, &eax, &ebx, &ecx, &edx);
854 c->x86_capability[CPUID_7_1_EAX] = eax;
855 }
826 } 856 }
827 857
828 /* Extended state features: level 0x0000000d */ 858 /* Extended state features: level 0x0000000d */
@@ -832,33 +862,6 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
832 c->x86_capability[CPUID_D_1_EAX] = eax; 862 c->x86_capability[CPUID_D_1_EAX] = eax;
833 } 863 }
834 864
835 /* Additional Intel-defined flags: level 0x0000000F */
836 if (c->cpuid_level >= 0x0000000F) {
837
838 /* QoS sub-leaf, EAX=0Fh, ECX=0 */
839 cpuid_count(0x0000000F, 0, &eax, &ebx, &ecx, &edx);
840 c->x86_capability[CPUID_F_0_EDX] = edx;
841
842 if (cpu_has(c, X86_FEATURE_CQM_LLC)) {
843 /* will be overridden if occupancy monitoring exists */
844 c->x86_cache_max_rmid = ebx;
845
846 /* QoS sub-leaf, EAX=0Fh, ECX=1 */
847 cpuid_count(0x0000000F, 1, &eax, &ebx, &ecx, &edx);
848 c->x86_capability[CPUID_F_1_EDX] = edx;
849
850 if ((cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC)) ||
851 ((cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL)) ||
852 (cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)))) {
853 c->x86_cache_max_rmid = ecx;
854 c->x86_cache_occ_scale = ebx;
855 }
856 } else {
857 c->x86_cache_max_rmid = -1;
858 c->x86_cache_occ_scale = -1;
859 }
860 }
861
862 /* AMD-defined flags: level 0x80000001 */ 865 /* AMD-defined flags: level 0x80000001 */
863 eax = cpuid_eax(0x80000000); 866 eax = cpuid_eax(0x80000000);
864 c->extended_cpuid_level = eax; 867 c->extended_cpuid_level = eax;
@@ -889,6 +892,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
889 892
890 init_scattered_cpuid_features(c); 893 init_scattered_cpuid_features(c);
891 init_speculation_control(c); 894 init_speculation_control(c);
895 init_cqm(c);
892 896
893 /* 897 /*
894 * Clear/Set all flags overridden by options, after probe. 898 * Clear/Set all flags overridden by options, after probe.
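init_cqm() reads the same CPUID leaf (0xF and its sub-leaf 1) that the removed get_cpu_cap() block did, just in one place. A user-space sketch using the compiler's <cpuid.h> helpers to dump those leaves; it only reports something useful on hardware with RDT/CQM monitoring:

    #include <stdio.h>
    #include <cpuid.h>

    int main(void)
    {
            unsigned int eax, ebx, ecx, edx;

            if (!__get_cpuid_count(0x0f, 0, &eax, &ebx, &ecx, &edx)) {
                    puts("CPUID leaf 0xf not supported");
                    return 1;
            }
            printf("0xf.0: max RMID %u, resource bitmap edx=%#x\n", ebx, edx);

            if (__get_cpuid_count(0x0f, 1, &eax, &ebx, &ecx, &edx))
                    printf("0xf.1: max RMID %u, occupancy scale %u\n", ecx, ebx);
            return 0;
    }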
diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
index 2c0bd38a44ab..b5353244749b 100644
--- a/arch/x86/kernel/cpu/cpuid-deps.c
+++ b/arch/x86/kernel/cpu/cpuid-deps.c
@@ -20,6 +20,7 @@ struct cpuid_dep {
20 * but it's difficult to tell that to the init reference checker. 20 * but it's difficult to tell that to the init reference checker.
21 */ 21 */
22static const struct cpuid_dep cpuid_deps[] = { 22static const struct cpuid_dep cpuid_deps[] = {
23 { X86_FEATURE_FXSR, X86_FEATURE_FPU },
23 { X86_FEATURE_XSAVEOPT, X86_FEATURE_XSAVE }, 24 { X86_FEATURE_XSAVEOPT, X86_FEATURE_XSAVE },
24 { X86_FEATURE_XSAVEC, X86_FEATURE_XSAVE }, 25 { X86_FEATURE_XSAVEC, X86_FEATURE_XSAVE },
25 { X86_FEATURE_XSAVES, X86_FEATURE_XSAVE }, 26 { X86_FEATURE_XSAVES, X86_FEATURE_XSAVE },
@@ -27,7 +28,11 @@ static const struct cpuid_dep cpuid_deps[] = {
27 { X86_FEATURE_PKU, X86_FEATURE_XSAVE }, 28 { X86_FEATURE_PKU, X86_FEATURE_XSAVE },
28 { X86_FEATURE_MPX, X86_FEATURE_XSAVE }, 29 { X86_FEATURE_MPX, X86_FEATURE_XSAVE },
29 { X86_FEATURE_XGETBV1, X86_FEATURE_XSAVE }, 30 { X86_FEATURE_XGETBV1, X86_FEATURE_XSAVE },
31 { X86_FEATURE_CMOV, X86_FEATURE_FXSR },
32 { X86_FEATURE_MMX, X86_FEATURE_FXSR },
33 { X86_FEATURE_MMXEXT, X86_FEATURE_MMX },
30 { X86_FEATURE_FXSR_OPT, X86_FEATURE_FXSR }, 34 { X86_FEATURE_FXSR_OPT, X86_FEATURE_FXSR },
35 { X86_FEATURE_XSAVE, X86_FEATURE_FXSR },
31 { X86_FEATURE_XMM, X86_FEATURE_FXSR }, 36 { X86_FEATURE_XMM, X86_FEATURE_FXSR },
32 { X86_FEATURE_XMM2, X86_FEATURE_XMM }, 37 { X86_FEATURE_XMM2, X86_FEATURE_XMM },
33 { X86_FEATURE_XMM3, X86_FEATURE_XMM2 }, 38 { X86_FEATURE_XMM3, X86_FEATURE_XMM2 },
@@ -59,6 +64,10 @@ static const struct cpuid_dep cpuid_deps[] = {
59 { X86_FEATURE_AVX512_4VNNIW, X86_FEATURE_AVX512F }, 64 { X86_FEATURE_AVX512_4VNNIW, X86_FEATURE_AVX512F },
60 { X86_FEATURE_AVX512_4FMAPS, X86_FEATURE_AVX512F }, 65 { X86_FEATURE_AVX512_4FMAPS, X86_FEATURE_AVX512F },
61 { X86_FEATURE_AVX512_VPOPCNTDQ, X86_FEATURE_AVX512F }, 66 { X86_FEATURE_AVX512_VPOPCNTDQ, X86_FEATURE_AVX512F },
67 { X86_FEATURE_CQM_OCCUP_LLC, X86_FEATURE_CQM_LLC },
68 { X86_FEATURE_CQM_MBM_TOTAL, X86_FEATURE_CQM_LLC },
69 { X86_FEATURE_CQM_MBM_LOCAL, X86_FEATURE_CQM_LLC },
70 { X86_FEATURE_AVX512_BF16, X86_FEATURE_AVX512VL },
62 {} 71 {}
63}; 72};
64 73
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index f17c1a714779..8d6d92ebeb54 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -66,6 +66,32 @@ void check_mpx_erratum(struct cpuinfo_x86 *c)
66 } 66 }
67} 67}
68 68
69/*
70 * Processors which have self-snooping capability can handle conflicting
 71 * memory types across CPUs by snooping their own cache. However, there exist
 72 * CPU models in which having conflicting memory types still leads to
73 * unpredictable behavior, machine check errors, or hangs. Clear this
 74 * feature to prevent its use on machines with known errata.
75 */
76static void check_memory_type_self_snoop_errata(struct cpuinfo_x86 *c)
77{
78 switch (c->x86_model) {
79 case INTEL_FAM6_CORE_YONAH:
80 case INTEL_FAM6_CORE2_MEROM:
81 case INTEL_FAM6_CORE2_MEROM_L:
82 case INTEL_FAM6_CORE2_PENRYN:
83 case INTEL_FAM6_CORE2_DUNNINGTON:
84 case INTEL_FAM6_NEHALEM:
85 case INTEL_FAM6_NEHALEM_G:
86 case INTEL_FAM6_NEHALEM_EP:
87 case INTEL_FAM6_NEHALEM_EX:
88 case INTEL_FAM6_WESTMERE:
89 case INTEL_FAM6_WESTMERE_EP:
90 case INTEL_FAM6_SANDYBRIDGE:
91 setup_clear_cpu_cap(X86_FEATURE_SELFSNOOP);
92 }
93}
94
69static bool ring3mwait_disabled __read_mostly; 95static bool ring3mwait_disabled __read_mostly;
70 96
71static int __init ring3mwait_disable(char *__unused) 97static int __init ring3mwait_disable(char *__unused)
@@ -304,6 +330,7 @@ static void early_init_intel(struct cpuinfo_x86 *c)
304 } 330 }
305 331
306 check_mpx_erratum(c); 332 check_mpx_erratum(c);
333 check_memory_type_self_snoop_errata(c);
307 334
308 /* 335 /*
309 * Get the number of SMT siblings early from the extended topology 336 * Get the number of SMT siblings early from the extended topology
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 785050af85e5..6ea7fdc82f3c 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -99,11 +99,6 @@ static struct smca_bank_name smca_names[] = {
99 [SMCA_PCIE] = { "pcie", "PCI Express Unit" }, 99 [SMCA_PCIE] = { "pcie", "PCI Express Unit" },
100}; 100};
101 101
102static u32 smca_bank_addrs[MAX_NR_BANKS][NR_BLOCKS] __ro_after_init =
103{
104 [0 ... MAX_NR_BANKS - 1] = { [0 ... NR_BLOCKS - 1] = -1 }
105};
106
107static const char *smca_get_name(enum smca_bank_types t) 102static const char *smca_get_name(enum smca_bank_types t)
108{ 103{
109 if (t >= N_SMCA_BANK_TYPES) 104 if (t >= N_SMCA_BANK_TYPES)
@@ -197,6 +192,9 @@ static char buf_mcatype[MAX_MCATYPE_NAME_LEN];
197static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks); 192static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
198static DEFINE_PER_CPU(unsigned int, bank_map); /* see which banks are on */ 193static DEFINE_PER_CPU(unsigned int, bank_map); /* see which banks are on */
199 194
195/* Map of banks that have more than MCA_MISC0 available. */
196static DEFINE_PER_CPU(u32, smca_misc_banks_map);
197
200static void amd_threshold_interrupt(void); 198static void amd_threshold_interrupt(void);
201static void amd_deferred_error_interrupt(void); 199static void amd_deferred_error_interrupt(void);
202 200
@@ -206,6 +204,28 @@ static void default_deferred_error_interrupt(void)
206} 204}
207void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt; 205void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt;
208 206
207static void smca_set_misc_banks_map(unsigned int bank, unsigned int cpu)
208{
209 u32 low, high;
210
211 /*
212 * For SMCA enabled processors, BLKPTR field of the first MISC register
213 * (MCx_MISC0) indicates presence of additional MISC regs set (MISC1-4).
214 */
215 if (rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
216 return;
217
218 if (!(low & MCI_CONFIG_MCAX))
219 return;
220
221 if (rdmsr_safe(MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high))
222 return;
223
224 if (low & MASK_BLKPTR_LO)
225 per_cpu(smca_misc_banks_map, cpu) |= BIT(bank);
226
227}
228
209static void smca_configure(unsigned int bank, unsigned int cpu) 229static void smca_configure(unsigned int bank, unsigned int cpu)
210{ 230{
211 unsigned int i, hwid_mcatype; 231 unsigned int i, hwid_mcatype;
@@ -243,6 +263,8 @@ static void smca_configure(unsigned int bank, unsigned int cpu)
243 wrmsr(smca_config, low, high); 263 wrmsr(smca_config, low, high);
244 } 264 }
245 265
266 smca_set_misc_banks_map(bank, cpu);
267
246 /* Return early if this bank was already initialized. */ 268 /* Return early if this bank was already initialized. */
247 if (smca_banks[bank].hwid) 269 if (smca_banks[bank].hwid)
248 return; 270 return;
@@ -453,50 +475,29 @@ static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
453 wrmsr(MSR_CU_DEF_ERR, low, high); 475 wrmsr(MSR_CU_DEF_ERR, low, high);
454} 476}
455 477
456static u32 smca_get_block_address(unsigned int bank, unsigned int block) 478static u32 smca_get_block_address(unsigned int bank, unsigned int block,
479 unsigned int cpu)
457{ 480{
458 u32 low, high;
459 u32 addr = 0;
460
461 if (smca_get_bank_type(bank) == SMCA_RESERVED)
462 return addr;
463
464 if (!block) 481 if (!block)
465 return MSR_AMD64_SMCA_MCx_MISC(bank); 482 return MSR_AMD64_SMCA_MCx_MISC(bank);
466 483
467 /* Check our cache first: */ 484 if (!(per_cpu(smca_misc_banks_map, cpu) & BIT(bank)))
468 if (smca_bank_addrs[bank][block] != -1) 485 return 0;
469 return smca_bank_addrs[bank][block];
470
471 /*
472 * For SMCA enabled processors, BLKPTR field of the first MISC register
473 * (MCx_MISC0) indicates presence of additional MISC regs set (MISC1-4).
474 */
475 if (rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
476 goto out;
477
478 if (!(low & MCI_CONFIG_MCAX))
479 goto out;
480
481 if (!rdmsr_safe(MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) &&
482 (low & MASK_BLKPTR_LO))
483 addr = MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
484 486
485out: 487 return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
486 smca_bank_addrs[bank][block] = addr;
487 return addr;
488} 488}
489 489
490static u32 get_block_address(u32 current_addr, u32 low, u32 high, 490static u32 get_block_address(u32 current_addr, u32 low, u32 high,
491 unsigned int bank, unsigned int block) 491 unsigned int bank, unsigned int block,
492 unsigned int cpu)
492{ 493{
493 u32 addr = 0, offset = 0; 494 u32 addr = 0, offset = 0;
494 495
495 if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS)) 496 if ((bank >= per_cpu(mce_num_banks, cpu)) || (block >= NR_BLOCKS))
496 return addr; 497 return addr;
497 498
498 if (mce_flags.smca) 499 if (mce_flags.smca)
499 return smca_get_block_address(bank, block); 500 return smca_get_block_address(bank, block, cpu);
500 501
501 /* Fall back to method we used for older processors: */ 502 /* Fall back to method we used for older processors: */
502 switch (block) { 503 switch (block) {
@@ -624,18 +625,19 @@ void disable_err_thresholding(struct cpuinfo_x86 *c, unsigned int bank)
624/* cpu init entry point, called from mce.c with preempt off */ 625/* cpu init entry point, called from mce.c with preempt off */
625void mce_amd_feature_init(struct cpuinfo_x86 *c) 626void mce_amd_feature_init(struct cpuinfo_x86 *c)
626{ 627{
627 u32 low = 0, high = 0, address = 0;
628 unsigned int bank, block, cpu = smp_processor_id(); 628 unsigned int bank, block, cpu = smp_processor_id();
629 u32 low = 0, high = 0, address = 0;
629 int offset = -1; 630 int offset = -1;
630 631
631 for (bank = 0; bank < mca_cfg.banks; ++bank) { 632
633 for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) {
632 if (mce_flags.smca) 634 if (mce_flags.smca)
633 smca_configure(bank, cpu); 635 smca_configure(bank, cpu);
634 636
635 disable_err_thresholding(c, bank); 637 disable_err_thresholding(c, bank);
636 638
637 for (block = 0; block < NR_BLOCKS; ++block) { 639 for (block = 0; block < NR_BLOCKS; ++block) {
638 address = get_block_address(address, low, high, bank, block); 640 address = get_block_address(address, low, high, bank, block, cpu);
639 if (!address) 641 if (!address)
640 break; 642 break;
641 643
@@ -973,7 +975,7 @@ static void amd_deferred_error_interrupt(void)
973{ 975{
974 unsigned int bank; 976 unsigned int bank;
975 977
976 for (bank = 0; bank < mca_cfg.banks; ++bank) 978 for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank)
977 log_error_deferred(bank); 979 log_error_deferred(bank);
978} 980}
979 981
@@ -1014,7 +1016,7 @@ static void amd_threshold_interrupt(void)
1014 struct threshold_block *first_block = NULL, *block = NULL, *tmp = NULL; 1016 struct threshold_block *first_block = NULL, *block = NULL, *tmp = NULL;
1015 unsigned int bank, cpu = smp_processor_id(); 1017 unsigned int bank, cpu = smp_processor_id();
1016 1018
1017 for (bank = 0; bank < mca_cfg.banks; ++bank) { 1019 for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) {
1018 if (!(per_cpu(bank_map, cpu) & (1 << bank))) 1020 if (!(per_cpu(bank_map, cpu) & (1 << bank)))
1019 continue; 1021 continue;
1020 1022
@@ -1201,7 +1203,7 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
1201 u32 low, high; 1203 u32 low, high;
1202 int err; 1204 int err;
1203 1205
1204 if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS)) 1206 if ((bank >= per_cpu(mce_num_banks, cpu)) || (block >= NR_BLOCKS))
1205 return 0; 1207 return 0;
1206 1208
1207 if (rdmsr_safe_on_cpu(cpu, address, &low, &high)) 1209 if (rdmsr_safe_on_cpu(cpu, address, &low, &high))
@@ -1252,7 +1254,7 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
1252 if (err) 1254 if (err)
1253 goto out_free; 1255 goto out_free;
1254recurse: 1256recurse:
1255 address = get_block_address(address, low, high, bank, ++block); 1257 address = get_block_address(address, low, high, bank, ++block, cpu);
1256 if (!address) 1258 if (!address)
1257 return 0; 1259 return 0;
1258 1260
@@ -1435,7 +1437,7 @@ int mce_threshold_remove_device(unsigned int cpu)
1435{ 1437{
1436 unsigned int bank; 1438 unsigned int bank;
1437 1439
1438 for (bank = 0; bank < mca_cfg.banks; ++bank) { 1440 for (bank = 0; bank < per_cpu(mce_num_banks, cpu); ++bank) {
1439 if (!(per_cpu(bank_map, cpu) & (1 << bank))) 1441 if (!(per_cpu(bank_map, cpu) & (1 << bank)))
1440 continue; 1442 continue;
1441 threshold_remove_bank(cpu, bank); 1443 threshold_remove_bank(cpu, bank);
@@ -1456,14 +1458,14 @@ int mce_threshold_create_device(unsigned int cpu)
1456 if (bp) 1458 if (bp)
1457 return 0; 1459 return 0;
1458 1460
1459 bp = kcalloc(mca_cfg.banks, sizeof(struct threshold_bank *), 1461 bp = kcalloc(per_cpu(mce_num_banks, cpu), sizeof(struct threshold_bank *),
1460 GFP_KERNEL); 1462 GFP_KERNEL);
1461 if (!bp) 1463 if (!bp)
1462 return -ENOMEM; 1464 return -ENOMEM;
1463 1465
1464 per_cpu(threshold_banks, cpu) = bp; 1466 per_cpu(threshold_banks, cpu) = bp;
1465 1467
1466 for (bank = 0; bank < mca_cfg.banks; ++bank) { 1468 for (bank = 0; bank < per_cpu(mce_num_banks, cpu); ++bank) {
1467 if (!(per_cpu(bank_map, cpu) & (1 << bank))) 1469 if (!(per_cpu(bank_map, cpu) & (1 << bank)))
1468 continue; 1470 continue;
1469 err = threshold_create_bank(cpu, bank); 1471 err = threshold_create_bank(cpu, bank);
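smca_set_misc_banks_map() above condenses the old per-bank address cache into a single bit per bank in a per-CPU u32, so later lookups in smca_get_block_address() become a plain bit test instead of re-reading MSRs. A rough userspace sketch of the same bookkeeping; the 32-bank limit and function names are invented, and the kernel keeps the map in per_cpu() storage and tests it with BIT(bank).

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_BANKS 32

static uint32_t misc_banks_map;	/* one bit per bank, like smca_misc_banks_map */

/* Record that @bank exposes the extra MISC1-4 register block. */
static void set_misc_bank(unsigned int bank)
{
	if (bank < MAX_BANKS)
		misc_banks_map |= (uint32_t)1 << bank;
}

/* Later lookups become a cheap bit test instead of MSR reads. */
static bool bank_has_misc_regs(unsigned int bank)
{
	return bank < MAX_BANKS && (misc_banks_map & ((uint32_t)1 << bank));
}

int main(void)
{
	set_misc_bank(5);
	printf("bank 5: %d, bank 6: %d\n",
	       bank_has_misc_regs(5), bank_has_misc_regs(6));
	return 0;
}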
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 282916f3b8d8..066562a1ea20 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -65,7 +65,23 @@ static DEFINE_MUTEX(mce_sysfs_mutex);
65 65
66DEFINE_PER_CPU(unsigned, mce_exception_count); 66DEFINE_PER_CPU(unsigned, mce_exception_count);
67 67
68struct mce_bank *mce_banks __read_mostly; 68DEFINE_PER_CPU_READ_MOSTLY(unsigned int, mce_num_banks);
69
70struct mce_bank {
71 u64 ctl; /* subevents to enable */
72 bool init; /* initialise bank? */
73};
74static DEFINE_PER_CPU_READ_MOSTLY(struct mce_bank[MAX_NR_BANKS], mce_banks_array);
75
76#define ATTR_LEN 16
77/* One object for each MCE bank, shared by all CPUs */
78struct mce_bank_dev {
79 struct device_attribute attr; /* device attribute */
80 char attrname[ATTR_LEN]; /* attribute name */
81 u8 bank; /* bank number */
82};
83static struct mce_bank_dev mce_bank_devs[MAX_NR_BANKS];
84
69struct mce_vendor_flags mce_flags __read_mostly; 85struct mce_vendor_flags mce_flags __read_mostly;
70 86
71struct mca_config mca_cfg __read_mostly = { 87struct mca_config mca_cfg __read_mostly = {
@@ -675,6 +691,7 @@ DEFINE_PER_CPU(unsigned, mce_poll_count);
675 */ 691 */
676bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) 692bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
677{ 693{
694 struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
678 bool error_seen = false; 695 bool error_seen = false;
679 struct mce m; 696 struct mce m;
680 int i; 697 int i;
@@ -686,7 +703,7 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
686 if (flags & MCP_TIMESTAMP) 703 if (flags & MCP_TIMESTAMP)
687 m.tsc = rdtsc(); 704 m.tsc = rdtsc();
688 705
689 for (i = 0; i < mca_cfg.banks; i++) { 706 for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
690 if (!mce_banks[i].ctl || !test_bit(i, *b)) 707 if (!mce_banks[i].ctl || !test_bit(i, *b))
691 continue; 708 continue;
692 709
@@ -788,7 +805,7 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
788 char *tmp; 805 char *tmp;
789 int i; 806 int i;
790 807
791 for (i = 0; i < mca_cfg.banks; i++) { 808 for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
792 m->status = mce_rdmsrl(msr_ops.status(i)); 809 m->status = mce_rdmsrl(msr_ops.status(i));
793 if (!(m->status & MCI_STATUS_VAL)) 810 if (!(m->status & MCI_STATUS_VAL))
794 continue; 811 continue;
@@ -1068,7 +1085,7 @@ static void mce_clear_state(unsigned long *toclear)
1068{ 1085{
1069 int i; 1086 int i;
1070 1087
1071 for (i = 0; i < mca_cfg.banks; i++) { 1088 for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
1072 if (test_bit(i, toclear)) 1089 if (test_bit(i, toclear))
1073 mce_wrmsrl(msr_ops.status(i), 0); 1090 mce_wrmsrl(msr_ops.status(i), 0);
1074 } 1091 }
@@ -1122,10 +1139,11 @@ static void __mc_scan_banks(struct mce *m, struct mce *final,
1122 unsigned long *toclear, unsigned long *valid_banks, 1139 unsigned long *toclear, unsigned long *valid_banks,
1123 int no_way_out, int *worst) 1140 int no_way_out, int *worst)
1124{ 1141{
1142 struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
1125 struct mca_config *cfg = &mca_cfg; 1143 struct mca_config *cfg = &mca_cfg;
1126 int severity, i; 1144 int severity, i;
1127 1145
1128 for (i = 0; i < cfg->banks; i++) { 1146 for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
1129 __clear_bit(i, toclear); 1147 __clear_bit(i, toclear);
1130 if (!test_bit(i, valid_banks)) 1148 if (!test_bit(i, valid_banks))
1131 continue; 1149 continue;
@@ -1463,27 +1481,29 @@ int mce_notify_irq(void)
1463} 1481}
1464EXPORT_SYMBOL_GPL(mce_notify_irq); 1482EXPORT_SYMBOL_GPL(mce_notify_irq);
1465 1483
1466static int __mcheck_cpu_mce_banks_init(void) 1484static void __mcheck_cpu_mce_banks_init(void)
1467{ 1485{
1486 struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
1487 u8 n_banks = this_cpu_read(mce_num_banks);
1468 int i; 1488 int i;
1469 1489
1470 mce_banks = kcalloc(MAX_NR_BANKS, sizeof(struct mce_bank), GFP_KERNEL); 1490 for (i = 0; i < n_banks; i++) {
1471 if (!mce_banks)
1472 return -ENOMEM;
1473
1474 for (i = 0; i < MAX_NR_BANKS; i++) {
1475 struct mce_bank *b = &mce_banks[i]; 1491 struct mce_bank *b = &mce_banks[i];
1476 1492
1493 /*
1494 * Init them all, __mcheck_cpu_apply_quirks() is going to apply
1495 * the required vendor quirks before
1496 * __mcheck_cpu_init_clear_banks() does the final bank setup.
1497 */
1477 b->ctl = -1ULL; 1498 b->ctl = -1ULL;
1478 b->init = 1; 1499 b->init = 1;
1479 } 1500 }
1480 return 0;
1481} 1501}
1482 1502
1483/* 1503/*
1484 * Initialize Machine Checks for a CPU. 1504 * Initialize Machine Checks for a CPU.
1485 */ 1505 */
1486static int __mcheck_cpu_cap_init(void) 1506static void __mcheck_cpu_cap_init(void)
1487{ 1507{
1488 u64 cap; 1508 u64 cap;
1489 u8 b; 1509 u8 b;
@@ -1491,16 +1511,16 @@ static int __mcheck_cpu_cap_init(void)
1491 rdmsrl(MSR_IA32_MCG_CAP, cap); 1511 rdmsrl(MSR_IA32_MCG_CAP, cap);
1492 1512
1493 b = cap & MCG_BANKCNT_MASK; 1513 b = cap & MCG_BANKCNT_MASK;
1494 if (WARN_ON_ONCE(b > MAX_NR_BANKS)) 1514
1515 if (b > MAX_NR_BANKS) {
1516 pr_warn("CPU%d: Using only %u machine check banks out of %u\n",
1517 smp_processor_id(), MAX_NR_BANKS, b);
1495 b = MAX_NR_BANKS; 1518 b = MAX_NR_BANKS;
1519 }
1496 1520
1497 mca_cfg.banks = max(mca_cfg.banks, b); 1521 this_cpu_write(mce_num_banks, b);
1498 1522
1499 if (!mce_banks) { 1523 __mcheck_cpu_mce_banks_init();
1500 int err = __mcheck_cpu_mce_banks_init();
1501 if (err)
1502 return err;
1503 }
1504 1524
1505 /* Use accurate RIP reporting if available. */ 1525 /* Use accurate RIP reporting if available. */
1506 if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9) 1526 if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9)
@@ -1508,8 +1528,6 @@ static int __mcheck_cpu_cap_init(void)
1508 1528
1509 if (cap & MCG_SER_P) 1529 if (cap & MCG_SER_P)
1510 mca_cfg.ser = 1; 1530 mca_cfg.ser = 1;
1511
1512 return 0;
1513} 1531}
1514 1532
1515static void __mcheck_cpu_init_generic(void) 1533static void __mcheck_cpu_init_generic(void)
@@ -1536,9 +1554,10 @@ static void __mcheck_cpu_init_generic(void)
1536 1554
1537static void __mcheck_cpu_init_clear_banks(void) 1555static void __mcheck_cpu_init_clear_banks(void)
1538{ 1556{
1557 struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
1539 int i; 1558 int i;
1540 1559
1541 for (i = 0; i < mca_cfg.banks; i++) { 1560 for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
1542 struct mce_bank *b = &mce_banks[i]; 1561 struct mce_bank *b = &mce_banks[i];
1543 1562
1544 if (!b->init) 1563 if (!b->init)
@@ -1549,6 +1568,33 @@ static void __mcheck_cpu_init_clear_banks(void)
1549} 1568}
1550 1569
1551/* 1570/*
1571 * Do a final check to see if there are any unused/RAZ banks.
1572 *
1573 * This must be done after the banks have been initialized and any quirks have
1574 * been applied.
1575 *
1576 * Do not call this from any user-initiated flows, e.g. CPU hotplug or sysfs.
1577 * Otherwise, a user who disables a bank will not be able to re-enable it
1578 * without a system reboot.
1579 */
1580static void __mcheck_cpu_check_banks(void)
1581{
1582 struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
1583 u64 msrval;
1584 int i;
1585
1586 for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
1587 struct mce_bank *b = &mce_banks[i];
1588
1589 if (!b->init)
1590 continue;
1591
1592 rdmsrl(msr_ops.ctl(i), msrval);
1593 b->init = !!msrval;
1594 }
1595}
1596
1597/*
1552 * During IFU recovery Sandy Bridge -EP4S processors set the RIPV and 1598 * During IFU recovery Sandy Bridge -EP4S processors set the RIPV and
1553 * EIPV bits in MCG_STATUS to zero on the affected logical processor (SDM 1599 * EIPV bits in MCG_STATUS to zero on the affected logical processor (SDM
1554 * Vol 3B Table 15-20). But this confuses both the code that determines 1600 * Vol 3B Table 15-20). But this confuses both the code that determines
@@ -1579,6 +1625,7 @@ static void quirk_sandybridge_ifu(int bank, struct mce *m, struct pt_regs *regs)
1579/* Add per CPU specific workarounds here */ 1625/* Add per CPU specific workarounds here */
1580static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) 1626static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
1581{ 1627{
1628 struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
1582 struct mca_config *cfg = &mca_cfg; 1629 struct mca_config *cfg = &mca_cfg;
1583 1630
1584 if (c->x86_vendor == X86_VENDOR_UNKNOWN) { 1631 if (c->x86_vendor == X86_VENDOR_UNKNOWN) {
@@ -1588,7 +1635,7 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
1588 1635
1589 /* This should be disabled by the BIOS, but isn't always */ 1636 /* This should be disabled by the BIOS, but isn't always */
1590 if (c->x86_vendor == X86_VENDOR_AMD) { 1637 if (c->x86_vendor == X86_VENDOR_AMD) {
1591 if (c->x86 == 15 && cfg->banks > 4) { 1638 if (c->x86 == 15 && this_cpu_read(mce_num_banks) > 4) {
1592 /* 1639 /*
1593 * disable GART TBL walk error reporting, which 1640 * disable GART TBL walk error reporting, which
1594 * trips off incorrectly with the IOMMU & 3ware 1641 * trips off incorrectly with the IOMMU & 3ware
@@ -1607,7 +1654,7 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
1607 * Various K7s with broken bank 0 around. Always disable 1654 * Various K7s with broken bank 0 around. Always disable
1608 * by default. 1655 * by default.
1609 */ 1656 */
1610 if (c->x86 == 6 && cfg->banks > 0) 1657 if (c->x86 == 6 && this_cpu_read(mce_num_banks) > 0)
1611 mce_banks[0].ctl = 0; 1658 mce_banks[0].ctl = 0;
1612 1659
1613 /* 1660 /*
@@ -1629,7 +1676,7 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
1629 * valid event later, merely don't write CTL0. 1676 * valid event later, merely don't write CTL0.
1630 */ 1677 */
1631 1678
1632 if (c->x86 == 6 && c->x86_model < 0x1A && cfg->banks > 0) 1679 if (c->x86 == 6 && c->x86_model < 0x1A && this_cpu_read(mce_num_banks) > 0)
1633 mce_banks[0].init = 0; 1680 mce_banks[0].init = 0;
1634 1681
1635 /* 1682 /*
@@ -1815,7 +1862,9 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
1815 if (!mce_available(c)) 1862 if (!mce_available(c))
1816 return; 1863 return;
1817 1864
1818 if (__mcheck_cpu_cap_init() < 0 || __mcheck_cpu_apply_quirks(c) < 0) { 1865 __mcheck_cpu_cap_init();
1866
1867 if (__mcheck_cpu_apply_quirks(c) < 0) {
1819 mca_cfg.disabled = 1; 1868 mca_cfg.disabled = 1;
1820 return; 1869 return;
1821 } 1870 }
@@ -1832,6 +1881,7 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
1832 __mcheck_cpu_init_generic(); 1881 __mcheck_cpu_init_generic();
1833 __mcheck_cpu_init_vendor(c); 1882 __mcheck_cpu_init_vendor(c);
1834 __mcheck_cpu_init_clear_banks(); 1883 __mcheck_cpu_init_clear_banks();
1884 __mcheck_cpu_check_banks();
1835 __mcheck_cpu_setup_timer(); 1885 __mcheck_cpu_setup_timer();
1836} 1886}
1837 1887
@@ -1863,7 +1913,7 @@ static void __mce_disable_bank(void *arg)
1863 1913
1864void mce_disable_bank(int bank) 1914void mce_disable_bank(int bank)
1865{ 1915{
1866 if (bank >= mca_cfg.banks) { 1916 if (bank >= this_cpu_read(mce_num_banks)) {
1867 pr_warn(FW_BUG 1917 pr_warn(FW_BUG
1868 "Ignoring request to disable invalid MCA bank %d.\n", 1918 "Ignoring request to disable invalid MCA bank %d.\n",
1869 bank); 1919 bank);
@@ -1949,9 +1999,10 @@ int __init mcheck_init(void)
1949 */ 1999 */
1950static void mce_disable_error_reporting(void) 2000static void mce_disable_error_reporting(void)
1951{ 2001{
2002 struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
1952 int i; 2003 int i;
1953 2004
1954 for (i = 0; i < mca_cfg.banks; i++) { 2005 for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
1955 struct mce_bank *b = &mce_banks[i]; 2006 struct mce_bank *b = &mce_banks[i];
1956 2007
1957 if (b->init) 2008 if (b->init)
@@ -2051,26 +2102,47 @@ static struct bus_type mce_subsys = {
2051 2102
2052DEFINE_PER_CPU(struct device *, mce_device); 2103DEFINE_PER_CPU(struct device *, mce_device);
2053 2104
2054static inline struct mce_bank *attr_to_bank(struct device_attribute *attr) 2105static inline struct mce_bank_dev *attr_to_bank(struct device_attribute *attr)
2055{ 2106{
2056 return container_of(attr, struct mce_bank, attr); 2107 return container_of(attr, struct mce_bank_dev, attr);
2057} 2108}
2058 2109
2059static ssize_t show_bank(struct device *s, struct device_attribute *attr, 2110static ssize_t show_bank(struct device *s, struct device_attribute *attr,
2060 char *buf) 2111 char *buf)
2061{ 2112{
2062 return sprintf(buf, "%llx\n", attr_to_bank(attr)->ctl); 2113 u8 bank = attr_to_bank(attr)->bank;
2114 struct mce_bank *b;
2115
2116 if (bank >= per_cpu(mce_num_banks, s->id))
2117 return -EINVAL;
2118
2119 b = &per_cpu(mce_banks_array, s->id)[bank];
2120
2121 if (!b->init)
2122 return -ENODEV;
2123
2124 return sprintf(buf, "%llx\n", b->ctl);
2063} 2125}
2064 2126
2065static ssize_t set_bank(struct device *s, struct device_attribute *attr, 2127static ssize_t set_bank(struct device *s, struct device_attribute *attr,
2066 const char *buf, size_t size) 2128 const char *buf, size_t size)
2067{ 2129{
2130 u8 bank = attr_to_bank(attr)->bank;
2131 struct mce_bank *b;
2068 u64 new; 2132 u64 new;
2069 2133
2070 if (kstrtou64(buf, 0, &new) < 0) 2134 if (kstrtou64(buf, 0, &new) < 0)
2071 return -EINVAL; 2135 return -EINVAL;
2072 2136
2073 attr_to_bank(attr)->ctl = new; 2137 if (bank >= per_cpu(mce_num_banks, s->id))
2138 return -EINVAL;
2139
2140 b = &per_cpu(mce_banks_array, s->id)[bank];
2141
2142 if (!b->init)
2143 return -ENODEV;
2144
2145 b->ctl = new;
2074 mce_restart(); 2146 mce_restart();
2075 2147
2076 return size; 2148 return size;
@@ -2185,7 +2257,7 @@ static void mce_device_release(struct device *dev)
2185 kfree(dev); 2257 kfree(dev);
2186} 2258}
2187 2259
2188/* Per cpu device init. All of the cpus still share the same ctrl bank: */ 2260/* Per CPU device init. All of the CPUs still share the same bank device: */
2189static int mce_device_create(unsigned int cpu) 2261static int mce_device_create(unsigned int cpu)
2190{ 2262{
2191 struct device *dev; 2263 struct device *dev;
@@ -2217,8 +2289,8 @@ static int mce_device_create(unsigned int cpu)
2217 if (err) 2289 if (err)
2218 goto error; 2290 goto error;
2219 } 2291 }
2220 for (j = 0; j < mca_cfg.banks; j++) { 2292 for (j = 0; j < per_cpu(mce_num_banks, cpu); j++) {
2221 err = device_create_file(dev, &mce_banks[j].attr); 2293 err = device_create_file(dev, &mce_bank_devs[j].attr);
2222 if (err) 2294 if (err)
2223 goto error2; 2295 goto error2;
2224 } 2296 }
@@ -2228,7 +2300,7 @@ static int mce_device_create(unsigned int cpu)
2228 return 0; 2300 return 0;
2229error2: 2301error2:
2230 while (--j >= 0) 2302 while (--j >= 0)
2231 device_remove_file(dev, &mce_banks[j].attr); 2303 device_remove_file(dev, &mce_bank_devs[j].attr);
2232error: 2304error:
2233 while (--i >= 0) 2305 while (--i >= 0)
2234 device_remove_file(dev, mce_device_attrs[i]); 2306 device_remove_file(dev, mce_device_attrs[i]);
@@ -2249,8 +2321,8 @@ static void mce_device_remove(unsigned int cpu)
2249 for (i = 0; mce_device_attrs[i]; i++) 2321 for (i = 0; mce_device_attrs[i]; i++)
2250 device_remove_file(dev, mce_device_attrs[i]); 2322 device_remove_file(dev, mce_device_attrs[i]);
2251 2323
2252 for (i = 0; i < mca_cfg.banks; i++) 2324 for (i = 0; i < per_cpu(mce_num_banks, cpu); i++)
2253 device_remove_file(dev, &mce_banks[i].attr); 2325 device_remove_file(dev, &mce_bank_devs[i].attr);
2254 2326
2255 device_unregister(dev); 2327 device_unregister(dev);
2256 cpumask_clear_cpu(cpu, mce_device_initialized); 2328 cpumask_clear_cpu(cpu, mce_device_initialized);
@@ -2271,6 +2343,7 @@ static void mce_disable_cpu(void)
2271 2343
2272static void mce_reenable_cpu(void) 2344static void mce_reenable_cpu(void)
2273{ 2345{
2346 struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
2274 int i; 2347 int i;
2275 2348
2276 if (!mce_available(raw_cpu_ptr(&cpu_info))) 2349 if (!mce_available(raw_cpu_ptr(&cpu_info)))
@@ -2278,7 +2351,7 @@ static void mce_reenable_cpu(void)
2278 2351
2279 if (!cpuhp_tasks_frozen) 2352 if (!cpuhp_tasks_frozen)
2280 cmci_reenable(); 2353 cmci_reenable();
2281 for (i = 0; i < mca_cfg.banks; i++) { 2354 for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
2282 struct mce_bank *b = &mce_banks[i]; 2355 struct mce_bank *b = &mce_banks[i];
2283 2356
2284 if (b->init) 2357 if (b->init)
@@ -2328,10 +2401,12 @@ static __init void mce_init_banks(void)
2328{ 2401{
2329 int i; 2402 int i;
2330 2403
2331 for (i = 0; i < mca_cfg.banks; i++) { 2404 for (i = 0; i < MAX_NR_BANKS; i++) {
2332 struct mce_bank *b = &mce_banks[i]; 2405 struct mce_bank_dev *b = &mce_bank_devs[i];
2333 struct device_attribute *a = &b->attr; 2406 struct device_attribute *a = &b->attr;
2334 2407
2408 b->bank = i;
2409
2335 sysfs_attr_init(&a->attr); 2410 sysfs_attr_init(&a->attr);
2336 a->attr.name = b->attrname; 2411 a->attr.name = b->attrname;
2337 snprintf(b->attrname, ATTR_LEN, "bank%d", i); 2412 snprintf(b->attrname, ATTR_LEN, "bank%d", i);
@@ -2441,22 +2516,16 @@ static int fake_panic_set(void *data, u64 val)
2441DEFINE_DEBUGFS_ATTRIBUTE(fake_panic_fops, fake_panic_get, fake_panic_set, 2516DEFINE_DEBUGFS_ATTRIBUTE(fake_panic_fops, fake_panic_get, fake_panic_set,
2442 "%llu\n"); 2517 "%llu\n");
2443 2518
2444static int __init mcheck_debugfs_init(void) 2519static void __init mcheck_debugfs_init(void)
2445{ 2520{
2446 struct dentry *dmce, *ffake_panic; 2521 struct dentry *dmce;
2447 2522
2448 dmce = mce_get_debugfs_dir(); 2523 dmce = mce_get_debugfs_dir();
2449 if (!dmce) 2524 debugfs_create_file_unsafe("fake_panic", 0444, dmce, NULL,
2450 return -ENOMEM; 2525 &fake_panic_fops);
2451 ffake_panic = debugfs_create_file_unsafe("fake_panic", 0444, dmce,
2452 NULL, &fake_panic_fops);
2453 if (!ffake_panic)
2454 return -ENOMEM;
2455
2456 return 0;
2457} 2526}
2458#else 2527#else
2459static int __init mcheck_debugfs_init(void) { return -EINVAL; } 2528static void __init mcheck_debugfs_init(void) { }
2460#endif 2529#endif
2461 2530
2462DEFINE_STATIC_KEY_FALSE(mcsafe_key); 2531DEFINE_STATIC_KEY_FALSE(mcsafe_key);
@@ -2464,8 +2533,6 @@ EXPORT_SYMBOL_GPL(mcsafe_key);
2464 2533
2465static int __init mcheck_late_init(void) 2534static int __init mcheck_late_init(void)
2466{ 2535{
2467 pr_info("Using %d MCE banks\n", mca_cfg.banks);
2468
2469 if (mca_cfg.recovery) 2536 if (mca_cfg.recovery)
2470 static_branch_inc(&mcsafe_key); 2537 static_branch_inc(&mcsafe_key);
2471 2538
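The structural change in mce/core.c is that both the bank count and the bank control array become per-CPU instead of global. A hedged kernel-style sketch of the resulting access pattern follows; the names (mce_bank_sketch, nr_banks, banks_array) are invented, it simply mirrors the DEFINE_PER_CPU_READ_MOSTLY/this_cpu_ptr/per_cpu usage visible in the hunks above, and it is not meant to build standalone.

#include <linux/types.h>
#include <linux/percpu.h>

#define MAX_NR_BANKS 32

struct mce_bank_sketch {
	u64 ctl;
	bool init;
};

static DEFINE_PER_CPU_READ_MOSTLY(unsigned int, nr_banks);
static DEFINE_PER_CPU_READ_MOSTLY(struct mce_bank_sketch[MAX_NR_BANKS], banks_array);

/* On the local CPU: iterate only over the banks this CPU actually has. */
static void walk_local_banks(void)
{
	struct mce_bank_sketch *banks = this_cpu_ptr(banks_array);
	unsigned int i;

	for (i = 0; i < this_cpu_read(nr_banks); i++) {
		if (!banks[i].init)
			continue;
		/* ... program banks[i].ctl for this CPU ... */
	}
}

/* From sysfs or another CPU: address a specific CPU's view of a bank. */
static u64 read_bank_ctl(unsigned int cpu, unsigned int bank)
{
	if (bank >= per_cpu(nr_banks, cpu))
		return 0;

	return per_cpu(banks_array, cpu)[bank].ctl;
}

This is why heterogeneous systems (e.g. different bank counts per CPU) no longer need a single worst-case global mca_cfg.banks.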
diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c
index 5d108f70f315..1f30117b24ba 100644
--- a/arch/x86/kernel/cpu/mce/inject.c
+++ b/arch/x86/kernel/cpu/mce/inject.c
@@ -645,7 +645,6 @@ static const struct file_operations readme_fops = {
645 645
646static struct dfs_node { 646static struct dfs_node {
647 char *name; 647 char *name;
648 struct dentry *d;
649 const struct file_operations *fops; 648 const struct file_operations *fops;
650 umode_t perm; 649 umode_t perm;
651} dfs_fls[] = { 650} dfs_fls[] = {
@@ -659,49 +658,23 @@ static struct dfs_node {
659 { .name = "README", .fops = &readme_fops, .perm = S_IRUSR | S_IRGRP | S_IROTH }, 658 { .name = "README", .fops = &readme_fops, .perm = S_IRUSR | S_IRGRP | S_IROTH },
660}; 659};
661 660
662static int __init debugfs_init(void) 661static void __init debugfs_init(void)
663{ 662{
664 unsigned int i; 663 unsigned int i;
665 664
666 dfs_inj = debugfs_create_dir("mce-inject", NULL); 665 dfs_inj = debugfs_create_dir("mce-inject", NULL);
667 if (!dfs_inj)
668 return -EINVAL;
669
670 for (i = 0; i < ARRAY_SIZE(dfs_fls); i++) {
671 dfs_fls[i].d = debugfs_create_file(dfs_fls[i].name,
672 dfs_fls[i].perm,
673 dfs_inj,
674 &i_mce,
675 dfs_fls[i].fops);
676
677 if (!dfs_fls[i].d)
678 goto err_dfs_add;
679 }
680
681 return 0;
682
683err_dfs_add:
684 while (i-- > 0)
685 debugfs_remove(dfs_fls[i].d);
686 666
687 debugfs_remove(dfs_inj); 667 for (i = 0; i < ARRAY_SIZE(dfs_fls); i++)
688 dfs_inj = NULL; 668 debugfs_create_file(dfs_fls[i].name, dfs_fls[i].perm, dfs_inj,
689 669 &i_mce, dfs_fls[i].fops);
690 return -ENODEV;
691} 670}
692 671
693static int __init inject_init(void) 672static int __init inject_init(void)
694{ 673{
695 int err;
696
697 if (!alloc_cpumask_var(&mce_inject_cpumask, GFP_KERNEL)) 674 if (!alloc_cpumask_var(&mce_inject_cpumask, GFP_KERNEL))
698 return -ENOMEM; 675 return -ENOMEM;
699 676
700 err = debugfs_init(); 677 debugfs_init();
701 if (err) {
702 free_cpumask_var(mce_inject_cpumask);
703 return err;
704 }
705 678
706 register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0, "mce_notify"); 679 register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0, "mce_notify");
707 mce_register_injector_chain(&inject_nb); 680 mce_register_injector_chain(&inject_nb);
diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
index a34b55baa7aa..43031db429d2 100644
--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -22,17 +22,8 @@ enum severity_level {
22 22
23extern struct blocking_notifier_head x86_mce_decoder_chain; 23extern struct blocking_notifier_head x86_mce_decoder_chain;
24 24
25#define ATTR_LEN 16
26#define INITIAL_CHECK_INTERVAL 5 * 60 /* 5 minutes */ 25#define INITIAL_CHECK_INTERVAL 5 * 60 /* 5 minutes */
27 26
28/* One object for each MCE bank, shared by all CPUs */
29struct mce_bank {
30 u64 ctl; /* subevents to enable */
31 unsigned char init; /* initialise bank? */
32 struct device_attribute attr; /* device attribute */
33 char attrname[ATTR_LEN]; /* attribute name */
34};
35
36struct mce_evt_llist { 27struct mce_evt_llist {
37 struct llist_node llnode; 28 struct llist_node llnode;
38 struct mce mce; 29 struct mce mce;
@@ -47,7 +38,6 @@ struct llist_node *mce_gen_pool_prepare_records(void);
47extern int (*mce_severity)(struct mce *a, int tolerant, char **msg, bool is_excp); 38extern int (*mce_severity)(struct mce *a, int tolerant, char **msg, bool is_excp);
48struct dentry *mce_get_debugfs_dir(void); 39struct dentry *mce_get_debugfs_dir(void);
49 40
50extern struct mce_bank *mce_banks;
51extern mce_banks_t mce_banks_ce_disabled; 41extern mce_banks_t mce_banks_ce_disabled;
52 42
53#ifdef CONFIG_X86_MCE_INTEL 43#ifdef CONFIG_X86_MCE_INTEL
@@ -128,7 +118,6 @@ struct mca_config {
128 bios_cmci_threshold : 1, 118 bios_cmci_threshold : 1,
129 __reserved : 59; 119 __reserved : 59;
130 120
131 u8 banks;
132 s8 bootlog; 121 s8 bootlog;
133 int tolerant; 122 int tolerant;
134 int monarch_timeout; 123 int monarch_timeout;
@@ -137,6 +126,7 @@ struct mca_config {
137}; 126};
138 127
139extern struct mca_config mca_cfg; 128extern struct mca_config mca_cfg;
129DECLARE_PER_CPU_READ_MOSTLY(unsigned int, mce_num_banks);
140 130
141struct mce_vendor_flags { 131struct mce_vendor_flags {
142 /* 132 /*
diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c
index 2d33a26d257e..210f1f5db5f7 100644
--- a/arch/x86/kernel/cpu/mce/severity.c
+++ b/arch/x86/kernel/cpu/mce/severity.c
@@ -400,21 +400,13 @@ static const struct file_operations severities_coverage_fops = {
400 400
401static int __init severities_debugfs_init(void) 401static int __init severities_debugfs_init(void)
402{ 402{
403 struct dentry *dmce, *fsev; 403 struct dentry *dmce;
404 404
405 dmce = mce_get_debugfs_dir(); 405 dmce = mce_get_debugfs_dir();
406 if (!dmce)
407 goto err_out;
408
409 fsev = debugfs_create_file("severities-coverage", 0444, dmce, NULL,
410 &severities_coverage_fops);
411 if (!fsev)
412 goto err_out;
413 406
407 debugfs_create_file("severities-coverage", 0444, dmce, NULL,
408 &severities_coverage_fops);
414 return 0; 409 return 0;
415
416err_out:
417 return -ENOMEM;
418} 410}
419late_initcall(severities_debugfs_init); 411late_initcall(severities_debugfs_init);
420#endif /* CONFIG_DEBUG_FS */ 412#endif /* CONFIG_DEBUG_FS */
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index a813987b5552..cb0fdcaf1415 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -789,13 +789,16 @@ static struct syscore_ops mc_syscore_ops = {
789 .resume = mc_bp_resume, 789 .resume = mc_bp_resume,
790}; 790};
791 791
792static int mc_cpu_online(unsigned int cpu) 792static int mc_cpu_starting(unsigned int cpu)
793{ 793{
794 struct device *dev;
795
796 dev = get_cpu_device(cpu);
797 microcode_update_cpu(cpu); 794 microcode_update_cpu(cpu);
798 pr_debug("CPU%d added\n", cpu); 795 pr_debug("CPU%d added\n", cpu);
796 return 0;
797}
798
799static int mc_cpu_online(unsigned int cpu)
800{
801 struct device *dev = get_cpu_device(cpu);
799 802
800 if (sysfs_create_group(&dev->kobj, &mc_attr_group)) 803 if (sysfs_create_group(&dev->kobj, &mc_attr_group))
801 pr_err("Failed to create group for CPU%d\n", cpu); 804 pr_err("Failed to create group for CPU%d\n", cpu);
@@ -872,7 +875,9 @@ int __init microcode_init(void)
872 goto out_ucode_group; 875 goto out_ucode_group;
873 876
874 register_syscore_ops(&mc_syscore_ops); 877 register_syscore_ops(&mc_syscore_ops);
875 cpuhp_setup_state_nocalls(CPUHP_AP_MICROCODE_LOADER, "x86/microcode:online", 878 cpuhp_setup_state_nocalls(CPUHP_AP_MICROCODE_LOADER, "x86/microcode:starting",
879 mc_cpu_starting, NULL);
880 cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/microcode:online",
876 mc_cpu_online, mc_cpu_down_prep); 881 mc_cpu_online, mc_cpu_down_prep);
877 882
878 pr_info("Microcode Update Driver: v%s.", DRIVER_VERSION); 883 pr_info("Microcode Update Driver: v%s.", DRIVER_VERSION);
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 7df29f08871b..062f77279ce3 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -17,6 +17,7 @@
17#include <linux/irq.h> 17#include <linux/irq.h>
18#include <linux/kexec.h> 18#include <linux/kexec.h>
19#include <linux/i8253.h> 19#include <linux/i8253.h>
20#include <linux/random.h>
20#include <asm/processor.h> 21#include <asm/processor.h>
21#include <asm/hypervisor.h> 22#include <asm/hypervisor.h>
22#include <asm/hyperv-tlfs.h> 23#include <asm/hyperv-tlfs.h>
@@ -80,6 +81,7 @@ __visible void __irq_entry hv_stimer0_vector_handler(struct pt_regs *regs)
80 inc_irq_stat(hyperv_stimer0_count); 81 inc_irq_stat(hyperv_stimer0_count);
81 if (hv_stimer0_handler) 82 if (hv_stimer0_handler)
82 hv_stimer0_handler(); 83 hv_stimer0_handler();
84 add_interrupt_randomness(HYPERV_STIMER0_VECTOR, 0);
83 ack_APIC_irq(); 85 ack_APIC_irq();
84 86
85 exiting_irq(); 87 exiting_irq();
@@ -89,7 +91,7 @@ __visible void __irq_entry hv_stimer0_vector_handler(struct pt_regs *regs)
89int hv_setup_stimer0_irq(int *irq, int *vector, void (*handler)(void)) 91int hv_setup_stimer0_irq(int *irq, int *vector, void (*handler)(void))
90{ 92{
91 *vector = HYPERV_STIMER0_VECTOR; 93 *vector = HYPERV_STIMER0_VECTOR;
92 *irq = 0; /* Unused on x86/x64 */ 94 *irq = -1; /* Unused on x86/x64 */
93 hv_stimer0_handler = handler; 95 hv_stimer0_handler = handler;
94 return 0; 96 return 0;
95} 97}
@@ -266,9 +268,9 @@ static void __init ms_hyperv_init_platform(void)
266 268
267 rdmsrl(HV_X64_MSR_APIC_FREQUENCY, hv_lapic_frequency); 269 rdmsrl(HV_X64_MSR_APIC_FREQUENCY, hv_lapic_frequency);
268 hv_lapic_frequency = div_u64(hv_lapic_frequency, HZ); 270 hv_lapic_frequency = div_u64(hv_lapic_frequency, HZ);
269 lapic_timer_frequency = hv_lapic_frequency; 271 lapic_timer_period = hv_lapic_frequency;
270 pr_info("Hyper-V: LAPIC Timer Frequency: %#x\n", 272 pr_info("Hyper-V: LAPIC Timer Frequency: %#x\n",
271 lapic_timer_frequency); 273 lapic_timer_period);
272 } 274 }
273 275
274 register_nmi_handler(NMI_UNKNOWN, hv_nmi_unknown, NMI_FLAG_FIRST, 276 register_nmi_handler(NMI_UNKNOWN, hv_nmi_unknown, NMI_FLAG_FIRST,
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 9356c1c9024d..aa5c064a6a22 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -743,7 +743,15 @@ static void prepare_set(void) __acquires(set_atomicity_lock)
743 /* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */ 743 /* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */
744 cr0 = read_cr0() | X86_CR0_CD; 744 cr0 = read_cr0() | X86_CR0_CD;
745 write_cr0(cr0); 745 write_cr0(cr0);
746 wbinvd(); 746
747 /*
748 * Cache flushing is the most time-consuming step when programming
749 * the MTRRs. Fortunately, as per the Intel Software Development
750 * Manual, we can skip it if the processor supports cache self-
751 * snooping.
752 */
753 if (!static_cpu_has(X86_FEATURE_SELFSNOOP))
754 wbinvd();
747 755
748 /* Save value of CR4 and clear Page Global Enable (bit 7) */ 756 /* Save value of CR4 and clear Page Global Enable (bit 7) */
749 if (boot_cpu_has(X86_FEATURE_PGE)) { 757 if (boot_cpu_has(X86_FEATURE_PGE)) {
@@ -760,7 +768,10 @@ static void prepare_set(void) __acquires(set_atomicity_lock)
760 768
761 /* Disable MTRRs, and set the default type to uncached */ 769 /* Disable MTRRs, and set the default type to uncached */
762 mtrr_wrmsr(MSR_MTRRdefType, deftype_lo & ~0xcff, deftype_hi); 770 mtrr_wrmsr(MSR_MTRRdefType, deftype_lo & ~0xcff, deftype_hi);
763 wbinvd(); 771
772 /* Again, only flush caches if we have to. */
773 if (!static_cpu_has(X86_FEATURE_SELFSNOOP))
774 wbinvd();
764} 775}
765 776
766static void post_set(void) __releases(set_atomicity_lock) 777static void post_set(void) __releases(set_atomicity_lock)
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 2131b8bbaad7..2f4824793798 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -796,8 +796,12 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of,
796 struct seq_file *seq, void *v) 796 struct seq_file *seq, void *v)
797{ 797{
798 struct rdt_resource *r = of->kn->parent->priv; 798 struct rdt_resource *r = of->kn->parent->priv;
799 u32 sw_shareable = 0, hw_shareable = 0; 799 /*
800 u32 exclusive = 0, pseudo_locked = 0; 800 * Use unsigned long even though only 32 bits are used to ensure
801 * test_bit() is used safely.
802 */
803 unsigned long sw_shareable = 0, hw_shareable = 0;
804 unsigned long exclusive = 0, pseudo_locked = 0;
801 struct rdt_domain *dom; 805 struct rdt_domain *dom;
802 int i, hwb, swb, excl, psl; 806 int i, hwb, swb, excl, psl;
803 enum rdtgrp_mode mode; 807 enum rdtgrp_mode mode;
@@ -842,10 +846,10 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of,
842 } 846 }
843 for (i = r->cache.cbm_len - 1; i >= 0; i--) { 847 for (i = r->cache.cbm_len - 1; i >= 0; i--) {
844 pseudo_locked = dom->plr ? dom->plr->cbm : 0; 848 pseudo_locked = dom->plr ? dom->plr->cbm : 0;
845 hwb = test_bit(i, (unsigned long *)&hw_shareable); 849 hwb = test_bit(i, &hw_shareable);
846 swb = test_bit(i, (unsigned long *)&sw_shareable); 850 swb = test_bit(i, &sw_shareable);
847 excl = test_bit(i, (unsigned long *)&exclusive); 851 excl = test_bit(i, &exclusive);
848 psl = test_bit(i, (unsigned long *)&pseudo_locked); 852 psl = test_bit(i, &pseudo_locked);
849 if (hwb && swb) 853 if (hwb && swb)
850 seq_putc(seq, 'X'); 854 seq_putc(seq, 'X');
851 else if (hwb && !swb) 855 else if (hwb && !swb)
@@ -2486,26 +2490,19 @@ out_destroy:
2486 */ 2490 */
2487static void cbm_ensure_valid(u32 *_val, struct rdt_resource *r) 2491static void cbm_ensure_valid(u32 *_val, struct rdt_resource *r)
2488{ 2492{
2489 /* 2493 unsigned long val = *_val;
2490 * Convert the u32 _val to an unsigned long required by all the bit
2491 * operations within this function. No more than 32 bits of this
2492 * converted value can be accessed because all bit operations are
2493 * additionally provided with cbm_len that is initialized during
2494 * hardware enumeration using five bits from the EAX register and
2495 * thus never can exceed 32 bits.
2496 */
2497 unsigned long *val = (unsigned long *)_val;
2498 unsigned int cbm_len = r->cache.cbm_len; 2494 unsigned int cbm_len = r->cache.cbm_len;
2499 unsigned long first_bit, zero_bit; 2495 unsigned long first_bit, zero_bit;
2500 2496
2501 if (*val == 0) 2497 if (val == 0)
2502 return; 2498 return;
2503 2499
2504 first_bit = find_first_bit(val, cbm_len); 2500 first_bit = find_first_bit(&val, cbm_len);
2505 zero_bit = find_next_zero_bit(val, cbm_len, first_bit); 2501 zero_bit = find_next_zero_bit(&val, cbm_len, first_bit);
2506 2502
2507 /* Clear any remaining bits to ensure contiguous region */ 2503 /* Clear any remaining bits to ensure contiguous region */
2508 bitmap_clear(val, zero_bit, cbm_len - zero_bit); 2504 bitmap_clear(&val, zero_bit, cbm_len - zero_bit);
2505 *_val = (u32)val;
2509} 2506}
2510 2507
2511/* 2508/*
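The cbm_ensure_valid() change works around a real width mismatch: find_first_bit() and bitmap_clear() operate on unsigned long words, so casting a u32 pointer lets them read and write four extra bytes on 64-bit. A small standalone illustration of the mismatch and of the widen-by-value fix; plain C with no kernel bitmap API, and the &= below is only a stand-in for bitmap_clear().

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	struct {
		uint32_t cbm;		/* the 4-byte field the old code cast */
		uint32_t neighbour;	/* whatever happens to sit next to it */
	} s = { 0x0000000f, 0xdeadbeef };

	/*
	 * Casting &s.cbm to (unsigned long *) and handing it to a bitmap
	 * helper would make the helper touch sizeof(unsigned long) == 8
	 * bytes on 64-bit, i.e. s.neighbour as well.
	 */
	printf("sizeof(uint32_t)=%zu sizeof(unsigned long)=%zu\n",
	       sizeof(uint32_t), sizeof(unsigned long));

	/* The fix: widen by value, operate on the local copy, narrow back. */
	unsigned long val = s.cbm;
	val &= ~0xf0UL;			/* stand-in for bitmap_clear() */
	s.cbm = (uint32_t)val;

	printf("cbm=%#x neighbour=%#x (untouched)\n", s.cbm, s.neighbour);
	return 0;
}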
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 94aa1c72ca98..adf9b71386ef 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -26,6 +26,10 @@ struct cpuid_bit {
26static const struct cpuid_bit cpuid_bits[] = { 26static const struct cpuid_bit cpuid_bits[] = {
27 { X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 }, 27 { X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 },
28 { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 }, 28 { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 },
29 { X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 },
30 { X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 },
31 { X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 },
32 { X86_FEATURE_CQM_MBM_LOCAL, CPUID_EDX, 2, 0x0000000f, 1 },
29 { X86_FEATURE_CAT_L3, CPUID_EBX, 1, 0x00000010, 0 }, 33 { X86_FEATURE_CAT_L3, CPUID_EBX, 1, 0x00000010, 0 },
30 { X86_FEATURE_CAT_L2, CPUID_EBX, 2, 0x00000010, 0 }, 34 { X86_FEATURE_CAT_L2, CPUID_EBX, 2, 0x00000010, 0 },
31 { X86_FEATURE_CDP_L3, CPUID_ECX, 2, 0x00000010, 1 }, 35 { X86_FEATURE_CDP_L3, CPUID_ECX, 2, 0x00000010, 1 },
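The new scattered-feature entries come from CPUID leaf 0xF: subleaf 0 EDX bit 1 advertises L3 cache monitoring, and subleaf 1 EDX bits 0-2 advertise the occupancy, total-bandwidth and local-bandwidth events. A hedged userspace probe of the same leaf using GCC's <cpuid.h> helper; the bit positions are taken from the table above.

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* CPUID.(EAX=0xF, ECX=0): EDX bit 1 => L3 monitoring (CQM_LLC) */
	if (!__get_cpuid_count(0x0f, 0, &eax, &ebx, &ecx, &edx) ||
	    !(edx & (1u << 1))) {
		puts("no L3 monitoring");
		return 0;
	}

	/* CPUID.(EAX=0xF, ECX=1): EDX bits 0-2 => occupancy / total BW / local BW */
	__get_cpuid_count(0x0f, 1, &eax, &ebx, &ecx, &edx);
	printf("occup_llc=%u mbm_total=%u mbm_local=%u\n",
	       edx & 1, (edx >> 1) & 1, (edx >> 2) & 1);
	return 0;
}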
diff --git a/arch/x86/kernel/cpu/umwait.c b/arch/x86/kernel/cpu/umwait.c
new file mode 100644
index 000000000000..6a204e7336c1
--- /dev/null
+++ b/arch/x86/kernel/cpu/umwait.c
@@ -0,0 +1,200 @@
1// SPDX-License-Identifier: GPL-2.0
2#include <linux/syscore_ops.h>
3#include <linux/suspend.h>
4#include <linux/cpu.h>
5
6#include <asm/msr.h>
7
8#define UMWAIT_C02_ENABLE 0
9
10#define UMWAIT_CTRL_VAL(max_time, c02_disable) \
11 (((max_time) & MSR_IA32_UMWAIT_CONTROL_TIME_MASK) | \
12 ((c02_disable) & MSR_IA32_UMWAIT_CONTROL_C02_DISABLE))
13
14/*
15 * Cache IA32_UMWAIT_CONTROL MSR. This is a systemwide control. By default,
16 * umwait max time is 100000 in TSC-quanta and C0.2 is enabled
17 */
18static u32 umwait_control_cached = UMWAIT_CTRL_VAL(100000, UMWAIT_C02_ENABLE);
19
20/*
21 * Serialize access to umwait_control_cached and IA32_UMWAIT_CONTROL MSR in
22 * the sysfs write functions.
23 */
24static DEFINE_MUTEX(umwait_lock);
25
26static void umwait_update_control_msr(void * unused)
27{
28 lockdep_assert_irqs_disabled();
29 wrmsr(MSR_IA32_UMWAIT_CONTROL, READ_ONCE(umwait_control_cached), 0);
30}
31
32/*
33 * The CPU hotplug callback sets the control MSR to the global control
34 * value.
35 *
36 * Disable interrupts so the read of umwait_control_cached and the WRMSR
37 * are protected against a concurrent sysfs write. Otherwise the sysfs
38 * write could update the cached value after it had been read on this CPU
39 * and issue the IPI before the old value had been written. The IPI would
40 * interrupt, write the new value and after return from IPI the previous
41 * value would be written by this CPU.
42 *
43 * With interrupts disabled the upcoming CPU either sees the new control
44 * value or the IPI is updating this CPU to the new control value after
45 * interrupts have been reenabled.
46 */
47static int umwait_cpu_online(unsigned int cpu)
48{
49 local_irq_disable();
50 umwait_update_control_msr(NULL);
51 local_irq_enable();
52 return 0;
53}
54
55/*
56 * On resume, restore IA32_UMWAIT_CONTROL MSR on the boot processor which
57 * is the only active CPU at this time. The MSR is set up on the APs via the
58 * CPU hotplug callback.
59 *
60 * This function is invoked on resume from suspend and hibernation. On
 61 * resume from suspend the restore should not be required, but we neither
62 * trust the firmware nor does it matter if the same value is written
63 * again.
64 */
65static void umwait_syscore_resume(void)
66{
67 umwait_update_control_msr(NULL);
68}
69
70static struct syscore_ops umwait_syscore_ops = {
71 .resume = umwait_syscore_resume,
72};
73
74/* sysfs interface */
75
76/*
77 * When bit 0 in IA32_UMWAIT_CONTROL MSR is 1, C0.2 is disabled.
78 * Otherwise, C0.2 is enabled.
79 */
80static inline bool umwait_ctrl_c02_enabled(u32 ctrl)
81{
82 return !(ctrl & MSR_IA32_UMWAIT_CONTROL_C02_DISABLE);
83}
84
85static inline u32 umwait_ctrl_max_time(u32 ctrl)
86{
87 return ctrl & MSR_IA32_UMWAIT_CONTROL_TIME_MASK;
88}
89
90static inline void umwait_update_control(u32 maxtime, bool c02_enable)
91{
92 u32 ctrl = maxtime & MSR_IA32_UMWAIT_CONTROL_TIME_MASK;
93
94 if (!c02_enable)
95 ctrl |= MSR_IA32_UMWAIT_CONTROL_C02_DISABLE;
96
97 WRITE_ONCE(umwait_control_cached, ctrl);
98 /* Propagate to all CPUs */
99 on_each_cpu(umwait_update_control_msr, NULL, 1);
100}
101
102static ssize_t
103enable_c02_show(struct device *dev, struct device_attribute *attr, char *buf)
104{
105 u32 ctrl = READ_ONCE(umwait_control_cached);
106
107 return sprintf(buf, "%d\n", umwait_ctrl_c02_enabled(ctrl));
108}
109
110static ssize_t enable_c02_store(struct device *dev,
111 struct device_attribute *attr,
112 const char *buf, size_t count)
113{
114 bool c02_enable;
115 u32 ctrl;
116 int ret;
117
118 ret = kstrtobool(buf, &c02_enable);
119 if (ret)
120 return ret;
121
122 mutex_lock(&umwait_lock);
123
124 ctrl = READ_ONCE(umwait_control_cached);
125 if (c02_enable != umwait_ctrl_c02_enabled(ctrl))
126 umwait_update_control(ctrl, c02_enable);
127
128 mutex_unlock(&umwait_lock);
129
130 return count;
131}
132static DEVICE_ATTR_RW(enable_c02);
133
134static ssize_t
135max_time_show(struct device *kobj, struct device_attribute *attr, char *buf)
136{
137 u32 ctrl = READ_ONCE(umwait_control_cached);
138
139 return sprintf(buf, "%u\n", umwait_ctrl_max_time(ctrl));
140}
141
142static ssize_t max_time_store(struct device *kobj,
143 struct device_attribute *attr,
144 const char *buf, size_t count)
145{
146 u32 max_time, ctrl;
147 int ret;
148
149 ret = kstrtou32(buf, 0, &max_time);
150 if (ret)
151 return ret;
152
153 /* bits[1:0] must be zero */
154 if (max_time & ~MSR_IA32_UMWAIT_CONTROL_TIME_MASK)
155 return -EINVAL;
156
157 mutex_lock(&umwait_lock);
158
159 ctrl = READ_ONCE(umwait_control_cached);
160 if (max_time != umwait_ctrl_max_time(ctrl))
161 umwait_update_control(max_time, umwait_ctrl_c02_enabled(ctrl));
162
163 mutex_unlock(&umwait_lock);
164
165 return count;
166}
167static DEVICE_ATTR_RW(max_time);
168
169static struct attribute *umwait_attrs[] = {
170 &dev_attr_enable_c02.attr,
171 &dev_attr_max_time.attr,
172 NULL
173};
174
175static struct attribute_group umwait_attr_group = {
176 .attrs = umwait_attrs,
177 .name = "umwait_control",
178};
179
180static int __init umwait_init(void)
181{
182 struct device *dev;
183 int ret;
184
185 if (!boot_cpu_has(X86_FEATURE_WAITPKG))
186 return -ENODEV;
187
188 ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "umwait:online",
189 umwait_cpu_online, NULL);
190
191 register_syscore_ops(&umwait_syscore_ops);
192
193 /*
194 * Add umwait control interface. Ignore failure, so at least the
195 * default values are set up in case the machine manages to boot.
196 */
197 dev = cpu_subsys.dev_root;
198 return sysfs_create_group(&dev->kobj, &umwait_attr_group);
199}
200device_initcall(umwait_init);
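For reference, a small standalone sketch of how the cached control word used throughout umwait.c is packed: bit 0 disables C0.2 and bits 31:2 hold the maximum wait time in TSC quanta. The mask values are restated locally so the snippet compiles on its own; in the kernel they correspond to the MSR_IA32_UMWAIT_CONTROL_* definitions used above.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Restated locally for the sketch; mirrors the masks used in umwait.c. */
#define UMWAIT_C02_DISABLE_BIT	0x1u
#define UMWAIT_TIME_MASK	(~0x03u)	/* bits[1:0] must be zero */

static uint32_t pack_ctrl(uint32_t max_time, bool c02_enable)
{
	uint32_t ctrl = max_time & UMWAIT_TIME_MASK;

	if (!c02_enable)
		ctrl |= UMWAIT_C02_DISABLE_BIT;
	return ctrl;
}

int main(void)
{
	uint32_t ctrl = pack_ctrl(100000, true);	/* the boot-time default */

	printf("ctrl=%#x max_time=%u c0.2=%s\n",
	       ctrl, ctrl & UMWAIT_TIME_MASK,
	       (ctrl & UMWAIT_C02_DISABLE_BIT) ? "disabled" : "enabled");
	return 0;
}

Since umwait_init() registers the attribute group on the CPU subsystem root, the two knobs should show up as /sys/devices/system/cpu/umwait_control/enable_c02 and .../max_time.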
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 0eda91f8eeac..3c648476d4fb 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -157,7 +157,7 @@ static void __init vmware_platform_setup(void)
157 157
158#ifdef CONFIG_X86_LOCAL_APIC 158#ifdef CONFIG_X86_LOCAL_APIC
159 /* Skip lapic calibration since we know the bus frequency. */ 159 /* Skip lapic calibration since we know the bus frequency. */
160 lapic_timer_frequency = ecx / HZ; 160 lapic_timer_period = ecx / HZ;
161 pr_info("Host bus clock speed read from hypervisor : %u Hz\n", 161 pr_info("Host bus clock speed read from hypervisor : %u Hz\n",
162 ecx); 162 ecx);
163#endif 163#endif
diff --git a/arch/x86/kernel/cpu/zhaoxin.c b/arch/x86/kernel/cpu/zhaoxin.c
new file mode 100644
index 000000000000..8e6f2f4b4afe
--- /dev/null
+++ b/arch/x86/kernel/cpu/zhaoxin.c
@@ -0,0 +1,167 @@
1// SPDX-License-Identifier: GPL-2.0
2#include <linux/sched.h>
3#include <linux/sched/clock.h>
4
5#include <asm/cpufeature.h>
6
7#include "cpu.h"
8
9#define MSR_ZHAOXIN_FCR57 0x00001257
10
11#define ACE_PRESENT (1 << 6)
12#define ACE_ENABLED (1 << 7)
13#define ACE_FCR (1 << 7) /* MSR_ZHAOXIN_FCR */
14
15#define RNG_PRESENT (1 << 2)
16#define RNG_ENABLED (1 << 3)
17#define RNG_ENABLE (1 << 8) /* MSR_ZHAOXIN_RNG */
18
19#define X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW 0x00200000
20#define X86_VMX_FEATURE_PROC_CTLS_VNMI 0x00400000
21#define X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS 0x80000000
22#define X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC 0x00000001
23#define X86_VMX_FEATURE_PROC_CTLS2_EPT 0x00000002
24#define X86_VMX_FEATURE_PROC_CTLS2_VPID 0x00000020
25
26static void init_zhaoxin_cap(struct cpuinfo_x86 *c)
27{
28 u32 lo, hi;
29
30 /* Test for Extended Feature Flags presence */
31 if (cpuid_eax(0xC0000000) >= 0xC0000001) {
32 u32 tmp = cpuid_edx(0xC0000001);
33
34 /* Enable ACE unit, if present and disabled */
35 if ((tmp & (ACE_PRESENT | ACE_ENABLED)) == ACE_PRESENT) {
36 rdmsr(MSR_ZHAOXIN_FCR57, lo, hi);
37 /* Enable ACE unit */
38 lo |= ACE_FCR;
39 wrmsr(MSR_ZHAOXIN_FCR57, lo, hi);
40 pr_info("CPU: Enabled ACE h/w crypto\n");
41 }
42
43 /* Enable RNG unit, if present and disabled */
44 if ((tmp & (RNG_PRESENT | RNG_ENABLED)) == RNG_PRESENT) {
45 rdmsr(MSR_ZHAOXIN_FCR57, lo, hi);
46 /* Enable RNG unit */
47 lo |= RNG_ENABLE;
48 wrmsr(MSR_ZHAOXIN_FCR57, lo, hi);
49 pr_info("CPU: Enabled h/w RNG\n");
50 }
51
52 /*
53 * Store Extended Feature Flags as word 5 of the CPU
54 * capability bit array
55 */
56 c->x86_capability[CPUID_C000_0001_EDX] = cpuid_edx(0xC0000001);
57 }
58
59 if (c->x86 >= 0x6)
60 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
61
62 cpu_detect_cache_sizes(c);
63}
64
65static void early_init_zhaoxin(struct cpuinfo_x86 *c)
66{
67 if (c->x86 >= 0x6)
68 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
69#ifdef CONFIG_X86_64
70 set_cpu_cap(c, X86_FEATURE_SYSENTER32);
71#endif
72 if (c->x86_power & (1 << 8)) {
73 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
74 set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
75 }
76
77 if (c->cpuid_level >= 0x00000001) {
78 u32 eax, ebx, ecx, edx;
79
80 cpuid(0x00000001, &eax, &ebx, &ecx, &edx);
81 /*
82 * If HTT (EDX[28]) is set EBX[16:23] contain the number of
83 * apicids which are reserved per package. Store the resulting
84 * shift value for the package management code.
85 */
86 if (edx & (1U << 28))
87 c->x86_coreid_bits = get_count_order((ebx >> 16) & 0xff);
88 }
89
90}
91
92static void zhaoxin_detect_vmx_virtcap(struct cpuinfo_x86 *c)
93{
94 u32 vmx_msr_low, vmx_msr_high, msr_ctl, msr_ctl2;
95
96 rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, vmx_msr_low, vmx_msr_high);
97 msr_ctl = vmx_msr_high | vmx_msr_low;
98
99 if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW)
100 set_cpu_cap(c, X86_FEATURE_TPR_SHADOW);
101 if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_VNMI)
102 set_cpu_cap(c, X86_FEATURE_VNMI);
103 if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS) {
104 rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2,
105 vmx_msr_low, vmx_msr_high);
106 msr_ctl2 = vmx_msr_high | vmx_msr_low;
107 if ((msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC) &&
108 (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW))
109 set_cpu_cap(c, X86_FEATURE_FLEXPRIORITY);
110 if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_EPT)
111 set_cpu_cap(c, X86_FEATURE_EPT);
112 if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VPID)
113 set_cpu_cap(c, X86_FEATURE_VPID);
114 }
115}
116
117static void init_zhaoxin(struct cpuinfo_x86 *c)
118{
119 early_init_zhaoxin(c);
120 init_intel_cacheinfo(c);
121 detect_num_cpu_cores(c);
122#ifdef CONFIG_X86_32
123 detect_ht(c);
124#endif
125
126 if (c->cpuid_level > 9) {
127 unsigned int eax = cpuid_eax(10);
128
129 /*
130 * Check for version and the number of counters
131 * Version(eax[7:0]) can't be 0;
132 * Counters(eax[15:8]) should be greater than 1;
133 */
134 if ((eax & 0xff) && (((eax >> 8) & 0xff) > 1))
135 set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
136 }
137
138 if (c->x86 >= 0x6)
139 init_zhaoxin_cap(c);
140#ifdef CONFIG_X86_64
141 set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
142#endif
143
144 if (cpu_has(c, X86_FEATURE_VMX))
145 zhaoxin_detect_vmx_virtcap(c);
146}
147
148#ifdef CONFIG_X86_32
149static unsigned int
150zhaoxin_size_cache(struct cpuinfo_x86 *c, unsigned int size)
151{
152 return size;
153}
154#endif
155
156static const struct cpu_dev zhaoxin_cpu_dev = {
157 .c_vendor = "zhaoxin",
158 .c_ident = { " Shanghai " },
159 .c_early_init = early_init_zhaoxin,
160 .c_init = init_zhaoxin,
161#ifdef CONFIG_X86_32
162 .legacy_cache_size = zhaoxin_size_cache,
163#endif
164 .c_x86_vendor = X86_VENDOR_ZHAOXIN,
165};
166
167cpu_dev_register(zhaoxin_cpu_dev);
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 649fbc3fcf9f..12c70840980e 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -43,18 +43,6 @@ static DEFINE_PER_CPU(bool, in_kernel_fpu);
43 */ 43 */
44DEFINE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx); 44DEFINE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);
45 45
46static void kernel_fpu_disable(void)
47{
48 WARN_ON_FPU(this_cpu_read(in_kernel_fpu));
49 this_cpu_write(in_kernel_fpu, true);
50}
51
52static void kernel_fpu_enable(void)
53{
54 WARN_ON_FPU(!this_cpu_read(in_kernel_fpu));
55 this_cpu_write(in_kernel_fpu, false);
56}
57
58static bool kernel_fpu_disabled(void) 46static bool kernel_fpu_disabled(void)
59{ 47{
60 return this_cpu_read(in_kernel_fpu); 48 return this_cpu_read(in_kernel_fpu);
@@ -94,42 +82,33 @@ bool irq_fpu_usable(void)
94} 82}
95EXPORT_SYMBOL(irq_fpu_usable); 83EXPORT_SYMBOL(irq_fpu_usable);
96 84
97static void __kernel_fpu_begin(void) 85void kernel_fpu_begin(void)
98{ 86{
99 struct fpu *fpu = &current->thread.fpu; 87 preempt_disable();
100 88
101 WARN_ON_FPU(!irq_fpu_usable()); 89 WARN_ON_FPU(!irq_fpu_usable());
90 WARN_ON_FPU(this_cpu_read(in_kernel_fpu));
102 91
103 kernel_fpu_disable(); 92 this_cpu_write(in_kernel_fpu, true);
104 93
105 if (!(current->flags & PF_KTHREAD)) { 94 if (!(current->flags & PF_KTHREAD) &&
106 if (!test_thread_flag(TIF_NEED_FPU_LOAD)) { 95 !test_thread_flag(TIF_NEED_FPU_LOAD)) {
107 set_thread_flag(TIF_NEED_FPU_LOAD); 96 set_thread_flag(TIF_NEED_FPU_LOAD);
108 /* 97 /*
109 * Ignore return value -- we don't care if reg state 98 * Ignore return value -- we don't care if reg state
110 * is clobbered. 99 * is clobbered.
111 */ 100 */
112 copy_fpregs_to_fpstate(fpu); 101 copy_fpregs_to_fpstate(&current->thread.fpu);
113 }
114 } 102 }
115 __cpu_invalidate_fpregs_state(); 103 __cpu_invalidate_fpregs_state();
116} 104}
117
118static void __kernel_fpu_end(void)
119{
120 kernel_fpu_enable();
121}
122
123void kernel_fpu_begin(void)
124{
125 preempt_disable();
126 __kernel_fpu_begin();
127}
128EXPORT_SYMBOL_GPL(kernel_fpu_begin); 105EXPORT_SYMBOL_GPL(kernel_fpu_begin);
129 106
130void kernel_fpu_end(void) 107void kernel_fpu_end(void)
131{ 108{
132 __kernel_fpu_end(); 109 WARN_ON_FPU(!this_cpu_read(in_kernel_fpu));
110
111 this_cpu_write(in_kernel_fpu, false);
133 preempt_enable(); 112 preempt_enable();
134} 113}
135EXPORT_SYMBOL_GPL(kernel_fpu_end); 114EXPORT_SYMBOL_GPL(kernel_fpu_end);
@@ -155,7 +134,6 @@ void fpu__save(struct fpu *fpu)
155 trace_x86_fpu_after_save(fpu); 134 trace_x86_fpu_after_save(fpu);
156 fpregs_unlock(); 135 fpregs_unlock();
157} 136}
158EXPORT_SYMBOL_GPL(fpu__save);
159 137
160/* 138/*
161 * Legacy x87 fpstate state init: 139 * Legacy x87 fpstate state init:
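The fpu/core.c hunks above fold __kernel_fpu_begin()/__kernel_fpu_end() into the exported kernel_fpu_begin()/kernel_fpu_end() and drop the fpu__save() export; callers of the begin/end pair are unaffected. A minimal usage sketch for reference (example_xor_block() is a hypothetical caller, not part of this patch):

	/*
	 * kernel_fpu_begin() disables preemption and, when the current task's
	 * FPU state is live in the registers, saves it and sets
	 * TIF_NEED_FPU_LOAD; kernel_fpu_end() re-enables preemption.
	 * SIMD instructions may only be used between the two calls.
	 */
	static void example_xor_block(u8 *dst, const u8 *src, unsigned int len)
	{
		unsigned int i;

		kernel_fpu_begin();
		for (i = 0; i < len; i++)	/* stand-in for SSE/AVX code */
			dst[i] ^= src[i];
		kernel_fpu_end();
	}
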
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index ef0030e3fe6b..6ce7e0a23268 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -204,12 +204,6 @@ static void __init fpu__init_system_xstate_size_legacy(void)
204 */ 204 */
205 205
206 if (!boot_cpu_has(X86_FEATURE_FPU)) { 206 if (!boot_cpu_has(X86_FEATURE_FPU)) {
207 /*
208 * Disable xsave as we do not support it if i387
209 * emulation is enabled.
210 */
211 setup_clear_cpu_cap(X86_FEATURE_XSAVE);
212 setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
213 fpu_kernel_xstate_size = sizeof(struct swregs_state); 207 fpu_kernel_xstate_size = sizeof(struct swregs_state);
214 } else { 208 } else {
215 if (boot_cpu_has(X86_FEATURE_FXSR)) 209 if (boot_cpu_has(X86_FEATURE_FXSR))
@@ -252,17 +246,20 @@ static void __init fpu__init_parse_early_param(void)
252 char *argptr = arg; 246 char *argptr = arg;
253 int bit; 247 int bit;
254 248
249#ifdef CONFIG_X86_32
255 if (cmdline_find_option_bool(boot_command_line, "no387")) 250 if (cmdline_find_option_bool(boot_command_line, "no387"))
251#ifdef CONFIG_MATH_EMULATION
256 setup_clear_cpu_cap(X86_FEATURE_FPU); 252 setup_clear_cpu_cap(X86_FEATURE_FPU);
253#else
254		pr_err("Option 'no387' requires CONFIG_MATH_EMULATION to be enabled.\n");
255#endif
257 256
258 if (cmdline_find_option_bool(boot_command_line, "nofxsr")) { 257 if (cmdline_find_option_bool(boot_command_line, "nofxsr"))
259 setup_clear_cpu_cap(X86_FEATURE_FXSR); 258 setup_clear_cpu_cap(X86_FEATURE_FXSR);
260 setup_clear_cpu_cap(X86_FEATURE_FXSR_OPT); 259#endif
261 setup_clear_cpu_cap(X86_FEATURE_XMM);
262 }
263 260
264 if (cmdline_find_option_bool(boot_command_line, "noxsave")) 261 if (cmdline_find_option_bool(boot_command_line, "noxsave"))
265 fpu__xstate_clear_all_cpu_caps(); 262 setup_clear_cpu_cap(X86_FEATURE_XSAVE);
266 263
267 if (cmdline_find_option_bool(boot_command_line, "noxsaveopt")) 264 if (cmdline_find_option_bool(boot_command_line, "noxsaveopt"))
268 setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); 265 setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 3c36dd1784db..7b4c52aa929f 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -68,15 +68,6 @@ static unsigned int xstate_comp_offsets[sizeof(xfeatures_mask)*8];
68unsigned int fpu_user_xstate_size; 68unsigned int fpu_user_xstate_size;
69 69
70/* 70/*
71 * Clear all of the X86_FEATURE_* bits that are unavailable
72 * when the CPU has no XSAVE support.
73 */
74void fpu__xstate_clear_all_cpu_caps(void)
75{
76 setup_clear_cpu_cap(X86_FEATURE_XSAVE);
77}
78
79/*
80 * Return whether the system supports a given xfeature. 71 * Return whether the system supports a given xfeature.
81 * 72 *
82 * Also return the name of the (most advanced) feature that the caller requested: 73 * Also return the name of the (most advanced) feature that the caller requested:
@@ -709,7 +700,7 @@ static void fpu__init_disable_system_xstate(void)
709{ 700{
710 xfeatures_mask = 0; 701 xfeatures_mask = 0;
711 cr4_clear_bits(X86_CR4_OSXSAVE); 702 cr4_clear_bits(X86_CR4_OSXSAVE);
712 fpu__xstate_clear_all_cpu_caps(); 703 setup_clear_cpu_cap(X86_FEATURE_XSAVE);
713} 704}
714 705
715/* 706/*
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 0927bb158ffc..76228525acd0 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -22,6 +22,7 @@
22#include <linux/init.h> 22#include <linux/init.h>
23#include <linux/list.h> 23#include <linux/list.h>
24#include <linux/module.h> 24#include <linux/module.h>
25#include <linux/memory.h>
25 26
26#include <trace/syscall.h> 27#include <trace/syscall.h>
27 28
@@ -34,16 +35,25 @@
34#ifdef CONFIG_DYNAMIC_FTRACE 35#ifdef CONFIG_DYNAMIC_FTRACE
35 36
36int ftrace_arch_code_modify_prepare(void) 37int ftrace_arch_code_modify_prepare(void)
38 __acquires(&text_mutex)
37{ 39{
40 /*
41 * Need to grab text_mutex to prevent a race from module loading
42 * and live kernel patching from changing the text permissions while
43 * ftrace has it set to "read/write".
44 */
45 mutex_lock(&text_mutex);
38 set_kernel_text_rw(); 46 set_kernel_text_rw();
39 set_all_modules_text_rw(); 47 set_all_modules_text_rw();
40 return 0; 48 return 0;
41} 49}
42 50
43int ftrace_arch_code_modify_post_process(void) 51int ftrace_arch_code_modify_post_process(void)
52 __releases(&text_mutex)
44{ 53{
45 set_all_modules_text_ro(); 54 set_all_modules_text_ro();
46 set_kernel_text_ro(); 55 set_kernel_text_ro();
56 mutex_unlock(&text_mutex);
47 return 0; 57 return 0;
48} 58}
49 59
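For context, these two arch hooks bracket every text rewrite issued by the core ftrace code, so taking text_mutex here keeps module loading and live patching from flipping the text permissions back while ftrace is still patching. Roughly (a simplified sketch of the caller in kernel/trace/ftrace.c, error handling omitted):

	static void ftrace_run_update_code(int command)
	{
		/* takes text_mutex, makes kernel and module text writable */
		ftrace_arch_code_modify_prepare();

		/* patch all mcount/fentry call sites */
		ftrace_modify_all_code(command);

		/* restores read-only text, releases text_mutex */
		ftrace_arch_code_modify_post_process();
	}
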
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 16b1cbd3a61e..29ffa495bd1c 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -184,24 +184,25 @@ unsigned long __head __startup_64(unsigned long physaddr,
184 pgtable_flags = _KERNPG_TABLE_NOENC + sme_get_me_mask(); 184 pgtable_flags = _KERNPG_TABLE_NOENC + sme_get_me_mask();
185 185
186 if (la57) { 186 if (la57) {
187 p4d = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr); 187 p4d = fixup_pointer(early_dynamic_pgts[(*next_pgt_ptr)++],
188 physaddr);
188 189
189 i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD; 190 i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
190 pgd[i + 0] = (pgdval_t)p4d + pgtable_flags; 191 pgd[i + 0] = (pgdval_t)p4d + pgtable_flags;
191 pgd[i + 1] = (pgdval_t)p4d + pgtable_flags; 192 pgd[i + 1] = (pgdval_t)p4d + pgtable_flags;
192 193
193 i = (physaddr >> P4D_SHIFT) % PTRS_PER_P4D; 194 i = physaddr >> P4D_SHIFT;
194 p4d[i + 0] = (pgdval_t)pud + pgtable_flags; 195 p4d[(i + 0) % PTRS_PER_P4D] = (pgdval_t)pud + pgtable_flags;
195 p4d[i + 1] = (pgdval_t)pud + pgtable_flags; 196 p4d[(i + 1) % PTRS_PER_P4D] = (pgdval_t)pud + pgtable_flags;
196 } else { 197 } else {
197 i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD; 198 i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
198 pgd[i + 0] = (pgdval_t)pud + pgtable_flags; 199 pgd[i + 0] = (pgdval_t)pud + pgtable_flags;
199 pgd[i + 1] = (pgdval_t)pud + pgtable_flags; 200 pgd[i + 1] = (pgdval_t)pud + pgtable_flags;
200 } 201 }
201 202
202 i = (physaddr >> PUD_SHIFT) % PTRS_PER_PUD; 203 i = physaddr >> PUD_SHIFT;
203 pud[i + 0] = (pudval_t)pmd + pgtable_flags; 204 pud[(i + 0) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags;
204 pud[i + 1] = (pudval_t)pmd + pgtable_flags; 205 pud[(i + 1) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags;
205 206
206 pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL; 207 pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL;
207 /* Filter out unsupported __PAGE_KERNEL_* bits: */ 208 /* Filter out unsupported __PAGE_KERNEL_* bits: */
@@ -211,8 +212,9 @@ unsigned long __head __startup_64(unsigned long physaddr,
211 pmd_entry += physaddr; 212 pmd_entry += physaddr;
212 213
213 for (i = 0; i < DIV_ROUND_UP(_end - _text, PMD_SIZE); i++) { 214 for (i = 0; i < DIV_ROUND_UP(_end - _text, PMD_SIZE); i++) {
214 int idx = i + (physaddr >> PMD_SHIFT) % PTRS_PER_PMD; 215 int idx = i + (physaddr >> PMD_SHIFT);
215 pmd[idx] = pmd_entry + i * PMD_SIZE; 216
217 pmd[idx % PTRS_PER_PMD] = pmd_entry + i * PMD_SIZE;
216 } 218 }
217 219
218 /* 220 /*
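The head64.c change covers the case where the early mapping of the kernel image straddles a page-table boundary: computing the index once and then writing entry i and i+1 can step past the end of the table, while wrapping each index separately stays in bounds. A toy illustration with made-up values (plain userspace C, not kernel code):

	#include <stdio.h>

	#define PTRS_PER_P4D 512

	int main(void)
	{
		unsigned long i = 511;	/* image starts in the table's last slot */

		/* old scheme: the second index is 512, past the end of the table */
		printf("old: %lu %lu\n", i + 0, i + 1);

		/* new scheme: the second index wraps back to 0 */
		printf("new: %lu %lu\n", (i + 0) % PTRS_PER_P4D,
		       (i + 1) % PTRS_PER_P4D);
		return 0;
	}
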
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index a0573f2e7763..c43e96a938d0 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -1,32 +1,44 @@
1// SPDX-License-Identifier: GPL-2.0-only 1// SPDX-License-Identifier: GPL-2.0-only
2#include <linux/clocksource.h>
3#include <linux/clockchips.h> 2#include <linux/clockchips.h>
4#include <linux/interrupt.h> 3#include <linux/interrupt.h>
5#include <linux/irq.h>
6#include <linux/export.h> 4#include <linux/export.h>
7#include <linux/delay.h> 5#include <linux/delay.h>
8#include <linux/errno.h>
9#include <linux/i8253.h>
10#include <linux/slab.h>
11#include <linux/hpet.h> 6#include <linux/hpet.h>
12#include <linux/init.h>
13#include <linux/cpu.h> 7#include <linux/cpu.h>
14#include <linux/pm.h> 8#include <linux/irq.h>
15#include <linux/io.h>
16 9
17#include <asm/cpufeature.h>
18#include <asm/irqdomain.h>
19#include <asm/fixmap.h>
20#include <asm/hpet.h> 10#include <asm/hpet.h>
21#include <asm/time.h> 11#include <asm/time.h>
22 12
23#define HPET_MASK CLOCKSOURCE_MASK(32) 13#undef pr_fmt
14#define pr_fmt(fmt) "hpet: " fmt
24 15
25#define HPET_DEV_USED_BIT 2 16enum hpet_mode {
26#define HPET_DEV_USED (1 << HPET_DEV_USED_BIT) 17 HPET_MODE_UNUSED,
27#define HPET_DEV_VALID 0x8 18 HPET_MODE_LEGACY,
28#define HPET_DEV_FSB_CAP 0x1000 19 HPET_MODE_CLOCKEVT,
29#define HPET_DEV_PERI_CAP 0x2000 20 HPET_MODE_DEVICE,
21};
22
23struct hpet_channel {
24 struct clock_event_device evt;
25 unsigned int num;
26 unsigned int cpu;
27 unsigned int irq;
28 unsigned int in_use;
29 enum hpet_mode mode;
30 unsigned int boot_cfg;
31 char name[10];
32};
33
34struct hpet_base {
35 unsigned int nr_channels;
36 unsigned int nr_clockevents;
37 unsigned int boot_cfg;
38 struct hpet_channel *channels;
39};
40
41#define HPET_MASK CLOCKSOURCE_MASK(32)
30 42
31#define HPET_MIN_CYCLES 128 43#define HPET_MIN_CYCLES 128
32#define HPET_MIN_PROG_DELTA (HPET_MIN_CYCLES + (HPET_MIN_CYCLES >> 1)) 44#define HPET_MIN_PROG_DELTA (HPET_MIN_CYCLES + (HPET_MIN_CYCLES >> 1))
@@ -39,22 +51,25 @@ u8 hpet_blockid; /* OS timer block num */
39bool hpet_msi_disable; 51bool hpet_msi_disable;
40 52
41#ifdef CONFIG_PCI_MSI 53#ifdef CONFIG_PCI_MSI
42static unsigned int hpet_num_timers; 54static DEFINE_PER_CPU(struct hpet_channel *, cpu_hpet_channel);
55static struct irq_domain *hpet_domain;
43#endif 56#endif
57
44static void __iomem *hpet_virt_address; 58static void __iomem *hpet_virt_address;
45 59
46struct hpet_dev { 60static struct hpet_base hpet_base;
47 struct clock_event_device evt; 61
48 unsigned int num; 62static bool hpet_legacy_int_enabled;
49 int cpu; 63static unsigned long hpet_freq;
50 unsigned int irq;
51 unsigned int flags;
52 char name[10];
53};
54 64
55static inline struct hpet_dev *EVT_TO_HPET_DEV(struct clock_event_device *evtdev) 65bool boot_hpet_disable;
66bool hpet_force_user;
67static bool hpet_verbose;
68
69static inline
70struct hpet_channel *clockevent_to_channel(struct clock_event_device *evt)
56{ 71{
57 return container_of(evtdev, struct hpet_dev, evt); 72 return container_of(evt, struct hpet_channel, evt);
58} 73}
59 74
60inline unsigned int hpet_readl(unsigned int a) 75inline unsigned int hpet_readl(unsigned int a)
@@ -67,10 +82,6 @@ static inline void hpet_writel(unsigned int d, unsigned int a)
67 writel(d, hpet_virt_address + a); 82 writel(d, hpet_virt_address + a);
68} 83}
69 84
70#ifdef CONFIG_X86_64
71#include <asm/pgtable.h>
72#endif
73
74static inline void hpet_set_mapping(void) 85static inline void hpet_set_mapping(void)
75{ 86{
76 hpet_virt_address = ioremap_nocache(hpet_address, HPET_MMAP_SIZE); 87 hpet_virt_address = ioremap_nocache(hpet_address, HPET_MMAP_SIZE);
@@ -85,10 +96,6 @@ static inline void hpet_clear_mapping(void)
85/* 96/*
86 * HPET command line enable / disable 97 * HPET command line enable / disable
87 */ 98 */
88bool boot_hpet_disable;
89bool hpet_force_user;
90static bool hpet_verbose;
91
92static int __init hpet_setup(char *str) 99static int __init hpet_setup(char *str)
93{ 100{
94 while (str) { 101 while (str) {
@@ -120,13 +127,8 @@ static inline int is_hpet_capable(void)
120 return !boot_hpet_disable && hpet_address; 127 return !boot_hpet_disable && hpet_address;
121} 128}
122 129
123/*
124 * HPET timer interrupt enable / disable
125 */
126static bool hpet_legacy_int_enabled;
127
128/** 130/**
129 * is_hpet_enabled - check whether the hpet timer interrupt is enabled 131 * is_hpet_enabled - Check whether the legacy HPET timer interrupt is enabled
130 */ 132 */
131int is_hpet_enabled(void) 133int is_hpet_enabled(void)
132{ 134{
@@ -136,32 +138,36 @@ EXPORT_SYMBOL_GPL(is_hpet_enabled);
136 138
137static void _hpet_print_config(const char *function, int line) 139static void _hpet_print_config(const char *function, int line)
138{ 140{
139 u32 i, timers, l, h; 141 u32 i, id, period, cfg, status, channels, l, h;
140 printk(KERN_INFO "hpet: %s(%d):\n", function, line); 142
141 l = hpet_readl(HPET_ID); 143 pr_info("%s(%d):\n", function, line);
142 h = hpet_readl(HPET_PERIOD); 144
143 timers = ((l & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1; 145 id = hpet_readl(HPET_ID);
144 printk(KERN_INFO "hpet: ID: 0x%x, PERIOD: 0x%x\n", l, h); 146 period = hpet_readl(HPET_PERIOD);
145 l = hpet_readl(HPET_CFG); 147 pr_info("ID: 0x%x, PERIOD: 0x%x\n", id, period);
146 h = hpet_readl(HPET_STATUS); 148
147 printk(KERN_INFO "hpet: CFG: 0x%x, STATUS: 0x%x\n", l, h); 149 cfg = hpet_readl(HPET_CFG);
150 status = hpet_readl(HPET_STATUS);
151 pr_info("CFG: 0x%x, STATUS: 0x%x\n", cfg, status);
152
148 l = hpet_readl(HPET_COUNTER); 153 l = hpet_readl(HPET_COUNTER);
149 h = hpet_readl(HPET_COUNTER+4); 154 h = hpet_readl(HPET_COUNTER+4);
150 printk(KERN_INFO "hpet: COUNTER_l: 0x%x, COUNTER_h: 0x%x\n", l, h); 155 pr_info("COUNTER_l: 0x%x, COUNTER_h: 0x%x\n", l, h);
156
157 channels = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1;
151 158
152 for (i = 0; i < timers; i++) { 159 for (i = 0; i < channels; i++) {
153 l = hpet_readl(HPET_Tn_CFG(i)); 160 l = hpet_readl(HPET_Tn_CFG(i));
154 h = hpet_readl(HPET_Tn_CFG(i)+4); 161 h = hpet_readl(HPET_Tn_CFG(i)+4);
155 printk(KERN_INFO "hpet: T%d: CFG_l: 0x%x, CFG_h: 0x%x\n", 162 pr_info("T%d: CFG_l: 0x%x, CFG_h: 0x%x\n", i, l, h);
156 i, l, h); 163
157 l = hpet_readl(HPET_Tn_CMP(i)); 164 l = hpet_readl(HPET_Tn_CMP(i));
158 h = hpet_readl(HPET_Tn_CMP(i)+4); 165 h = hpet_readl(HPET_Tn_CMP(i)+4);
159 printk(KERN_INFO "hpet: T%d: CMP_l: 0x%x, CMP_h: 0x%x\n", 166 pr_info("T%d: CMP_l: 0x%x, CMP_h: 0x%x\n", i, l, h);
160 i, l, h); 167
161 l = hpet_readl(HPET_Tn_ROUTE(i)); 168 l = hpet_readl(HPET_Tn_ROUTE(i));
162 h = hpet_readl(HPET_Tn_ROUTE(i)+4); 169 h = hpet_readl(HPET_Tn_ROUTE(i)+4);
163 printk(KERN_INFO "hpet: T%d ROUTE_l: 0x%x, ROUTE_h: 0x%x\n", 170 pr_info("T%d ROUTE_l: 0x%x, ROUTE_h: 0x%x\n", i, l, h);
164 i, l, h);
165 } 171 }
166} 172}
167 173
@@ -172,31 +178,20 @@ do { \
172} while (0) 178} while (0)
173 179
174/* 180/*
175 * When the hpet driver (/dev/hpet) is enabled, we need to reserve 181 * When the HPET driver (/dev/hpet) is enabled, we need to reserve
176 * timer 0 and timer 1 in case of RTC emulation. 182 * timer 0 and timer 1 in case of RTC emulation.
177 */ 183 */
178#ifdef CONFIG_HPET 184#ifdef CONFIG_HPET
179 185
180static void hpet_reserve_msi_timers(struct hpet_data *hd); 186static void __init hpet_reserve_platform_timers(void)
181
182static void hpet_reserve_platform_timers(unsigned int id)
183{ 187{
184 struct hpet __iomem *hpet = hpet_virt_address;
185 struct hpet_timer __iomem *timer = &hpet->hpet_timers[2];
186 unsigned int nrtimers, i;
187 struct hpet_data hd; 188 struct hpet_data hd;
188 189 unsigned int i;
189 nrtimers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1;
190 190
191 memset(&hd, 0, sizeof(hd)); 191 memset(&hd, 0, sizeof(hd));
192 hd.hd_phys_address = hpet_address; 192 hd.hd_phys_address = hpet_address;
193 hd.hd_address = hpet; 193 hd.hd_address = hpet_virt_address;
194 hd.hd_nirqs = nrtimers; 194 hd.hd_nirqs = hpet_base.nr_channels;
195 hpet_reserve_timer(&hd, 0);
196
197#ifdef CONFIG_HPET_EMULATE_RTC
198 hpet_reserve_timer(&hd, 1);
199#endif
200 195
201 /* 196 /*
202 * NOTE that hd_irq[] reflects IOAPIC input pins (LEGACY_8254 197 * NOTE that hd_irq[] reflects IOAPIC input pins (LEGACY_8254
@@ -206,30 +201,52 @@ static void hpet_reserve_platform_timers(unsigned int id)
206 hd.hd_irq[0] = HPET_LEGACY_8254; 201 hd.hd_irq[0] = HPET_LEGACY_8254;
207 hd.hd_irq[1] = HPET_LEGACY_RTC; 202 hd.hd_irq[1] = HPET_LEGACY_RTC;
208 203
209 for (i = 2; i < nrtimers; timer++, i++) { 204 for (i = 0; i < hpet_base.nr_channels; i++) {
210 hd.hd_irq[i] = (readl(&timer->hpet_config) & 205 struct hpet_channel *hc = hpet_base.channels + i;
211 Tn_INT_ROUTE_CNF_MASK) >> Tn_INT_ROUTE_CNF_SHIFT; 206
212 } 207 if (i >= 2)
208 hd.hd_irq[i] = hc->irq;
213 209
214 hpet_reserve_msi_timers(&hd); 210 switch (hc->mode) {
211 case HPET_MODE_UNUSED:
212 case HPET_MODE_DEVICE:
213 hc->mode = HPET_MODE_DEVICE;
214 break;
215 case HPET_MODE_CLOCKEVT:
216 case HPET_MODE_LEGACY:
217 hpet_reserve_timer(&hd, hc->num);
218 break;
219 }
220 }
215 221
216 hpet_alloc(&hd); 222 hpet_alloc(&hd);
223}
217 224
225static void __init hpet_select_device_channel(void)
226{
227 int i;
228
229 for (i = 0; i < hpet_base.nr_channels; i++) {
230 struct hpet_channel *hc = hpet_base.channels + i;
231
232 /* Associate the first unused channel to /dev/hpet */
233 if (hc->mode == HPET_MODE_UNUSED) {
234 hc->mode = HPET_MODE_DEVICE;
235 return;
236 }
237 }
218} 238}
239
219#else 240#else
220static void hpet_reserve_platform_timers(unsigned int id) { } 241static inline void hpet_reserve_platform_timers(void) { }
242static inline void hpet_select_device_channel(void) {}
221#endif 243#endif
222 244
223/* 245/* Common HPET functions */
224 * Common hpet info
225 */
226static unsigned long hpet_freq;
227
228static struct clock_event_device hpet_clockevent;
229
230static void hpet_stop_counter(void) 246static void hpet_stop_counter(void)
231{ 247{
232 u32 cfg = hpet_readl(HPET_CFG); 248 u32 cfg = hpet_readl(HPET_CFG);
249
233 cfg &= ~HPET_CFG_ENABLE; 250 cfg &= ~HPET_CFG_ENABLE;
234 hpet_writel(cfg, HPET_CFG); 251 hpet_writel(cfg, HPET_CFG);
235} 252}
@@ -243,6 +260,7 @@ static void hpet_reset_counter(void)
243static void hpet_start_counter(void) 260static void hpet_start_counter(void)
244{ 261{
245 unsigned int cfg = hpet_readl(HPET_CFG); 262 unsigned int cfg = hpet_readl(HPET_CFG);
263
246 cfg |= HPET_CFG_ENABLE; 264 cfg |= HPET_CFG_ENABLE;
247 hpet_writel(cfg, HPET_CFG); 265 hpet_writel(cfg, HPET_CFG);
248} 266}
@@ -274,24 +292,9 @@ static void hpet_enable_legacy_int(void)
274 hpet_legacy_int_enabled = true; 292 hpet_legacy_int_enabled = true;
275} 293}
276 294
277static void hpet_legacy_clockevent_register(void) 295static int hpet_clkevt_set_state_periodic(struct clock_event_device *evt)
278{
279 /* Start HPET legacy interrupts */
280 hpet_enable_legacy_int();
281
282 /*
283 * Start hpet with the boot cpu mask and make it
284 * global after the IO_APIC has been initialized.
285 */
286 hpet_clockevent.cpumask = cpumask_of(boot_cpu_data.cpu_index);
287 clockevents_config_and_register(&hpet_clockevent, hpet_freq,
288 HPET_MIN_PROG_DELTA, 0x7FFFFFFF);
289 global_clock_event = &hpet_clockevent;
290 printk(KERN_DEBUG "hpet clockevent registered\n");
291}
292
293static int hpet_set_periodic(struct clock_event_device *evt, int timer)
294{ 296{
297 unsigned int channel = clockevent_to_channel(evt)->num;
295 unsigned int cfg, cmp, now; 298 unsigned int cfg, cmp, now;
296 uint64_t delta; 299 uint64_t delta;
297 300
@@ -300,11 +303,11 @@ static int hpet_set_periodic(struct clock_event_device *evt, int timer)
300 delta >>= evt->shift; 303 delta >>= evt->shift;
301 now = hpet_readl(HPET_COUNTER); 304 now = hpet_readl(HPET_COUNTER);
302 cmp = now + (unsigned int)delta; 305 cmp = now + (unsigned int)delta;
303 cfg = hpet_readl(HPET_Tn_CFG(timer)); 306 cfg = hpet_readl(HPET_Tn_CFG(channel));
304 cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL | 307 cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL |
305 HPET_TN_32BIT; 308 HPET_TN_32BIT;
306 hpet_writel(cfg, HPET_Tn_CFG(timer)); 309 hpet_writel(cfg, HPET_Tn_CFG(channel));
307 hpet_writel(cmp, HPET_Tn_CMP(timer)); 310 hpet_writel(cmp, HPET_Tn_CMP(channel));
308 udelay(1); 311 udelay(1);
309 /* 312 /*
310 * HPET on AMD 81xx needs a second write (with HPET_TN_SETVAL 313 * HPET on AMD 81xx needs a second write (with HPET_TN_SETVAL
@@ -313,52 +316,55 @@ static int hpet_set_periodic(struct clock_event_device *evt, int timer)
313 * (See AMD-8111 HyperTransport I/O Hub Data Sheet, 316 * (See AMD-8111 HyperTransport I/O Hub Data Sheet,
314 * Publication # 24674) 317 * Publication # 24674)
315 */ 318 */
316 hpet_writel((unsigned int)delta, HPET_Tn_CMP(timer)); 319 hpet_writel((unsigned int)delta, HPET_Tn_CMP(channel));
317 hpet_start_counter(); 320 hpet_start_counter();
318 hpet_print_config(); 321 hpet_print_config();
319 322
320 return 0; 323 return 0;
321} 324}
322 325
323static int hpet_set_oneshot(struct clock_event_device *evt, int timer) 326static int hpet_clkevt_set_state_oneshot(struct clock_event_device *evt)
324{ 327{
328 unsigned int channel = clockevent_to_channel(evt)->num;
325 unsigned int cfg; 329 unsigned int cfg;
326 330
327 cfg = hpet_readl(HPET_Tn_CFG(timer)); 331 cfg = hpet_readl(HPET_Tn_CFG(channel));
328 cfg &= ~HPET_TN_PERIODIC; 332 cfg &= ~HPET_TN_PERIODIC;
329 cfg |= HPET_TN_ENABLE | HPET_TN_32BIT; 333 cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
330 hpet_writel(cfg, HPET_Tn_CFG(timer)); 334 hpet_writel(cfg, HPET_Tn_CFG(channel));
331 335
332 return 0; 336 return 0;
333} 337}
334 338
335static int hpet_shutdown(struct clock_event_device *evt, int timer) 339static int hpet_clkevt_set_state_shutdown(struct clock_event_device *evt)
336{ 340{
341 unsigned int channel = clockevent_to_channel(evt)->num;
337 unsigned int cfg; 342 unsigned int cfg;
338 343
339 cfg = hpet_readl(HPET_Tn_CFG(timer)); 344 cfg = hpet_readl(HPET_Tn_CFG(channel));
340 cfg &= ~HPET_TN_ENABLE; 345 cfg &= ~HPET_TN_ENABLE;
341 hpet_writel(cfg, HPET_Tn_CFG(timer)); 346 hpet_writel(cfg, HPET_Tn_CFG(channel));
342 347
343 return 0; 348 return 0;
344} 349}
345 350
346static int hpet_resume(struct clock_event_device *evt) 351static int hpet_clkevt_legacy_resume(struct clock_event_device *evt)
347{ 352{
348 hpet_enable_legacy_int(); 353 hpet_enable_legacy_int();
349 hpet_print_config(); 354 hpet_print_config();
350 return 0; 355 return 0;
351} 356}
352 357
353static int hpet_next_event(unsigned long delta, 358static int
354 struct clock_event_device *evt, int timer) 359hpet_clkevt_set_next_event(unsigned long delta, struct clock_event_device *evt)
355{ 360{
361 unsigned int channel = clockevent_to_channel(evt)->num;
356 u32 cnt; 362 u32 cnt;
357 s32 res; 363 s32 res;
358 364
359 cnt = hpet_readl(HPET_COUNTER); 365 cnt = hpet_readl(HPET_COUNTER);
360 cnt += (u32) delta; 366 cnt += (u32) delta;
361 hpet_writel(cnt, HPET_Tn_CMP(timer)); 367 hpet_writel(cnt, HPET_Tn_CMP(channel));
362 368
363 /* 369 /*
364 * HPETs are a complete disaster. The compare register is 370 * HPETs are a complete disaster. The compare register is
@@ -387,360 +393,250 @@ static int hpet_next_event(unsigned long delta,
387 return res < HPET_MIN_CYCLES ? -ETIME : 0; 393 return res < HPET_MIN_CYCLES ? -ETIME : 0;
388} 394}
389 395
390static int hpet_legacy_shutdown(struct clock_event_device *evt) 396static void hpet_init_clockevent(struct hpet_channel *hc, unsigned int rating)
391{ 397{
392 return hpet_shutdown(evt, 0); 398 struct clock_event_device *evt = &hc->evt;
393}
394 399
395static int hpet_legacy_set_oneshot(struct clock_event_device *evt) 400 evt->rating = rating;
396{ 401 evt->irq = hc->irq;
397 return hpet_set_oneshot(evt, 0); 402 evt->name = hc->name;
398} 403 evt->cpumask = cpumask_of(hc->cpu);
404 evt->set_state_oneshot = hpet_clkevt_set_state_oneshot;
405 evt->set_next_event = hpet_clkevt_set_next_event;
406 evt->set_state_shutdown = hpet_clkevt_set_state_shutdown;
399 407
400static int hpet_legacy_set_periodic(struct clock_event_device *evt) 408 evt->features = CLOCK_EVT_FEAT_ONESHOT;
401{ 409 if (hc->boot_cfg & HPET_TN_PERIODIC) {
402 return hpet_set_periodic(evt, 0); 410 evt->features |= CLOCK_EVT_FEAT_PERIODIC;
411 evt->set_state_periodic = hpet_clkevt_set_state_periodic;
412 }
403} 413}
404 414
405static int hpet_legacy_resume(struct clock_event_device *evt) 415static void __init hpet_legacy_clockevent_register(struct hpet_channel *hc)
406{ 416{
407 return hpet_resume(evt); 417 /*
408} 418 * Start HPET with the boot CPU's cpumask and make it global after
419 * the IO_APIC has been initialized.
420 */
421 hc->cpu = boot_cpu_data.cpu_index;
422 strncpy(hc->name, "hpet", sizeof(hc->name));
423 hpet_init_clockevent(hc, 50);
409 424
410static int hpet_legacy_next_event(unsigned long delta, 425 hc->evt.tick_resume = hpet_clkevt_legacy_resume;
411 struct clock_event_device *evt)
412{
413 return hpet_next_event(delta, evt, 0);
414}
415 426
416/* 427 /*
417 * The hpet clock event device 428 * Legacy horrors and sins from the past. HPET used periodic mode
418 */ 429 * unconditionally forever on the legacy channel 0. Removing the
419static struct clock_event_device hpet_clockevent = { 430 * below hack and using the conditional in hpet_init_clockevent()
420 .name = "hpet", 431 * makes at least Qemu and one hardware machine fail to boot.
421 .features = CLOCK_EVT_FEAT_PERIODIC | 432 * There are two issues which cause the boot failure:
422 CLOCK_EVT_FEAT_ONESHOT, 433 *
423 .set_state_periodic = hpet_legacy_set_periodic, 434 * #1 After the timer delivery test in IOAPIC and the IOAPIC setup
424 .set_state_oneshot = hpet_legacy_set_oneshot, 435 * the next interrupt is not delivered despite the HPET channel
425 .set_state_shutdown = hpet_legacy_shutdown, 436 * being programmed correctly. Reprogramming the HPET after
426 .tick_resume = hpet_legacy_resume, 437 * switching to IOAPIC makes it work again. After fixing this,
427 .set_next_event = hpet_legacy_next_event, 438 * the next issue surfaces:
428 .irq = 0, 439 *
429 .rating = 50, 440 * #2 Due to the unconditional periodic mode availability the Local
430}; 441 * APIC timer calibration can hijack the global clockevents
442 * event handler without causing damage. Using oneshot at this
 443 *    stage makes it hang because the HPET does not get
444 * reprogrammed due to the handler hijacking. Duh, stupid me!
445 *
446 * Both issues require major surgery and especially the kick HPET
447 * again after enabling IOAPIC results in really nasty hackery.
448 * This 'assume periodic works' magic has survived since HPET
449 * support got added, so it's questionable whether this should be
450 * fixed. Both Qemu and the failing hardware machine support
451 * periodic mode despite the fact that both don't advertise it in
452 * the configuration register and both need that extra kick after
453 * switching to IOAPIC. Seems to be a feature...
454 */
455 hc->evt.features |= CLOCK_EVT_FEAT_PERIODIC;
456 hc->evt.set_state_periodic = hpet_clkevt_set_state_periodic;
457
458 /* Start HPET legacy interrupts */
459 hpet_enable_legacy_int();
460
461 clockevents_config_and_register(&hc->evt, hpet_freq,
462 HPET_MIN_PROG_DELTA, 0x7FFFFFFF);
463 global_clock_event = &hc->evt;
464 pr_debug("Clockevent registered\n");
465}
431 466
432/* 467/*
433 * HPET MSI Support 468 * HPET MSI Support
434 */ 469 */
435#ifdef CONFIG_PCI_MSI 470#ifdef CONFIG_PCI_MSI
436 471
437static DEFINE_PER_CPU(struct hpet_dev *, cpu_hpet_dev);
438static struct hpet_dev *hpet_devs;
439static struct irq_domain *hpet_domain;
440
441void hpet_msi_unmask(struct irq_data *data) 472void hpet_msi_unmask(struct irq_data *data)
442{ 473{
443 struct hpet_dev *hdev = irq_data_get_irq_handler_data(data); 474 struct hpet_channel *hc = irq_data_get_irq_handler_data(data);
444 unsigned int cfg; 475 unsigned int cfg;
445 476
446 /* unmask it */ 477 cfg = hpet_readl(HPET_Tn_CFG(hc->num));
447 cfg = hpet_readl(HPET_Tn_CFG(hdev->num));
448 cfg |= HPET_TN_ENABLE | HPET_TN_FSB; 478 cfg |= HPET_TN_ENABLE | HPET_TN_FSB;
449 hpet_writel(cfg, HPET_Tn_CFG(hdev->num)); 479 hpet_writel(cfg, HPET_Tn_CFG(hc->num));
450} 480}
451 481
452void hpet_msi_mask(struct irq_data *data) 482void hpet_msi_mask(struct irq_data *data)
453{ 483{
454 struct hpet_dev *hdev = irq_data_get_irq_handler_data(data); 484 struct hpet_channel *hc = irq_data_get_irq_handler_data(data);
455 unsigned int cfg; 485 unsigned int cfg;
456 486
457 /* mask it */ 487 cfg = hpet_readl(HPET_Tn_CFG(hc->num));
458 cfg = hpet_readl(HPET_Tn_CFG(hdev->num));
459 cfg &= ~(HPET_TN_ENABLE | HPET_TN_FSB); 488 cfg &= ~(HPET_TN_ENABLE | HPET_TN_FSB);
460 hpet_writel(cfg, HPET_Tn_CFG(hdev->num)); 489 hpet_writel(cfg, HPET_Tn_CFG(hc->num));
461}
462
463void hpet_msi_write(struct hpet_dev *hdev, struct msi_msg *msg)
464{
465 hpet_writel(msg->data, HPET_Tn_ROUTE(hdev->num));
466 hpet_writel(msg->address_lo, HPET_Tn_ROUTE(hdev->num) + 4);
467} 490}
468 491
469void hpet_msi_read(struct hpet_dev *hdev, struct msi_msg *msg) 492void hpet_msi_write(struct hpet_channel *hc, struct msi_msg *msg)
470{ 493{
471 msg->data = hpet_readl(HPET_Tn_ROUTE(hdev->num)); 494 hpet_writel(msg->data, HPET_Tn_ROUTE(hc->num));
472 msg->address_lo = hpet_readl(HPET_Tn_ROUTE(hdev->num) + 4); 495 hpet_writel(msg->address_lo, HPET_Tn_ROUTE(hc->num) + 4);
473 msg->address_hi = 0;
474} 496}
475 497
476static int hpet_msi_shutdown(struct clock_event_device *evt) 498static int hpet_clkevt_msi_resume(struct clock_event_device *evt)
477{ 499{
478 struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); 500 struct hpet_channel *hc = clockevent_to_channel(evt);
479 501 struct irq_data *data = irq_get_irq_data(hc->irq);
480 return hpet_shutdown(evt, hdev->num);
481}
482
483static int hpet_msi_set_oneshot(struct clock_event_device *evt)
484{
485 struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
486
487 return hpet_set_oneshot(evt, hdev->num);
488}
489
490static int hpet_msi_set_periodic(struct clock_event_device *evt)
491{
492 struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
493
494 return hpet_set_periodic(evt, hdev->num);
495}
496
497static int hpet_msi_resume(struct clock_event_device *evt)
498{
499 struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
500 struct irq_data *data = irq_get_irq_data(hdev->irq);
501 struct msi_msg msg; 502 struct msi_msg msg;
502 503
503 /* Restore the MSI msg and unmask the interrupt */ 504 /* Restore the MSI msg and unmask the interrupt */
504 irq_chip_compose_msi_msg(data, &msg); 505 irq_chip_compose_msi_msg(data, &msg);
505 hpet_msi_write(hdev, &msg); 506 hpet_msi_write(hc, &msg);
506 hpet_msi_unmask(data); 507 hpet_msi_unmask(data);
507 return 0; 508 return 0;
508} 509}
509 510
510static int hpet_msi_next_event(unsigned long delta, 511static irqreturn_t hpet_msi_interrupt_handler(int irq, void *data)
511 struct clock_event_device *evt)
512{
513 struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
514 return hpet_next_event(delta, evt, hdev->num);
515}
516
517static irqreturn_t hpet_interrupt_handler(int irq, void *data)
518{ 512{
519 struct hpet_dev *dev = (struct hpet_dev *)data; 513 struct hpet_channel *hc = data;
520 struct clock_event_device *hevt = &dev->evt; 514 struct clock_event_device *evt = &hc->evt;
521 515
522 if (!hevt->event_handler) { 516 if (!evt->event_handler) {
523 printk(KERN_INFO "Spurious HPET timer interrupt on HPET timer %d\n", 517 pr_info("Spurious interrupt HPET channel %d\n", hc->num);
524 dev->num);
525 return IRQ_HANDLED; 518 return IRQ_HANDLED;
526 } 519 }
527 520
528 hevt->event_handler(hevt); 521 evt->event_handler(evt);
529 return IRQ_HANDLED; 522 return IRQ_HANDLED;
530} 523}
531 524
532static int hpet_setup_irq(struct hpet_dev *dev) 525static int hpet_setup_msi_irq(struct hpet_channel *hc)
533{ 526{
534 527 if (request_irq(hc->irq, hpet_msi_interrupt_handler,
535 if (request_irq(dev->irq, hpet_interrupt_handler,
536 IRQF_TIMER | IRQF_NOBALANCING, 528 IRQF_TIMER | IRQF_NOBALANCING,
537 dev->name, dev)) 529 hc->name, hc))
538 return -1; 530 return -1;
539 531
540 disable_irq(dev->irq); 532 disable_irq(hc->irq);
541 irq_set_affinity(dev->irq, cpumask_of(dev->cpu)); 533 irq_set_affinity(hc->irq, cpumask_of(hc->cpu));
542 enable_irq(dev->irq); 534 enable_irq(hc->irq);
543 535
544 printk(KERN_DEBUG "hpet: %s irq %d for MSI\n", 536 pr_debug("%s irq %u for MSI\n", hc->name, hc->irq);
545 dev->name, dev->irq);
546 537
547 return 0; 538 return 0;
548} 539}
549 540
550/* This should be called in specific @cpu */ 541/* Invoked from the hotplug callback on @cpu */
551static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu) 542static void init_one_hpet_msi_clockevent(struct hpet_channel *hc, int cpu)
552{ 543{
553 struct clock_event_device *evt = &hdev->evt; 544 struct clock_event_device *evt = &hc->evt;
554
555 WARN_ON(cpu != smp_processor_id());
556 if (!(hdev->flags & HPET_DEV_VALID))
557 return;
558
559 hdev->cpu = cpu;
560 per_cpu(cpu_hpet_dev, cpu) = hdev;
561 evt->name = hdev->name;
562 hpet_setup_irq(hdev);
563 evt->irq = hdev->irq;
564 545
565 evt->rating = 110; 546 hc->cpu = cpu;
566 evt->features = CLOCK_EVT_FEAT_ONESHOT; 547 per_cpu(cpu_hpet_channel, cpu) = hc;
567 if (hdev->flags & HPET_DEV_PERI_CAP) { 548 hpet_setup_msi_irq(hc);
568 evt->features |= CLOCK_EVT_FEAT_PERIODIC;
569 evt->set_state_periodic = hpet_msi_set_periodic;
570 }
571 549
572 evt->set_state_shutdown = hpet_msi_shutdown; 550 hpet_init_clockevent(hc, 110);
573 evt->set_state_oneshot = hpet_msi_set_oneshot; 551 evt->tick_resume = hpet_clkevt_msi_resume;
574 evt->tick_resume = hpet_msi_resume;
575 evt->set_next_event = hpet_msi_next_event;
576 evt->cpumask = cpumask_of(hdev->cpu);
577 552
578 clockevents_config_and_register(evt, hpet_freq, HPET_MIN_PROG_DELTA, 553 clockevents_config_and_register(evt, hpet_freq, HPET_MIN_PROG_DELTA,
579 0x7FFFFFFF); 554 0x7FFFFFFF);
580} 555}
581 556
582#ifdef CONFIG_HPET 557static struct hpet_channel *hpet_get_unused_clockevent(void)
583/* Reserve at least one timer for userspace (/dev/hpet) */
584#define RESERVE_TIMERS 1
585#else
586#define RESERVE_TIMERS 0
587#endif
588
589static void hpet_msi_capability_lookup(unsigned int start_timer)
590{ 558{
591 unsigned int id; 559 int i;
592 unsigned int num_timers;
593 unsigned int num_timers_used = 0;
594 int i, irq;
595
596 if (hpet_msi_disable)
597 return;
598
599 if (boot_cpu_has(X86_FEATURE_ARAT))
600 return;
601 id = hpet_readl(HPET_ID);
602
603 num_timers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT);
604 num_timers++; /* Value read out starts from 0 */
605 hpet_print_config();
606
607 hpet_domain = hpet_create_irq_domain(hpet_blockid);
608 if (!hpet_domain)
609 return;
610
611 hpet_devs = kcalloc(num_timers, sizeof(struct hpet_dev), GFP_KERNEL);
612 if (!hpet_devs)
613 return;
614
615 hpet_num_timers = num_timers;
616
617 for (i = start_timer; i < num_timers - RESERVE_TIMERS; i++) {
618 struct hpet_dev *hdev = &hpet_devs[num_timers_used];
619 unsigned int cfg = hpet_readl(HPET_Tn_CFG(i));
620
621 /* Only consider HPET timer with MSI support */
622 if (!(cfg & HPET_TN_FSB_CAP))
623 continue;
624 560
625 hdev->flags = 0; 561 for (i = 0; i < hpet_base.nr_channels; i++) {
626 if (cfg & HPET_TN_PERIODIC_CAP) 562 struct hpet_channel *hc = hpet_base.channels + i;
627 hdev->flags |= HPET_DEV_PERI_CAP;
628 sprintf(hdev->name, "hpet%d", i);
629 hdev->num = i;
630 563
631 irq = hpet_assign_irq(hpet_domain, hdev, hdev->num); 564 if (hc->mode != HPET_MODE_CLOCKEVT || hc->in_use)
632 if (irq <= 0)
633 continue; 565 continue;
634 566 hc->in_use = 1;
635 hdev->irq = irq; 567 return hc;
636 hdev->flags |= HPET_DEV_FSB_CAP;
637 hdev->flags |= HPET_DEV_VALID;
638 num_timers_used++;
639 if (num_timers_used == num_possible_cpus())
640 break;
641 } 568 }
642 569 return NULL;
643 printk(KERN_INFO "HPET: %d timers in total, %d timers will be used for per-cpu timer\n",
644 num_timers, num_timers_used);
645} 570}
646 571
647#ifdef CONFIG_HPET 572static int hpet_cpuhp_online(unsigned int cpu)
648static void hpet_reserve_msi_timers(struct hpet_data *hd)
649{ 573{
650 int i; 574 struct hpet_channel *hc = hpet_get_unused_clockevent();
651
652 if (!hpet_devs)
653 return;
654 575
655 for (i = 0; i < hpet_num_timers; i++) { 576 if (hc)
656 struct hpet_dev *hdev = &hpet_devs[i]; 577 init_one_hpet_msi_clockevent(hc, cpu);
578 return 0;
579}
657 580
658 if (!(hdev->flags & HPET_DEV_VALID)) 581static int hpet_cpuhp_dead(unsigned int cpu)
659 continue; 582{
583 struct hpet_channel *hc = per_cpu(cpu_hpet_channel, cpu);
660 584
661 hd->hd_irq[hdev->num] = hdev->irq; 585 if (!hc)
662 hpet_reserve_timer(hd, hdev->num); 586 return 0;
663 } 587 free_irq(hc->irq, hc);
588 hc->in_use = 0;
589 per_cpu(cpu_hpet_channel, cpu) = NULL;
590 return 0;
664} 591}
665#endif
666 592
667static struct hpet_dev *hpet_get_unused_timer(void) 593static void __init hpet_select_clockevents(void)
668{ 594{
669 int i; 595 unsigned int i;
670 596
671 if (!hpet_devs) 597 hpet_base.nr_clockevents = 0;
672 return NULL;
673 598
 674 for (i = 0; i < hpet_num_timers; i++) { 599 /* No point if MSI is disabled or CPU has an Always Running APIC Timer */
675 struct hpet_dev *hdev = &hpet_devs[i]; 600 if (hpet_msi_disable || boot_cpu_has(X86_FEATURE_ARAT))
601 return;
676 602
677 if (!(hdev->flags & HPET_DEV_VALID)) 603 hpet_print_config();
678 continue;
679 if (test_and_set_bit(HPET_DEV_USED_BIT,
680 (unsigned long *)&hdev->flags))
681 continue;
682 return hdev;
683 }
684 return NULL;
685}
686 604
687struct hpet_work_struct { 605 hpet_domain = hpet_create_irq_domain(hpet_blockid);
688 struct delayed_work work; 606 if (!hpet_domain)
689 struct completion complete; 607 return;
690};
691 608
692static void hpet_work(struct work_struct *w) 609 for (i = 0; i < hpet_base.nr_channels; i++) {
693{ 610 struct hpet_channel *hc = hpet_base.channels + i;
694 struct hpet_dev *hdev; 611 int irq;
695 int cpu = smp_processor_id();
696 struct hpet_work_struct *hpet_work;
697 612
698 hpet_work = container_of(w, struct hpet_work_struct, work.work); 613 if (hc->mode != HPET_MODE_UNUSED)
614 continue;
699 615
700 hdev = hpet_get_unused_timer(); 616 /* Only consider HPET channel with MSI support */
701 if (hdev) 617 if (!(hc->boot_cfg & HPET_TN_FSB_CAP))
702 init_one_hpet_msi_clockevent(hdev, cpu); 618 continue;
703 619
704 complete(&hpet_work->complete); 620 sprintf(hc->name, "hpet%d", i);
705}
706 621
707static int hpet_cpuhp_online(unsigned int cpu) 622 irq = hpet_assign_irq(hpet_domain, hc, hc->num);
708{ 623 if (irq <= 0)
709 struct hpet_work_struct work; 624 continue;
710
711 INIT_DELAYED_WORK_ONSTACK(&work.work, hpet_work);
712 init_completion(&work.complete);
713 /* FIXME: add schedule_work_on() */
714 schedule_delayed_work_on(cpu, &work.work, 0);
715 wait_for_completion(&work.complete);
716 destroy_delayed_work_on_stack(&work.work);
717 return 0;
718}
719 625
720static int hpet_cpuhp_dead(unsigned int cpu) 626 hc->irq = irq;
721{ 627 hc->mode = HPET_MODE_CLOCKEVT;
722 struct hpet_dev *hdev = per_cpu(cpu_hpet_dev, cpu);
723 628
724 if (!hdev) 629 if (++hpet_base.nr_clockevents == num_possible_cpus())
725 return 0; 630 break;
726 free_irq(hdev->irq, hdev); 631 }
727 hdev->flags &= ~HPET_DEV_USED;
728 per_cpu(cpu_hpet_dev, cpu) = NULL;
729 return 0;
730}
731#else
732 632
733static void hpet_msi_capability_lookup(unsigned int start_timer) 633 pr_info("%d channels of %d reserved for per-cpu timers\n",
734{ 634 hpet_base.nr_channels, hpet_base.nr_clockevents);
735 return;
736} 635}
737 636
738#ifdef CONFIG_HPET 637#else
739static void hpet_reserve_msi_timers(struct hpet_data *hd) 638
740{ 639static inline void hpet_select_clockevents(void) { }
741 return;
742}
743#endif
744 640
745#define hpet_cpuhp_online NULL 641#define hpet_cpuhp_online NULL
746#define hpet_cpuhp_dead NULL 642#define hpet_cpuhp_dead NULL
@@ -754,10 +650,10 @@ static void hpet_reserve_msi_timers(struct hpet_data *hd)
754/* 650/*
755 * Reading the HPET counter is a very slow operation. If a large number of 651 * Reading the HPET counter is a very slow operation. If a large number of
756 * CPUs are trying to access the HPET counter simultaneously, it can cause 652 * CPUs are trying to access the HPET counter simultaneously, it can cause
757 * massive delay and slow down system performance dramatically. This may 653 * massive delays and slow down system performance dramatically. This may
758 * happen when HPET is the default clock source instead of TSC. For a 654 * happen when HPET is the default clock source instead of TSC. For a
759 * really large system with hundreds of CPUs, the slowdown may be so 655 * really large system with hundreds of CPUs, the slowdown may be so
760 * severe that it may actually crash the system because of a NMI watchdog 656 * severe, that it can actually crash the system because of a NMI watchdog
761 * soft lockup, for example. 657 * soft lockup, for example.
762 * 658 *
763 * If multiple CPUs are trying to access the HPET counter at the same time, 659 * If multiple CPUs are trying to access the HPET counter at the same time,
@@ -766,10 +662,9 @@ static void hpet_reserve_msi_timers(struct hpet_data *hd)
766 * 662 *
767 * This special feature is only enabled on x86-64 systems. It is unlikely 663 * This special feature is only enabled on x86-64 systems. It is unlikely
768 * that 32-bit x86 systems will have enough CPUs to require this feature 664 * that 32-bit x86 systems will have enough CPUs to require this feature
769 * with its associated locking overhead. And we also need 64-bit atomic 665 * with its associated locking overhead. We also need 64-bit atomic read.
770 * read.
771 * 666 *
772 * The lock and the hpet value are stored together and can be read in a 667 * The lock and the HPET value are stored together and can be read in a
773 * single atomic 64-bit read. It is explicitly assumed that arch_spinlock_t 668 * single atomic 64-bit read. It is explicitly assumed that arch_spinlock_t
774 * is 32 bits in size. 669 * is 32 bits in size.
775 */ 670 */
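The "lock and HPET value stored together" layout that this comment describes amounts to packing a 32-bit arch_spinlock_t and the cached counter readout into one 64-bit word, so a single atomic load observes both. A sketch of that layout, inferred from the comment rather than quoted from the (unchanged) read path, which is not shown in this hunk:

	union hpet_lock {
		struct {
			arch_spinlock_t lock;	/* assumed 32 bits, see comment */
			u32 value;		/* last HPET counter readout */
		};
		u64 lockval;			/* both halves in one 64-bit read */
	};

A reader loads lockval once: if the lock half is free it takes the lock and reads the hardware, and if it is contended it waits for the holder to publish a fresh value and reuses that instead of issuing another slow HPET read.
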
@@ -858,15 +753,40 @@ static struct clocksource clocksource_hpet = {
858 .resume = hpet_resume_counter, 753 .resume = hpet_resume_counter,
859}; 754};
860 755
861static int hpet_clocksource_register(void) 756/*
757 * AMD SB700 based systems with spread spectrum enabled use a SMM based
758 * HPET emulation to provide proper frequency setting.
759 *
760 * On such systems the SMM code is initialized with the first HPET register
761 * access and takes some time to complete. During this time the config
762 * register reads 0xffffffff. We check for max 1000 loops whether the
763 * config register reads a non-0xffffffff value to make sure that the
764 * HPET is up and running before we proceed any further.
765 *
766 * A counting loop is safe, as the HPET access takes thousands of CPU cycles.
767 *
768 * On non-SB700 based machines this check is only done once and has no
769 * side effects.
770 */
771static bool __init hpet_cfg_working(void)
862{ 772{
863 u64 start, now; 773 int i;
864 u64 t1; 774
775 for (i = 0; i < 1000; i++) {
776 if (hpet_readl(HPET_CFG) != 0xFFFFFFFF)
777 return true;
778 }
779
780 pr_warn("Config register invalid. Disabling HPET\n");
781 return false;
782}
783
784static bool __init hpet_counting(void)
785{
786 u64 start, now, t1;
865 787
866 /* Start the counter */
867 hpet_restart_counter(); 788 hpet_restart_counter();
868 789
869 /* Verify whether hpet counter works */
870 t1 = hpet_readl(HPET_COUNTER); 790 t1 = hpet_readl(HPET_COUNTER);
871 start = rdtsc(); 791 start = rdtsc();
872 792
@@ -877,30 +797,24 @@ static int hpet_clocksource_register(void)
877 * 1 GHz == 200us 797 * 1 GHz == 200us
878 */ 798 */
879 do { 799 do {
880 rep_nop(); 800 if (t1 != hpet_readl(HPET_COUNTER))
801 return true;
881 now = rdtsc(); 802 now = rdtsc();
882 } while ((now - start) < 200000UL); 803 } while ((now - start) < 200000UL);
883 804
884 if (t1 == hpet_readl(HPET_COUNTER)) { 805 pr_warn("Counter not counting. HPET disabled\n");
885 printk(KERN_WARNING 806 return false;
886 "HPET counter not counting. HPET disabled\n");
887 return -ENODEV;
888 }
889
890 clocksource_register_hz(&clocksource_hpet, (u32)hpet_freq);
891 return 0;
892} 807}
893 808
894static u32 *hpet_boot_cfg;
895
896/** 809/**
897 * hpet_enable - Try to setup the HPET timer. Returns 1 on success. 810 * hpet_enable - Try to setup the HPET timer. Returns 1 on success.
898 */ 811 */
899int __init hpet_enable(void) 812int __init hpet_enable(void)
900{ 813{
901 u32 hpet_period, cfg, id; 814 u32 hpet_period, cfg, id, irq;
815 unsigned int i, channels;
816 struct hpet_channel *hc;
902 u64 freq; 817 u64 freq;
903 unsigned int i, last;
904 818
905 if (!is_hpet_capable()) 819 if (!is_hpet_capable())
906 return 0; 820 return 0;
@@ -909,40 +823,22 @@ int __init hpet_enable(void)
909 if (!hpet_virt_address) 823 if (!hpet_virt_address)
910 return 0; 824 return 0;
911 825
826 /* Validate that the config register is working */
827 if (!hpet_cfg_working())
828 goto out_nohpet;
829
830 /* Validate that the counter is counting */
831 if (!hpet_counting())
832 goto out_nohpet;
833
912 /* 834 /*
913 * Read the period and check for a sane value: 835 * Read the period and check for a sane value:
914 */ 836 */
915 hpet_period = hpet_readl(HPET_PERIOD); 837 hpet_period = hpet_readl(HPET_PERIOD);
916
917 /*
918 * AMD SB700 based systems with spread spectrum enabled use a
919 * SMM based HPET emulation to provide proper frequency
920 * setting. The SMM code is initialized with the first HPET
921 * register access and takes some time to complete. During
922 * this time the config register reads 0xffffffff. We check
923 * for max. 1000 loops whether the config register reads a non
924 * 0xffffffff value to make sure that HPET is up and running
925 * before we go further. A counting loop is safe, as the HPET
926 * access takes thousands of CPU cycles. On non SB700 based
927 * machines this check is only done once and has no side
928 * effects.
929 */
930 for (i = 0; hpet_readl(HPET_CFG) == 0xFFFFFFFF; i++) {
931 if (i == 1000) {
932 printk(KERN_WARNING
933 "HPET config register value = 0xFFFFFFFF. "
934 "Disabling HPET\n");
935 goto out_nohpet;
936 }
937 }
938
939 if (hpet_period < HPET_MIN_PERIOD || hpet_period > HPET_MAX_PERIOD) 838 if (hpet_period < HPET_MIN_PERIOD || hpet_period > HPET_MAX_PERIOD)
940 goto out_nohpet; 839 goto out_nohpet;
941 840
942 /* 841 /* The period is a femtoseconds value. Convert it to a frequency. */
943 * The period is a femto seconds value. Convert it to a
944 * frequency.
945 */
946 freq = FSEC_PER_SEC; 842 freq = FSEC_PER_SEC;
947 do_div(freq, hpet_period); 843 do_div(freq, hpet_period);
948 hpet_freq = freq; 844 hpet_freq = freq;
@@ -954,72 +850,90 @@ int __init hpet_enable(void)
954 id = hpet_readl(HPET_ID); 850 id = hpet_readl(HPET_ID);
955 hpet_print_config(); 851 hpet_print_config();
956 852
 957 last = (id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT; 853 /* The ID field is the zero-based number of the last channel, hence the +1 */
854 channels = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1;
958 855
959#ifdef CONFIG_HPET_EMULATE_RTC
960 /* 856 /*
961 * The legacy routing mode needs at least two channels, tick timer 857 * The legacy routing mode needs at least two channels, tick timer
962 * and the rtc emulation channel. 858 * and the rtc emulation channel.
963 */ 859 */
964 if (!last) 860 if (IS_ENABLED(CONFIG_HPET_EMULATE_RTC) && channels < 2)
965 goto out_nohpet; 861 goto out_nohpet;
966#endif
967 862
863 hc = kcalloc(channels, sizeof(*hc), GFP_KERNEL);
864 if (!hc) {
865 pr_warn("Disabling HPET.\n");
866 goto out_nohpet;
867 }
868 hpet_base.channels = hc;
869 hpet_base.nr_channels = channels;
870
871 /* Read, store and sanitize the global configuration */
968 cfg = hpet_readl(HPET_CFG); 872 cfg = hpet_readl(HPET_CFG);
969 hpet_boot_cfg = kmalloc_array(last + 2, sizeof(*hpet_boot_cfg), 873 hpet_base.boot_cfg = cfg;
970 GFP_KERNEL);
971 if (hpet_boot_cfg)
972 *hpet_boot_cfg = cfg;
973 else
974 pr_warn("HPET initial state will not be saved\n");
975 cfg &= ~(HPET_CFG_ENABLE | HPET_CFG_LEGACY); 874 cfg &= ~(HPET_CFG_ENABLE | HPET_CFG_LEGACY);
976 hpet_writel(cfg, HPET_CFG); 875 hpet_writel(cfg, HPET_CFG);
977 if (cfg) 876 if (cfg)
978 pr_warn("Unrecognized bits %#x set in global cfg\n", cfg); 877 pr_warn("Global config: Unknown bits %#x\n", cfg);
878
879 /* Read, store and sanitize the per channel configuration */
880 for (i = 0; i < channels; i++, hc++) {
881 hc->num = i;
979 882
980 for (i = 0; i <= last; ++i) {
981 cfg = hpet_readl(HPET_Tn_CFG(i)); 883 cfg = hpet_readl(HPET_Tn_CFG(i));
982 if (hpet_boot_cfg) 884 hc->boot_cfg = cfg;
983 hpet_boot_cfg[i + 1] = cfg; 885 irq = (cfg & Tn_INT_ROUTE_CNF_MASK) >> Tn_INT_ROUTE_CNF_SHIFT;
886 hc->irq = irq;
887
984 cfg &= ~(HPET_TN_ENABLE | HPET_TN_LEVEL | HPET_TN_FSB); 888 cfg &= ~(HPET_TN_ENABLE | HPET_TN_LEVEL | HPET_TN_FSB);
985 hpet_writel(cfg, HPET_Tn_CFG(i)); 889 hpet_writel(cfg, HPET_Tn_CFG(i));
890
986 cfg &= ~(HPET_TN_PERIODIC | HPET_TN_PERIODIC_CAP 891 cfg &= ~(HPET_TN_PERIODIC | HPET_TN_PERIODIC_CAP
987 | HPET_TN_64BIT_CAP | HPET_TN_32BIT | HPET_TN_ROUTE 892 | HPET_TN_64BIT_CAP | HPET_TN_32BIT | HPET_TN_ROUTE
988 | HPET_TN_FSB | HPET_TN_FSB_CAP); 893 | HPET_TN_FSB | HPET_TN_FSB_CAP);
989 if (cfg) 894 if (cfg)
990 pr_warn("Unrecognized bits %#x set in cfg#%u\n", 895 pr_warn("Channel #%u config: Unknown bits %#x\n", i, cfg);
991 cfg, i);
992 } 896 }
993 hpet_print_config(); 897 hpet_print_config();
994 898
995 if (hpet_clocksource_register()) 899 clocksource_register_hz(&clocksource_hpet, (u32)hpet_freq);
996 goto out_nohpet;
997 900
998 if (id & HPET_ID_LEGSUP) { 901 if (id & HPET_ID_LEGSUP) {
999 hpet_legacy_clockevent_register(); 902 hpet_legacy_clockevent_register(&hpet_base.channels[0]);
903 hpet_base.channels[0].mode = HPET_MODE_LEGACY;
904 if (IS_ENABLED(CONFIG_HPET_EMULATE_RTC))
905 hpet_base.channels[1].mode = HPET_MODE_LEGACY;
1000 return 1; 906 return 1;
1001 } 907 }
1002 return 0; 908 return 0;
1003 909
1004out_nohpet: 910out_nohpet:
911 kfree(hpet_base.channels);
912 hpet_base.channels = NULL;
913 hpet_base.nr_channels = 0;
1005 hpet_clear_mapping(); 914 hpet_clear_mapping();
1006 hpet_address = 0; 915 hpet_address = 0;
1007 return 0; 916 return 0;
1008} 917}
1009 918
1010/* 919/*
1011 * Needs to be late, as the reserve_timer code calls kalloc ! 920 * The late initialization runs after the PCI quirks have been invoked
921 * which might have detected a system on which the HPET can be enforced.
922 *
923 * Also, the MSI machinery is not working yet when the HPET is initialized
924 * early.
1012 * 925 *
1013 * Not a problem on i386 as hpet_enable is called from late_time_init, 926 * If the HPET is enabled, then:
1014 * but on x86_64 it is necessary ! 927 *
928 * 1) Reserve one channel for /dev/hpet if CONFIG_HPET=y
929 * 2) Reserve up to num_possible_cpus() channels as per CPU clockevents
930 * 3) Setup /dev/hpet if CONFIG_HPET=y
931 * 4) Register hotplug callbacks when clockevents are available
1015 */ 932 */
1016static __init int hpet_late_init(void) 933static __init int hpet_late_init(void)
1017{ 934{
1018 int ret; 935 int ret;
1019 936
1020 if (boot_hpet_disable)
1021 return -ENODEV;
1022
1023 if (!hpet_address) { 937 if (!hpet_address) {
1024 if (!force_hpet_address) 938 if (!force_hpet_address)
1025 return -ENODEV; 939 return -ENODEV;
@@ -1031,21 +945,14 @@ static __init int hpet_late_init(void)
1031 if (!hpet_virt_address) 945 if (!hpet_virt_address)
1032 return -ENODEV; 946 return -ENODEV;
1033 947
1034 if (hpet_readl(HPET_ID) & HPET_ID_LEGSUP) 948 hpet_select_device_channel();
1035 hpet_msi_capability_lookup(2); 949 hpet_select_clockevents();
1036 else 950 hpet_reserve_platform_timers();
1037 hpet_msi_capability_lookup(0);
1038
1039 hpet_reserve_platform_timers(hpet_readl(HPET_ID));
1040 hpet_print_config(); 951 hpet_print_config();
1041 952
1042 if (hpet_msi_disable) 953 if (!hpet_base.nr_clockevents)
1043 return 0; 954 return 0;
1044 955
1045 if (boot_cpu_has(X86_FEATURE_ARAT))
1046 return 0;
1047
1048 /* This notifier should be called after workqueue is ready */
1049 ret = cpuhp_setup_state(CPUHP_AP_X86_HPET_ONLINE, "x86/hpet:online", 956 ret = cpuhp_setup_state(CPUHP_AP_X86_HPET_ONLINE, "x86/hpet:online",
1050 hpet_cpuhp_online, NULL); 957 hpet_cpuhp_online, NULL);
1051 if (ret) 958 if (ret)
@@ -1064,47 +971,47 @@ fs_initcall(hpet_late_init);
1064 971
1065void hpet_disable(void) 972void hpet_disable(void)
1066{ 973{
1067 if (is_hpet_capable() && hpet_virt_address) { 974 unsigned int i;
1068 unsigned int cfg = hpet_readl(HPET_CFG), id, last; 975 u32 cfg;
1069
1070 if (hpet_boot_cfg)
1071 cfg = *hpet_boot_cfg;
1072 else if (hpet_legacy_int_enabled) {
1073 cfg &= ~HPET_CFG_LEGACY;
1074 hpet_legacy_int_enabled = false;
1075 }
1076 cfg &= ~HPET_CFG_ENABLE;
1077 hpet_writel(cfg, HPET_CFG);
1078 976
1079 if (!hpet_boot_cfg) 977 if (!is_hpet_capable() || !hpet_virt_address)
1080 return; 978 return;
1081 979
1082 id = hpet_readl(HPET_ID); 980 /* Restore boot configuration with the enable bit cleared */
1083 last = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT); 981 cfg = hpet_base.boot_cfg;
982 cfg &= ~HPET_CFG_ENABLE;
983 hpet_writel(cfg, HPET_CFG);
1084 984
1085 for (id = 0; id <= last; ++id) 985 /* Restore the channel boot configuration */
1086 hpet_writel(hpet_boot_cfg[id + 1], HPET_Tn_CFG(id)); 986 for (i = 0; i < hpet_base.nr_channels; i++)
987 hpet_writel(hpet_base.channels[i].boot_cfg, HPET_Tn_CFG(i));
1087 988
1088 if (*hpet_boot_cfg & HPET_CFG_ENABLE) 989 /* If the HPET was enabled at boot time, reenable it */
1089 hpet_writel(*hpet_boot_cfg, HPET_CFG); 990 if (hpet_base.boot_cfg & HPET_CFG_ENABLE)
1090 } 991 hpet_writel(hpet_base.boot_cfg, HPET_CFG);
1091} 992}
1092 993
1093#ifdef CONFIG_HPET_EMULATE_RTC 994#ifdef CONFIG_HPET_EMULATE_RTC
1094 995
1095/* HPET in LegacyReplacement Mode eats up RTC interrupt line. When, HPET 996/*
997 * HPET in LegacyReplacement mode eats up the RTC interrupt line. When HPET
1096 * is enabled, we support RTC interrupt functionality in software. 998 * is enabled, we support RTC interrupt functionality in software.
999 *
1097 * RTC has 3 kinds of interrupts: 1000 * RTC has 3 kinds of interrupts:
1098 * 1) Update Interrupt - generate an interrupt, every sec, when RTC clock 1001 *
1099 * is updated 1002 * 1) Update Interrupt - generate an interrupt, every second, when the
1100 * 2) Alarm Interrupt - generate an interrupt at a specific time of day 1003 * RTC clock is updated
1101 * 3) Periodic Interrupt - generate periodic interrupt, with frequencies 1004 * 2) Alarm Interrupt - generate an interrupt at a specific time of day
1102 * 2Hz-8192Hz (2Hz-64Hz for non-root user) (all freqs in powers of 2) 1005 * 3) Periodic Interrupt - generate periodic interrupt, with frequencies
1103 * (1) and (2) above are implemented using polling at a frequency of 1006 * 2Hz-8192Hz (2Hz-64Hz for non-root user) (all frequencies in powers of 2)
1104 * 64 Hz. The exact frequency is a tradeoff between accuracy and interrupt 1007 *
1105 * overhead. (DEFAULT_RTC_INT_FREQ) 1008 * (1) and (2) above are implemented using polling at a frequency of 64 Hz:
1106 * For (3), we use interrupts at 64Hz or user specified periodic 1009 * DEFAULT_RTC_INT_FREQ.
1107 * frequency, whichever is higher. 1010 *
1011 * The exact frequency is a tradeoff between accuracy and interrupt overhead.
1012 *
1013 * For (3), we use interrupts at 64 Hz, or the user specified periodic frequency,
1014 * if it's higher.
1108 */ 1015 */
1109#include <linux/mc146818rtc.h> 1016#include <linux/mc146818rtc.h>
1110#include <linux/rtc.h> 1017#include <linux/rtc.h>
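To make the polling scheme concrete with the code further down: for requested periodic rates at or below DEFAULT_RTC_INT_FREQ (64 Hz), hpet_set_periodic_freq() only sets hpet_pie_limit = 64 / freq, so a 16 Hz request raises RTC_PF on every fourth 64 Hz tick; rates above 64 Hz clear hpet_pie_limit and the channel is instead reprogrammed with the shorter hpet_pie_delta.
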
@@ -1125,7 +1032,7 @@ static unsigned long hpet_pie_limit;
1125static rtc_irq_handler irq_handler; 1032static rtc_irq_handler irq_handler;
1126 1033
1127/* 1034/*
1128 * Check that the hpet counter c1 is ahead of the c2 1035 * Check that the HPET counter c1 is ahead of c2
1129 */ 1036 */
1130static inline int hpet_cnt_ahead(u32 c1, u32 c2) 1037static inline int hpet_cnt_ahead(u32 c1, u32 c2)
1131{ 1038{
@@ -1163,8 +1070,8 @@ void hpet_unregister_irq_handler(rtc_irq_handler handler)
1163EXPORT_SYMBOL_GPL(hpet_unregister_irq_handler); 1070EXPORT_SYMBOL_GPL(hpet_unregister_irq_handler);
1164 1071
1165/* 1072/*
1166 * Timer 1 for RTC emulation. We use one shot mode, as periodic mode 1073 * Channel 1 for RTC emulation. We use one shot mode, as periodic mode
1167 * is not supported by all HPET implementations for timer 1. 1074 * is not supported by all HPET implementations for channel 1.
1168 * 1075 *
1169 * hpet_rtc_timer_init() is called when the rtc is initialized. 1076 * hpet_rtc_timer_init() is called when the rtc is initialized.
1170 */ 1077 */
@@ -1177,10 +1084,11 @@ int hpet_rtc_timer_init(void)
1177 return 0; 1084 return 0;
1178 1085
1179 if (!hpet_default_delta) { 1086 if (!hpet_default_delta) {
1087 struct clock_event_device *evt = &hpet_base.channels[0].evt;
1180 uint64_t clc; 1088 uint64_t clc;
1181 1089
1182 clc = (uint64_t) hpet_clockevent.mult * NSEC_PER_SEC; 1090 clc = (uint64_t) evt->mult * NSEC_PER_SEC;
1183 clc >>= hpet_clockevent.shift + DEFAULT_RTC_SHIFT; 1091 clc >>= evt->shift + DEFAULT_RTC_SHIFT;
1184 hpet_default_delta = clc; 1092 hpet_default_delta = clc;
1185 } 1093 }
1186 1094
@@ -1209,6 +1117,7 @@ EXPORT_SYMBOL_GPL(hpet_rtc_timer_init);
1209static void hpet_disable_rtc_channel(void) 1117static void hpet_disable_rtc_channel(void)
1210{ 1118{
1211 u32 cfg = hpet_readl(HPET_T1_CFG); 1119 u32 cfg = hpet_readl(HPET_T1_CFG);
1120
1212 cfg &= ~HPET_TN_ENABLE; 1121 cfg &= ~HPET_TN_ENABLE;
1213 hpet_writel(cfg, HPET_T1_CFG); 1122 hpet_writel(cfg, HPET_T1_CFG);
1214} 1123}
@@ -1250,8 +1159,7 @@ int hpet_set_rtc_irq_bit(unsigned long bit_mask)
1250} 1159}
1251EXPORT_SYMBOL_GPL(hpet_set_rtc_irq_bit); 1160EXPORT_SYMBOL_GPL(hpet_set_rtc_irq_bit);
1252 1161
1253int hpet_set_alarm_time(unsigned char hrs, unsigned char min, 1162int hpet_set_alarm_time(unsigned char hrs, unsigned char min, unsigned char sec)
1254 unsigned char sec)
1255{ 1163{
1256 if (!is_hpet_enabled()) 1164 if (!is_hpet_enabled())
1257 return 0; 1165 return 0;
@@ -1271,15 +1179,18 @@ int hpet_set_periodic_freq(unsigned long freq)
1271 if (!is_hpet_enabled()) 1179 if (!is_hpet_enabled())
1272 return 0; 1180 return 0;
1273 1181
1274 if (freq <= DEFAULT_RTC_INT_FREQ) 1182 if (freq <= DEFAULT_RTC_INT_FREQ) {
1275 hpet_pie_limit = DEFAULT_RTC_INT_FREQ / freq; 1183 hpet_pie_limit = DEFAULT_RTC_INT_FREQ / freq;
1276 else { 1184 } else {
1277 clc = (uint64_t) hpet_clockevent.mult * NSEC_PER_SEC; 1185 struct clock_event_device *evt = &hpet_base.channels[0].evt;
1186
1187 clc = (uint64_t) evt->mult * NSEC_PER_SEC;
1278 do_div(clc, freq); 1188 do_div(clc, freq);
1279 clc >>= hpet_clockevent.shift; 1189 clc >>= evt->shift;
1280 hpet_pie_delta = clc; 1190 hpet_pie_delta = clc;
1281 hpet_pie_limit = 0; 1191 hpet_pie_limit = 0;
1282 } 1192 }
1193
1283 return 1; 1194 return 1;
1284} 1195}
1285EXPORT_SYMBOL_GPL(hpet_set_periodic_freq); 1196EXPORT_SYMBOL_GPL(hpet_set_periodic_freq);
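The mult/shift conversion used in hpet_rtc_timer_init() and hpet_set_periodic_freq() above can be reproduced in isolation. The stand-alone sketch below plugs in hypothetical values for evt->mult and evt->shift (roughly what a 14.318 MHz HPET would yield); the real values come from the clockevent configuration at boot:

#include <stdio.h>
#include <stdint.h>

#define NSEC_PER_SEC 1000000000ULL

/* Convert a target frequency into an HPET comparator delta, as the driver does. */
static uint64_t freq_to_delta(uint32_t mult, uint32_t shift, unsigned long freq)
{
        uint64_t clc = (uint64_t)mult * NSEC_PER_SEC;

        clc /= freq;            /* do_div(clc, freq) in the kernel */
        return clc >> shift;
}

int main(void)
{
        /* hypothetical clockevent parameters for a ~14.318 MHz HPET */
        uint32_t mult = 61496110, shift = 32;

        printf("1024 Hz -> %llu HPET cycles\n",
               (unsigned long long)freq_to_delta(mult, shift, 1024));
        return 0;
}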
@@ -1317,8 +1228,7 @@ static void hpet_rtc_timer_reinit(void)
1317 if (hpet_rtc_flags & RTC_PIE) 1228 if (hpet_rtc_flags & RTC_PIE)
1318 hpet_pie_count += lost_ints; 1229 hpet_pie_count += lost_ints;
1319 if (printk_ratelimit()) 1230 if (printk_ratelimit())
1320 printk(KERN_WARNING "hpet1: lost %d rtc interrupts\n", 1231 pr_warn("Lost %d RTC interrupts\n", lost_ints);
1321 lost_ints);
1322 } 1232 }
1323} 1233}
1324 1234
@@ -1340,8 +1250,7 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id)
1340 hpet_prev_update_sec = curr_time.tm_sec; 1250 hpet_prev_update_sec = curr_time.tm_sec;
1341 } 1251 }
1342 1252
1343 if (hpet_rtc_flags & RTC_PIE && 1253 if (hpet_rtc_flags & RTC_PIE && ++hpet_pie_count >= hpet_pie_limit) {
1344 ++hpet_pie_count >= hpet_pie_limit) {
1345 rtc_int_flag |= RTC_PF; 1254 rtc_int_flag |= RTC_PF;
1346 hpet_pie_count = 0; 1255 hpet_pie_count = 0;
1347 } 1256 }
@@ -1350,7 +1259,7 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id)
1350 (curr_time.tm_sec == hpet_alarm_time.tm_sec) && 1259 (curr_time.tm_sec == hpet_alarm_time.tm_sec) &&
1351 (curr_time.tm_min == hpet_alarm_time.tm_min) && 1260 (curr_time.tm_min == hpet_alarm_time.tm_min) &&
1352 (curr_time.tm_hour == hpet_alarm_time.tm_hour)) 1261 (curr_time.tm_hour == hpet_alarm_time.tm_hour))
1353 rtc_int_flag |= RTC_AF; 1262 rtc_int_flag |= RTC_AF;
1354 1263
1355 if (rtc_int_flag) { 1264 if (rtc_int_flag) {
1356 rtc_int_flag |= (RTC_IRQF | (RTC_NUM_INTS << 8)); 1265 rtc_int_flag |= (RTC_IRQF | (RTC_NUM_INTS << 8));
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c
index 0d307a657abb..2b7999a1a50a 100644
--- a/arch/x86/kernel/i8253.c
+++ b/arch/x86/kernel/i8253.c
@@ -8,6 +8,7 @@
8#include <linux/timex.h> 8#include <linux/timex.h>
9#include <linux/i8253.h> 9#include <linux/i8253.h>
10 10
11#include <asm/apic.h>
11#include <asm/hpet.h> 12#include <asm/hpet.h>
12#include <asm/time.h> 13#include <asm/time.h>
13#include <asm/smp.h> 14#include <asm/smp.h>
@@ -18,10 +19,32 @@
18 */ 19 */
19struct clock_event_device *global_clock_event; 20struct clock_event_device *global_clock_event;
20 21
21void __init setup_pit_timer(void) 22/*
 23 * Modern chipsets can disable the PIT clock, which makes it unusable. It
 24 * would be possible to enable the clock, but the registers are chipset
 25 * specific and not discoverable. Avoid the whack-a-mole game.
 26 *
 27 * These platforms have discoverable TSC/CPU frequencies, but this also
 28 * requires knowing the local APIC timer frequency, as it is normally
 29 * calibrated against the PIT interrupt.
30 */
31static bool __init use_pit(void)
32{
33 if (!IS_ENABLED(CONFIG_X86_TSC) || !boot_cpu_has(X86_FEATURE_TSC))
34 return true;
35
36 /* This also returns true when APIC is disabled */
37 return apic_needs_pit();
38}
39
40bool __init pit_timer_init(void)
22{ 41{
42 if (!use_pit())
43 return false;
44
23 clockevent_i8253_init(true); 45 clockevent_i8253_init(true);
24 global_clock_event = &i8253_clockevent; 46 global_clock_event = &i8253_clockevent;
47 return true;
25} 48}
26 49
27#ifndef CONFIG_X86_64 50#ifndef CONFIG_X86_64
diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
index d2482bbbe3d0..87ef69a72c52 100644
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -319,7 +319,8 @@ void __init idt_setup_apic_and_irq_gates(void)
319#ifdef CONFIG_X86_LOCAL_APIC 319#ifdef CONFIG_X86_LOCAL_APIC
320 for_each_clear_bit_from(i, system_vectors, NR_VECTORS) { 320 for_each_clear_bit_from(i, system_vectors, NR_VECTORS) {
321 set_bit(i, system_vectors); 321 set_bit(i, system_vectors);
322 set_intr_gate(i, spurious_interrupt); 322 entry = spurious_entries_start + 8 * (i - FIRST_SYSTEM_VECTOR);
323 set_intr_gate(i, entry);
323 } 324 }
324#endif 325#endif
325} 326}
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 9b68b5b00ac9..cc496eb7a8d2 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -247,7 +247,7 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
247 if (!handle_irq(desc, regs)) { 247 if (!handle_irq(desc, regs)) {
248 ack_APIC_irq(); 248 ack_APIC_irq();
249 249
250 if (desc != VECTOR_RETRIGGERED) { 250 if (desc != VECTOR_RETRIGGERED && desc != VECTOR_SHUTDOWN) {
251 pr_emerg_ratelimited("%s: %d.%d No irq handler for vector\n", 251 pr_emerg_ratelimited("%s: %d.%d No irq handler for vector\n",
252 __func__, smp_processor_id(), 252 __func__, smp_processor_id(),
253 vector); 253 vector);
diff --git a/arch/x86/kernel/jailhouse.c b/arch/x86/kernel/jailhouse.c
index 1b2ee55a2dfb..ba95bc70460d 100644
--- a/arch/x86/kernel/jailhouse.c
+++ b/arch/x86/kernel/jailhouse.c
@@ -45,7 +45,7 @@ static void jailhouse_get_wallclock(struct timespec64 *now)
45 45
46static void __init jailhouse_timer_init(void) 46static void __init jailhouse_timer_init(void)
47{ 47{
48 lapic_timer_frequency = setup_data.apic_khz * (1000 / HZ); 48 lapic_timer_period = setup_data.apic_khz * (1000 / HZ);
49} 49}
50 50
51static unsigned long jailhouse_get_tsc(void) 51static unsigned long jailhouse_get_tsc(void)
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index e631c358f7f4..044053235302 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -35,41 +35,43 @@ static void bug_at(unsigned char *ip, int line)
35 BUG(); 35 BUG();
36} 36}
37 37
38static void __ref __jump_label_transform(struct jump_entry *entry, 38static void __jump_label_set_jump_code(struct jump_entry *entry,
39 enum jump_label_type type, 39 enum jump_label_type type,
40 int init) 40 union jump_code_union *code,
41 int init)
41{ 42{
42 union jump_code_union jmp;
43 const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP }; 43 const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP };
44 const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5]; 44 const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5];
45 const void *expect, *code; 45 const void *expect;
46 int line; 46 int line;
47 47
48 jmp.jump = 0xe9; 48 code->jump = 0xe9;
49 jmp.offset = jump_entry_target(entry) - 49 code->offset = jump_entry_target(entry) -
50 (jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE); 50 (jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE);
51 51
52 if (type == JUMP_LABEL_JMP) { 52 if (init) {
53 if (init) { 53 expect = default_nop; line = __LINE__;
54 expect = default_nop; line = __LINE__; 54 } else if (type == JUMP_LABEL_JMP) {
55 } else { 55 expect = ideal_nop; line = __LINE__;
56 expect = ideal_nop; line = __LINE__;
57 }
58
59 code = &jmp.code;
60 } else { 56 } else {
61 if (init) { 57 expect = code->code; line = __LINE__;
62 expect = default_nop; line = __LINE__;
63 } else {
64 expect = &jmp.code; line = __LINE__;
65 }
66
67 code = ideal_nop;
68 } 58 }
69 59
70 if (memcmp((void *)jump_entry_code(entry), expect, JUMP_LABEL_NOP_SIZE)) 60 if (memcmp((void *)jump_entry_code(entry), expect, JUMP_LABEL_NOP_SIZE))
71 bug_at((void *)jump_entry_code(entry), line); 61 bug_at((void *)jump_entry_code(entry), line);
72 62
63 if (type == JUMP_LABEL_NOP)
64 memcpy(code, ideal_nop, JUMP_LABEL_NOP_SIZE);
65}
66
67static void __ref __jump_label_transform(struct jump_entry *entry,
68 enum jump_label_type type,
69 int init)
70{
71 union jump_code_union code;
72
73 __jump_label_set_jump_code(entry, type, &code, init);
74
73 /* 75 /*
74 * As long as only a single processor is running and the code is still 76 * As long as only a single processor is running and the code is still
75 * not marked as RO, text_poke_early() can be used; Checking that 77 * not marked as RO, text_poke_early() can be used; Checking that
@@ -82,12 +84,12 @@ static void __ref __jump_label_transform(struct jump_entry *entry,
82 * always nop being the 'currently valid' instruction 84 * always nop being the 'currently valid' instruction
83 */ 85 */
84 if (init || system_state == SYSTEM_BOOTING) { 86 if (init || system_state == SYSTEM_BOOTING) {
85 text_poke_early((void *)jump_entry_code(entry), code, 87 text_poke_early((void *)jump_entry_code(entry), &code,
86 JUMP_LABEL_NOP_SIZE); 88 JUMP_LABEL_NOP_SIZE);
87 return; 89 return;
88 } 90 }
89 91
90 text_poke_bp((void *)jump_entry_code(entry), code, JUMP_LABEL_NOP_SIZE, 92 text_poke_bp((void *)jump_entry_code(entry), &code, JUMP_LABEL_NOP_SIZE,
91 (void *)jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE); 93 (void *)jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE);
92} 94}
93 95
@@ -99,6 +101,75 @@ void arch_jump_label_transform(struct jump_entry *entry,
99 mutex_unlock(&text_mutex); 101 mutex_unlock(&text_mutex);
100} 102}
101 103
104#define TP_VEC_MAX (PAGE_SIZE / sizeof(struct text_poke_loc))
105static struct text_poke_loc tp_vec[TP_VEC_MAX];
106static int tp_vec_nr;
107
108bool arch_jump_label_transform_queue(struct jump_entry *entry,
109 enum jump_label_type type)
110{
111 struct text_poke_loc *tp;
112 void *entry_code;
113
114 if (system_state == SYSTEM_BOOTING) {
115 /*
 116 * Fall back to the non-batching mode.
117 */
118 arch_jump_label_transform(entry, type);
119 return true;
120 }
121
122 /*
123 * No more space in the vector, tell upper layer to apply
124 * the queue before continuing.
125 */
126 if (tp_vec_nr == TP_VEC_MAX)
127 return false;
128
129 tp = &tp_vec[tp_vec_nr];
130
131 entry_code = (void *)jump_entry_code(entry);
132
133 /*
 134 * The INT3 handler will do a bsearch in the queue, so the entries need
 135 * to be sorted. We can survive an unsorted entry by rejecting it, which
 136 * forces the generic jump_label code to apply the queue so far. Warn
 137 * once to draw attention to the unsorted case, which should not happen;
 138 * in the worst case we simply perform the same way as we would without
 139 * batching, just with some extra overhead.
140 */
141 if (tp_vec_nr > 0) {
142 int prev = tp_vec_nr - 1;
143 struct text_poke_loc *prev_tp = &tp_vec[prev];
144
145 if (WARN_ON_ONCE(prev_tp->addr > entry_code))
146 return false;
147 }
148
149 __jump_label_set_jump_code(entry, type,
150 (union jump_code_union *) &tp->opcode, 0);
151
152 tp->addr = entry_code;
153 tp->detour = entry_code + JUMP_LABEL_NOP_SIZE;
154 tp->len = JUMP_LABEL_NOP_SIZE;
155
156 tp_vec_nr++;
157
158 return true;
159}
160
161void arch_jump_label_transform_apply(void)
162{
163 if (!tp_vec_nr)
164 return;
165
166 mutex_lock(&text_mutex);
167 text_poke_bp_batch(tp_vec, tp_vec_nr);
168 mutex_unlock(&text_mutex);
169
170 tp_vec_nr = 0;
171}
172
102static enum { 173static enum {
103 JL_STATE_START, 174 JL_STATE_START,
104 JL_STATE_NO_UPDATE, 175 JL_STATE_NO_UPDATE,
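For context, this is roughly how a caller is expected to drive the queue/apply pair added above: queue entries until the vector fills up (or an out-of-order entry is rejected), then flush. The loop below illustrates that contract only; it is not the actual generic consumer in kernel/jump_label.c:

#include <linux/jump_label.h>

static void update_entries(struct jump_entry *start, struct jump_entry *stop,
                           enum jump_label_type type)
{
        struct jump_entry *entry;

        for (entry = start; entry < stop; entry++) {
                if (!arch_jump_label_transform_queue(entry, type)) {
                        /* Vector full or unsorted entry: apply what we have. */
                        arch_jump_label_transform_apply();
                        arch_jump_label_transform_queue(entry, type);
                }
        }
        arch_jump_label_transform_apply();
}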
diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c
index 07c30ee17425..bb7e1132290b 100644
--- a/arch/x86/kernel/perf_regs.c
+++ b/arch/x86/kernel/perf_regs.c
@@ -74,6 +74,9 @@ u64 perf_reg_value(struct pt_regs *regs, int idx)
74 return regs_get_register(regs, pt_regs_offset[idx]); 74 return regs_get_register(regs, pt_regs_offset[idx]);
75} 75}
76 76
77#define PERF_REG_X86_RESERVED (((1ULL << PERF_REG_X86_XMM0) - 1) & \
78 ~((1ULL << PERF_REG_X86_MAX) - 1))
79
77#ifdef CONFIG_X86_32 80#ifdef CONFIG_X86_32
78#define REG_NOSUPPORT ((1ULL << PERF_REG_X86_R8) | \ 81#define REG_NOSUPPORT ((1ULL << PERF_REG_X86_R8) | \
79 (1ULL << PERF_REG_X86_R9) | \ 82 (1ULL << PERF_REG_X86_R9) | \
@@ -86,7 +89,7 @@ u64 perf_reg_value(struct pt_regs *regs, int idx)
86 89
87int perf_reg_validate(u64 mask) 90int perf_reg_validate(u64 mask)
88{ 91{
89 if (!mask || (mask & REG_NOSUPPORT)) 92 if (!mask || (mask & (REG_NOSUPPORT | PERF_REG_X86_RESERVED)))
90 return -EINVAL; 93 return -EINVAL;
91 94
92 return 0; 95 return 0;
@@ -112,7 +115,7 @@ void perf_get_regs_user(struct perf_regs *regs_user,
112 115
113int perf_reg_validate(u64 mask) 116int perf_reg_validate(u64 mask)
114{ 117{
115 if (!mask || (mask & REG_NOSUPPORT)) 118 if (!mask || (mask & (REG_NOSUPPORT | PERF_REG_X86_RESERVED)))
116 return -EINVAL; 119 return -EINVAL;
117 120
118 return 0; 121 return 0;
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index a166c960bc9e..ee9099061d01 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -25,6 +25,7 @@
25#include <linux/rcupdate.h> 25#include <linux/rcupdate.h>
26#include <linux/export.h> 26#include <linux/export.h>
27#include <linux/context_tracking.h> 27#include <linux/context_tracking.h>
28#include <linux/nospec.h>
28 29
29#include <linux/uaccess.h> 30#include <linux/uaccess.h>
30#include <asm/pgtable.h> 31#include <asm/pgtable.h>
@@ -397,22 +398,12 @@ static int putreg(struct task_struct *child,
397 case offsetof(struct user_regs_struct,fs_base): 398 case offsetof(struct user_regs_struct,fs_base):
398 if (value >= TASK_SIZE_MAX) 399 if (value >= TASK_SIZE_MAX)
399 return -EIO; 400 return -EIO;
400 /* 401 x86_fsbase_write_task(child, value);
401 * When changing the FS base, use do_arch_prctl_64()
402 * to set the index to zero and to set the base
403 * as requested.
404 */
405 if (child->thread.fsbase != value)
406 return do_arch_prctl_64(child, ARCH_SET_FS, value);
407 return 0; 402 return 0;
408 case offsetof(struct user_regs_struct,gs_base): 403 case offsetof(struct user_regs_struct,gs_base):
409 /*
410 * Exactly the same here as the %fs handling above.
411 */
412 if (value >= TASK_SIZE_MAX) 404 if (value >= TASK_SIZE_MAX)
413 return -EIO; 405 return -EIO;
414 if (child->thread.gsbase != value) 406 x86_gsbase_write_task(child, value);
415 return do_arch_prctl_64(child, ARCH_SET_GS, value);
416 return 0; 407 return 0;
417#endif 408#endif
418 } 409 }
@@ -645,7 +636,8 @@ static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n)
645 unsigned long val = 0; 636 unsigned long val = 0;
646 637
647 if (n < HBP_NUM) { 638 if (n < HBP_NUM) {
648 struct perf_event *bp = thread->ptrace_bps[n]; 639 int index = array_index_nospec(n, HBP_NUM);
640 struct perf_event *bp = thread->ptrace_bps[index];
649 641
650 if (bp) 642 if (bp)
651 val = bp->hw.info.address; 643 val = bp->hw.info.address;
@@ -747,9 +739,6 @@ static int ioperm_get(struct task_struct *target,
747void ptrace_disable(struct task_struct *child) 739void ptrace_disable(struct task_struct *child)
748{ 740{
749 user_disable_single_step(child); 741 user_disable_single_step(child);
750#ifdef TIF_SYSCALL_EMU
751 clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
752#endif
753} 742}
754 743
755#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION 744#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 0ff3e294d0e5..10125358b9c4 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -3,6 +3,7 @@
3 3
4*/ 4*/
5 5
6#include <linux/clocksource.h>
6#include <linux/kernel.h> 7#include <linux/kernel.h>
7#include <linux/percpu.h> 8#include <linux/percpu.h>
8#include <linux/notifier.h> 9#include <linux/notifier.h>
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 4693e2f3a03e..96421f97e75c 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -144,7 +144,7 @@ void native_send_call_func_ipi(const struct cpumask *mask)
144 } 144 }
145 145
146 cpumask_copy(allbutself, cpu_online_mask); 146 cpumask_copy(allbutself, cpu_online_mask);
147 cpumask_clear_cpu(smp_processor_id(), allbutself); 147 __cpumask_clear_cpu(smp_processor_id(), allbutself);
148 148
149 if (cpumask_equal(mask, allbutself) && 149 if (cpumask_equal(mask, allbutself) &&
150 cpumask_equal(cpu_online_mask, cpu_callout_mask)) 150 cpumask_equal(cpu_online_mask, cpu_callout_mask))
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index 0e14f6c0d35e..07c0e960b3f3 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -82,8 +82,11 @@ static void __init setup_default_timer_irq(void)
82/* Default timer init function */ 82/* Default timer init function */
83void __init hpet_time_init(void) 83void __init hpet_time_init(void)
84{ 84{
85 if (!hpet_enable()) 85 if (!hpet_enable()) {
86 setup_pit_timer(); 86 if (!pit_timer_init())
87 return;
88 }
89
87 setup_default_timer_irq(); 90 setup_default_timer_irq();
88} 91}
89 92
diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c
index a5b802a12212..71d3fef1edc9 100644
--- a/arch/x86/kernel/tls.c
+++ b/arch/x86/kernel/tls.c
@@ -5,6 +5,7 @@
5#include <linux/user.h> 5#include <linux/user.h>
6#include <linux/regset.h> 6#include <linux/regset.h>
7#include <linux/syscalls.h> 7#include <linux/syscalls.h>
8#include <linux/nospec.h>
8 9
9#include <linux/uaccess.h> 10#include <linux/uaccess.h>
10#include <asm/desc.h> 11#include <asm/desc.h>
@@ -220,6 +221,7 @@ int do_get_thread_area(struct task_struct *p, int idx,
220 struct user_desc __user *u_info) 221 struct user_desc __user *u_info)
221{ 222{
222 struct user_desc info; 223 struct user_desc info;
224 int index;
223 225
224 if (idx == -1 && get_user(idx, &u_info->entry_number)) 226 if (idx == -1 && get_user(idx, &u_info->entry_number))
225 return -EFAULT; 227 return -EFAULT;
@@ -227,8 +229,11 @@ int do_get_thread_area(struct task_struct *p, int idx,
227 if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) 229 if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
228 return -EINVAL; 230 return -EINVAL;
229 231
230 fill_user_desc(&info, idx, 232 index = idx - GDT_ENTRY_TLS_MIN;
231 &p->thread.tls_array[idx - GDT_ENTRY_TLS_MIN]); 233 index = array_index_nospec(index,
234 GDT_ENTRY_TLS_MAX - GDT_ENTRY_TLS_MIN + 1);
235
236 fill_user_desc(&info, idx, &p->thread.tls_array[index]);
232 237
233 if (copy_to_user(u_info, &info, sizeof(info))) 238 if (copy_to_user(u_info, &info, sizeof(info)))
234 return -EFAULT; 239 return -EFAULT;
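Both the ptrace and TLS hunks above apply the same Spectre-v1 hardening idiom: bounds-check first, then clamp the index with array_index_nospec() so a mispredicted bounds check cannot be used to index out of range under speculation. A minimal sketch of the pattern, with a hypothetical table for illustration:

#include <linux/errno.h>
#include <linux/nospec.h>

static int read_slot(const int *table, unsigned int nr_slots,
                     unsigned int idx, int *out)
{
        if (idx >= nr_slots)
                return -EINVAL;

        /* Clamp idx so speculative execution cannot run past nr_slots. */
        idx = array_index_nospec(idx, nr_slots);
        *out = table[idx];
        return 0;
}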
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 0b29e58f288e..59b57605e66c 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -632,31 +632,38 @@ unsigned long native_calibrate_tsc(void)
632 632
633 crystal_khz = ecx_hz / 1000; 633 crystal_khz = ecx_hz / 1000;
634 634
635 if (crystal_khz == 0) { 635 /*
636 switch (boot_cpu_data.x86_model) { 636 * Denverton SoCs don't report crystal clock, and also don't support
637 case INTEL_FAM6_SKYLAKE_MOBILE: 637 * CPUID.0x16 for the calculation below, so hardcode the 25MHz crystal
638 case INTEL_FAM6_SKYLAKE_DESKTOP: 638 * clock.
639 case INTEL_FAM6_KABYLAKE_MOBILE: 639 */
640 case INTEL_FAM6_KABYLAKE_DESKTOP: 640 if (crystal_khz == 0 &&
641 crystal_khz = 24000; /* 24.0 MHz */ 641 boot_cpu_data.x86_model == INTEL_FAM6_ATOM_GOLDMONT_X)
642 break; 642 crystal_khz = 25000;
643 case INTEL_FAM6_ATOM_GOLDMONT_X:
644 crystal_khz = 25000; /* 25.0 MHz */
645 break;
646 case INTEL_FAM6_ATOM_GOLDMONT:
647 crystal_khz = 19200; /* 19.2 MHz */
648 break;
649 }
650 }
651 643
652 if (crystal_khz == 0)
653 return 0;
654 /* 644 /*
655 * TSC frequency determined by CPUID is a "hardware reported" 645 * TSC frequency reported directly by CPUID is a "hardware reported"
656 * frequency and is the most accurate one so far we have. This 646 * frequency and is the most accurate one so far we have. This
657 * is considered a known frequency. 647 * is considered a known frequency.
658 */ 648 */
659 setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ); 649 if (crystal_khz != 0)
650 setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
651
652 /*
653 * Some Intel SoCs like Skylake and Kabylake don't report the crystal
654 * clock, but we can easily calculate it to a high degree of accuracy
655 * by considering the crystal ratio and the CPU speed.
656 */
657 if (crystal_khz == 0 && boot_cpu_data.cpuid_level >= 0x16) {
658 unsigned int eax_base_mhz, ebx, ecx, edx;
659
660 cpuid(0x16, &eax_base_mhz, &ebx, &ecx, &edx);
661 crystal_khz = eax_base_mhz * 1000 *
662 eax_denominator / ebx_numerator;
663 }
664
665 if (crystal_khz == 0)
666 return 0;
660 667
661 /* 668 /*
662 * For Atom SoCs TSC is the only reliable clocksource. 669 * For Atom SoCs TSC is the only reliable clocksource.
@@ -665,6 +672,16 @@ unsigned long native_calibrate_tsc(void)
665 if (boot_cpu_data.x86_model == INTEL_FAM6_ATOM_GOLDMONT) 672 if (boot_cpu_data.x86_model == INTEL_FAM6_ATOM_GOLDMONT)
666 setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE); 673 setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
667 674
675#ifdef CONFIG_X86_LOCAL_APIC
676 /*
677 * The local APIC appears to be fed by the core crystal clock
678 * (which sounds entirely sensible). We can set the global
679 * lapic_timer_period here to avoid having to calibrate the APIC
680 * timer later.
681 */
682 lapic_timer_period = crystal_khz * 1000 / HZ;
683#endif
684
668 return crystal_khz * ebx_numerator / eax_denominator; 685 return crystal_khz * ebx_numerator / eax_denominator;
669} 686}
670 687
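The crystal/TSC arithmetic above is easy to check in isolation. The stand-alone sketch below plugs in hypothetical CPUID leaf 0x15/0x16 values (ratio 176/2 and a 2112 MHz base frequency, which corresponds to a 24 MHz crystal); the real values come from the cpuid() calls in native_calibrate_tsc():

#include <stdio.h>

int main(void)
{
        unsigned int eax_denominator = 2, ebx_numerator = 176;  /* CPUID 0x15 */
        unsigned int eax_base_mhz = 2112;                       /* CPUID 0x16 */
        unsigned int crystal_khz;

        /* Crystal not reported: derive it from the base frequency and ratio. */
        crystal_khz = eax_base_mhz * 1000 * eax_denominator / ebx_numerator;

        printf("crystal: %u kHz\n", crystal_khz);                     /* 24000 */
        printf("TSC:     %u kHz\n",
               crystal_khz * ebx_numerator / eax_denominator);        /* 2112000 */
        return 0;
}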
diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c
index 3d0e9aeea7c8..067858fe4db8 100644
--- a/arch/x86/kernel/tsc_msr.c
+++ b/arch/x86/kernel/tsc_msr.c
@@ -71,7 +71,7 @@ static const struct x86_cpu_id tsc_msr_cpu_ids[] = {
71/* 71/*
72 * MSR-based CPU/TSC frequency discovery for certain CPUs. 72 * MSR-based CPU/TSC frequency discovery for certain CPUs.
73 * 73 *
74 * Set global "lapic_timer_frequency" to bus_clock_cycles/jiffy 74 * Set global "lapic_timer_period" to bus_clock_cycles/jiffy
75 * Return processor base frequency in KHz, or 0 on failure. 75 * Return processor base frequency in KHz, or 0 on failure.
76 */ 76 */
77unsigned long cpu_khz_from_msr(void) 77unsigned long cpu_khz_from_msr(void)
@@ -104,7 +104,7 @@ unsigned long cpu_khz_from_msr(void)
104 res = freq * ratio; 104 res = freq * ratio;
105 105
106#ifdef CONFIG_X86_LOCAL_APIC 106#ifdef CONFIG_X86_LOCAL_APIC
107 lapic_timer_frequency = (freq * 1000) / HZ; 107 lapic_timer_period = (freq * 1000) / HZ;
108#endif 108#endif
109 109
110 /* 110 /*
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index 33b66b5c5aec..72b997eaa1fc 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -82,9 +82,9 @@ static struct orc_entry *orc_find(unsigned long ip);
82 * But they are copies of the ftrace entries that are static and 82 * But they are copies of the ftrace entries that are static and
83 * defined in ftrace_*.S, which do have orc entries. 83 * defined in ftrace_*.S, which do have orc entries.
84 * 84 *
85 * If the undwinder comes across a ftrace trampoline, then find the 85 * If the unwinder comes across a ftrace trampoline, then find the
86 * ftrace function that was used to create it, and use that ftrace 86 * ftrace function that was used to create it, and use that ftrace
87 * function's orc entrie, as the placement of the return code in 87 * function's orc entry, as the placement of the return code in
88 * the stack will be identical. 88 * the stack will be identical.
89 */ 89 */
90static struct orc_entry *orc_ftrace_find(unsigned long ip) 90static struct orc_entry *orc_ftrace_find(unsigned long ip)
@@ -128,6 +128,16 @@ static struct orc_entry null_orc_entry = {
128 .type = ORC_TYPE_CALL 128 .type = ORC_TYPE_CALL
129}; 129};
130 130
131/* Fake frame pointer entry -- used as a fallback for generated code */
132static struct orc_entry orc_fp_entry = {
133 .type = ORC_TYPE_CALL,
134 .sp_reg = ORC_REG_BP,
135 .sp_offset = 16,
136 .bp_reg = ORC_REG_PREV_SP,
137 .bp_offset = -16,
138 .end = 0,
139};
140
131static struct orc_entry *orc_find(unsigned long ip) 141static struct orc_entry *orc_find(unsigned long ip)
132{ 142{
133 static struct orc_entry *orc; 143 static struct orc_entry *orc;
@@ -392,8 +402,16 @@ bool unwind_next_frame(struct unwind_state *state)
392 * calls and calls to noreturn functions. 402 * calls and calls to noreturn functions.
393 */ 403 */
394 orc = orc_find(state->signal ? state->ip : state->ip - 1); 404 orc = orc_find(state->signal ? state->ip : state->ip - 1);
395 if (!orc) 405 if (!orc) {
396 goto err; 406 /*
407 * As a fallback, try to assume this code uses a frame pointer.
408 * This is useful for generated code, like BPF, which ORC
409 * doesn't know about. This is just a guess, so the rest of
410 * the unwind is no longer considered reliable.
411 */
412 orc = &orc_fp_entry;
413 state->error = true;
414 }
397 415
398 /* End-of-stack check for kernel threads: */ 416 /* End-of-stack check for kernel threads: */
399 if (orc->sp_reg == ORC_REG_UNDEFINED) { 417 if (orc->sp_reg == ORC_REG_UNDEFINED) {
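The fallback entry added above simply encodes the conventional frame-pointer layout produced by a "push %rbp; mov %rsp, %rbp" prologue. Under that assumption, one unwind step works out as in the sketch below; the real unwinder reads the frame through its safe stack accessors rather than a raw dereference:

/* Assumed x86-64 frame layout at a frame pointer bp (illustrative only). */
struct fp_frame {
        unsigned long prev_bp;  /* at bp + 0 */
        unsigned long ret_addr; /* at bp + 8 */
};

static void fp_step(unsigned long bp, unsigned long *prev_sp,
                    unsigned long *prev_bp, unsigned long *ret_ip)
{
        const struct fp_frame *frame = (const struct fp_frame *)bp;

        *prev_sp = bp + 16;             /* sp_reg = BP, sp_offset = 16     */
        *prev_bp = frame->prev_bp;      /* bp_offset = -16 from the new sp */
        *ret_ip  = frame->ret_addr;
}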
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 9a327d5b6d1f..d78a61408243 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -47,8 +47,6 @@ static const struct cpuid_reg reverse_cpuid[] = {
47 [CPUID_8000_0001_ECX] = {0x80000001, 0, CPUID_ECX}, 47 [CPUID_8000_0001_ECX] = {0x80000001, 0, CPUID_ECX},
48 [CPUID_7_0_EBX] = { 7, 0, CPUID_EBX}, 48 [CPUID_7_0_EBX] = { 7, 0, CPUID_EBX},
49 [CPUID_D_1_EAX] = { 0xd, 1, CPUID_EAX}, 49 [CPUID_D_1_EAX] = { 0xd, 1, CPUID_EAX},
50 [CPUID_F_0_EDX] = { 0xf, 0, CPUID_EDX},
51 [CPUID_F_1_EDX] = { 0xf, 1, CPUID_EDX},
52 [CPUID_8000_0008_EBX] = {0x80000008, 0, CPUID_EBX}, 50 [CPUID_8000_0008_EBX] = {0x80000008, 0, CPUID_EBX},
53 [CPUID_6_EAX] = { 6, 0, CPUID_EAX}, 51 [CPUID_6_EAX] = { 6, 0, CPUID_EAX},
54 [CPUID_8000_000A_EDX] = {0x8000000a, 0, CPUID_EDX}, 52 [CPUID_8000_000A_EDX] = {0x8000000a, 0, CPUID_EDX},
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index a21c440ff356..4dabc318adb8 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2339,7 +2339,7 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
2339 struct kvm_lapic *apic = vcpu->arch.apic; 2339 struct kvm_lapic *apic = vcpu->arch.apic;
2340 u32 ppr; 2340 u32 ppr;
2341 2341
2342 if (!apic_enabled(apic)) 2342 if (!kvm_apic_hw_enabled(apic))
2343 return -1; 2343 return -1;
2344 2344
2345 __apic_update_ppr(apic, &ppr); 2345 __apic_update_ppr(apic, &ppr);
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 132d149494d6..ab73a9a639ae 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -261,10 +261,10 @@ static int kvm_pmu_rdpmc_vmware(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
261 ctr_val = rdtsc(); 261 ctr_val = rdtsc();
262 break; 262 break;
263 case VMWARE_BACKDOOR_PMC_REAL_TIME: 263 case VMWARE_BACKDOOR_PMC_REAL_TIME:
264 ctr_val = ktime_get_boot_ns(); 264 ctr_val = ktime_get_boottime_ns();
265 break; 265 break;
266 case VMWARE_BACKDOOR_PMC_APPARENT_TIME: 266 case VMWARE_BACKDOOR_PMC_APPARENT_TIME:
267 ctr_val = ktime_get_boot_ns() + 267 ctr_val = ktime_get_boottime_ns() +
268 vcpu->kvm->arch.kvmclock_offset; 268 vcpu->kvm->arch.kvmclock_offset;
269 break; 269 break;
270 default: 270 default:
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 5f9c1a200201..46af3a5e9209 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -5240,9 +5240,6 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
5240 vmx = to_vmx(vcpu); 5240 vmx = to_vmx(vcpu);
5241 vmcs12 = get_vmcs12(vcpu); 5241 vmcs12 = get_vmcs12(vcpu);
5242 5242
5243 if (nested_vmx_allowed(vcpu) && vmx->nested.enlightened_vmcs_enabled)
5244 kvm_state.flags |= KVM_STATE_NESTED_EVMCS;
5245
5246 if (nested_vmx_allowed(vcpu) && 5243 if (nested_vmx_allowed(vcpu) &&
5247 (vmx->nested.vmxon || vmx->nested.smm.vmxon)) { 5244 (vmx->nested.vmxon || vmx->nested.smm.vmxon)) {
5248 kvm_state.hdr.vmx.vmxon_pa = vmx->nested.vmxon_ptr; 5245 kvm_state.hdr.vmx.vmxon_pa = vmx->nested.vmxon_ptr;
@@ -5251,6 +5248,9 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
5251 if (vmx_has_valid_vmcs12(vcpu)) { 5248 if (vmx_has_valid_vmcs12(vcpu)) {
5252 kvm_state.size += sizeof(user_vmx_nested_state->vmcs12); 5249 kvm_state.size += sizeof(user_vmx_nested_state->vmcs12);
5253 5250
5251 if (vmx->nested.hv_evmcs)
5252 kvm_state.flags |= KVM_STATE_NESTED_EVMCS;
5253
5254 if (is_guest_mode(vcpu) && 5254 if (is_guest_mode(vcpu) &&
5255 nested_cpu_has_shadow_vmcs(vmcs12) && 5255 nested_cpu_has_shadow_vmcs(vmcs12) &&
5256 vmcs12->vmcs_link_pointer != -1ull) 5256 vmcs12->vmcs_link_pointer != -1ull)
@@ -5350,6 +5350,15 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
5350 if (kvm_state->hdr.vmx.vmcs12_pa != -1ull) 5350 if (kvm_state->hdr.vmx.vmcs12_pa != -1ull)
5351 return -EINVAL; 5351 return -EINVAL;
5352 5352
5353 /*
5354 * KVM_STATE_NESTED_EVMCS used to signal that KVM should
5355 * enable eVMCS capability on vCPU. However, since then
 5356 * the code was changed so that the flag signals that vmcs12 should
 5357 * be copied into the eVMCS in guest memory.
 5358 *
 5359 * To preserve backward compatibility, allow the user
5360 * to set this flag even when there is no VMXON region.
5361 */
5353 if (kvm_state->flags & ~KVM_STATE_NESTED_EVMCS) 5362 if (kvm_state->flags & ~KVM_STATE_NESTED_EVMCS)
5354 return -EINVAL; 5363 return -EINVAL;
5355 } else { 5364 } else {
@@ -5358,7 +5367,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
5358 5367
5359 if (!page_address_valid(vcpu, kvm_state->hdr.vmx.vmxon_pa)) 5368 if (!page_address_valid(vcpu, kvm_state->hdr.vmx.vmxon_pa))
5360 return -EINVAL; 5369 return -EINVAL;
5361 } 5370 }
5362 5371
5363 if ((kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) && 5372 if ((kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) &&
5364 (kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) 5373 (kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
@@ -5373,20 +5382,21 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
5373 * nor can VMLAUNCH/VMRESUME be pending. Outside SMM, SMM flags 5382 * nor can VMLAUNCH/VMRESUME be pending. Outside SMM, SMM flags
5374 * must be zero. 5383 * must be zero.
5375 */ 5384 */
5376 if (is_smm(vcpu) ? kvm_state->flags : kvm_state->hdr.vmx.smm.flags) 5385 if (is_smm(vcpu) ?
5386 (kvm_state->flags &
5387 (KVM_STATE_NESTED_GUEST_MODE | KVM_STATE_NESTED_RUN_PENDING))
5388 : kvm_state->hdr.vmx.smm.flags)
5377 return -EINVAL; 5389 return -EINVAL;
5378 5390
5379 if ((kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) && 5391 if ((kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) &&
5380 !(kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON)) 5392 !(kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON))
5381 return -EINVAL; 5393 return -EINVAL;
5382 5394
5383 vmx_leave_nested(vcpu); 5395 if ((kvm_state->flags & KVM_STATE_NESTED_EVMCS) &&
5384 if (kvm_state->flags & KVM_STATE_NESTED_EVMCS) { 5396 (!nested_vmx_allowed(vcpu) || !vmx->nested.enlightened_vmcs_enabled))
5385 if (!nested_vmx_allowed(vcpu))
5386 return -EINVAL; 5397 return -EINVAL;
5387 5398
5388 nested_enable_evmcs(vcpu, NULL); 5399 vmx_leave_nested(vcpu);
5389 }
5390 5400
5391 if (kvm_state->hdr.vmx.vmxon_pa == -1ull) 5401 if (kvm_state->hdr.vmx.vmxon_pa == -1ull)
5392 return 0; 5402 return 0;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9857992d4e58..63bb1ee8258e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -67,6 +67,7 @@
67#include <asm/mshyperv.h> 67#include <asm/mshyperv.h>
68#include <asm/hypervisor.h> 68#include <asm/hypervisor.h>
69#include <asm/intel_pt.h> 69#include <asm/intel_pt.h>
70#include <clocksource/hyperv_timer.h>
70 71
71#define CREATE_TRACE_POINTS 72#define CREATE_TRACE_POINTS
72#include "trace.h" 73#include "trace.h"
@@ -1554,7 +1555,7 @@ static int set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
1554 vcpu->arch.tsc_always_catchup = 1; 1555 vcpu->arch.tsc_always_catchup = 1;
1555 return 0; 1556 return 0;
1556 } else { 1557 } else {
1557 WARN(1, "user requested TSC rate below hardware speed\n"); 1558 pr_warn_ratelimited("user requested TSC rate below hardware speed\n");
1558 return -1; 1559 return -1;
1559 } 1560 }
1560 } 1561 }
@@ -1564,8 +1565,8 @@ static int set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
1564 user_tsc_khz, tsc_khz); 1565 user_tsc_khz, tsc_khz);
1565 1566
1566 if (ratio == 0 || ratio >= kvm_max_tsc_scaling_ratio) { 1567 if (ratio == 0 || ratio >= kvm_max_tsc_scaling_ratio) {
1567 WARN_ONCE(1, "Invalid TSC scaling ratio - virtual-tsc-khz=%u\n", 1568 pr_warn_ratelimited("Invalid TSC scaling ratio - virtual-tsc-khz=%u\n",
1568 user_tsc_khz); 1569 user_tsc_khz);
1569 return -1; 1570 return -1;
1570 } 1571 }
1571 1572
@@ -1728,7 +1729,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
1728 1729
1729 raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); 1730 raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
1730 offset = kvm_compute_tsc_offset(vcpu, data); 1731 offset = kvm_compute_tsc_offset(vcpu, data);
1731 ns = ktime_get_boot_ns(); 1732 ns = ktime_get_boottime_ns();
1732 elapsed = ns - kvm->arch.last_tsc_nsec; 1733 elapsed = ns - kvm->arch.last_tsc_nsec;
1733 1734
1734 if (vcpu->arch.virtual_tsc_khz) { 1735 if (vcpu->arch.virtual_tsc_khz) {
@@ -2070,7 +2071,7 @@ u64 get_kvmclock_ns(struct kvm *kvm)
2070 spin_lock(&ka->pvclock_gtod_sync_lock); 2071 spin_lock(&ka->pvclock_gtod_sync_lock);
2071 if (!ka->use_master_clock) { 2072 if (!ka->use_master_clock) {
2072 spin_unlock(&ka->pvclock_gtod_sync_lock); 2073 spin_unlock(&ka->pvclock_gtod_sync_lock);
2073 return ktime_get_boot_ns() + ka->kvmclock_offset; 2074 return ktime_get_boottime_ns() + ka->kvmclock_offset;
2074 } 2075 }
2075 2076
2076 hv_clock.tsc_timestamp = ka->master_cycle_now; 2077 hv_clock.tsc_timestamp = ka->master_cycle_now;
@@ -2086,7 +2087,7 @@ u64 get_kvmclock_ns(struct kvm *kvm)
2086 &hv_clock.tsc_to_system_mul); 2087 &hv_clock.tsc_to_system_mul);
2087 ret = __pvclock_read_cycles(&hv_clock, rdtsc()); 2088 ret = __pvclock_read_cycles(&hv_clock, rdtsc());
2088 } else 2089 } else
2089 ret = ktime_get_boot_ns() + ka->kvmclock_offset; 2090 ret = ktime_get_boottime_ns() + ka->kvmclock_offset;
2090 2091
2091 put_cpu(); 2092 put_cpu();
2092 2093
@@ -2185,7 +2186,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
2185 } 2186 }
2186 if (!use_master_clock) { 2187 if (!use_master_clock) {
2187 host_tsc = rdtsc(); 2188 host_tsc = rdtsc();
2188 kernel_ns = ktime_get_boot_ns(); 2189 kernel_ns = ktime_get_boottime_ns();
2189 } 2190 }
2190 2191
2191 tsc_timestamp = kvm_read_l1_tsc(v, host_tsc); 2192 tsc_timestamp = kvm_read_l1_tsc(v, host_tsc);
@@ -9015,7 +9016,7 @@ int kvm_arch_hardware_enable(void)
9015 * before any KVM threads can be running. Unfortunately, we can't 9016 * before any KVM threads can be running. Unfortunately, we can't
9016 * bring the TSCs fully up to date with real time, as we aren't yet far 9017 * bring the TSCs fully up to date with real time, as we aren't yet far
9017 * enough into CPU bringup that we know how much real time has actually 9018 * enough into CPU bringup that we know how much real time has actually
9018 * elapsed; our helper function, ktime_get_boot_ns() will be using boot 9019 * elapsed; our helper function, ktime_get_boottime_ns() will be using boot
9019 * variables that haven't been updated yet. 9020 * variables that haven't been updated yet.
9020 * 9021 *
9021 * So we simply find the maximum observed TSC above, then record the 9022 * So we simply find the maximum observed TSC above, then record the
@@ -9243,7 +9244,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
9243 mutex_init(&kvm->arch.apic_map_lock); 9244 mutex_init(&kvm->arch.apic_map_lock);
9244 spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock); 9245 spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
9245 9246
9246 kvm->arch.kvmclock_offset = -ktime_get_boot_ns(); 9247 kvm->arch.kvmclock_offset = -ktime_get_boottime_ns();
9247 pvclock_update_vm_gtod_copy(kvm); 9248 pvclock_update_vm_gtod_copy(kvm);
9248 9249
9249 kvm->arch.guest_can_read_msr_platform_info = true; 9250 kvm->arch.guest_can_read_msr_platform_info = true;
diff --git a/arch/x86/lib/cache-smp.c b/arch/x86/lib/cache-smp.c
index 1811fa4a1b1a..7c48ff4ae8d1 100644
--- a/arch/x86/lib/cache-smp.c
+++ b/arch/x86/lib/cache-smp.c
@@ -15,6 +15,7 @@ EXPORT_SYMBOL(wbinvd_on_cpu);
15 15
16int wbinvd_on_all_cpus(void) 16int wbinvd_on_all_cpus(void)
17{ 17{
18 return on_each_cpu(__wbinvd, NULL, 1); 18 on_each_cpu(__wbinvd, NULL, 1);
19 return 0;
19} 20}
20EXPORT_SYMBOL(wbinvd_on_all_cpus); 21EXPORT_SYMBOL(wbinvd_on_all_cpus);
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 46df4c6aae46..58e4f1f00bbc 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -710,6 +710,10 @@ static void set_signal_archinfo(unsigned long address,
710 * To avoid leaking information about the kernel page 710 * To avoid leaking information about the kernel page
711 * table layout, pretend that user-mode accesses to 711 * table layout, pretend that user-mode accesses to
712 * kernel addresses are always protection faults. 712 * kernel addresses are always protection faults.
713 *
714 * NB: This means that failed vsyscalls with vsyscall=none
715 * will have the PROT bit. This doesn't leak any
716 * information and does not appear to cause any problems.
713 */ 717 */
714 if (address >= TASK_SIZE_MAX) 718 if (address >= TASK_SIZE_MAX)
715 error_code |= X86_PF_PROT; 719 error_code |= X86_PF_PROT;
@@ -1369,16 +1373,18 @@ void do_user_addr_fault(struct pt_regs *regs,
1369 1373
1370#ifdef CONFIG_X86_64 1374#ifdef CONFIG_X86_64
1371 /* 1375 /*
1372 * Instruction fetch faults in the vsyscall page might need 1376 * Faults in the vsyscall page might need emulation. The
1373 * emulation. The vsyscall page is at a high address 1377 * vsyscall page is at a high address (>PAGE_OFFSET), but is
1374 * (>PAGE_OFFSET), but is considered to be part of the user 1378 * considered to be part of the user address space.
1375 * address space.
1376 * 1379 *
1377 * The vsyscall page does not have a "real" VMA, so do this 1380 * The vsyscall page does not have a "real" VMA, so do this
1378 * emulation before we go searching for VMAs. 1381 * emulation before we go searching for VMAs.
1382 *
1383 * PKRU never rejects instruction fetches, so we don't need
1384 * to consider the PF_PK bit.
1379 */ 1385 */
1380 if ((hw_error_code & X86_PF_INSTR) && is_vsyscall_vaddr(address)) { 1386 if (is_vsyscall_vaddr(address)) {
1381 if (emulate_vsyscall(regs, address)) 1387 if (emulate_vsyscall(hw_error_code, regs, address))
1382 return; 1388 return;
1383 } 1389 }
1384#endif 1390#endif
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 693aaf28d5fe..0f01c7b1d217 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -671,23 +671,25 @@ static unsigned long __meminit
671phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end, 671phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end,
672 unsigned long page_size_mask, bool init) 672 unsigned long page_size_mask, bool init)
673{ 673{
674 unsigned long paddr_next, paddr_last = paddr_end; 674 unsigned long vaddr, vaddr_end, vaddr_next, paddr_next, paddr_last;
675 unsigned long vaddr = (unsigned long)__va(paddr); 675
676 int i = p4d_index(vaddr); 676 paddr_last = paddr_end;
677 vaddr = (unsigned long)__va(paddr);
678 vaddr_end = (unsigned long)__va(paddr_end);
677 679
678 if (!pgtable_l5_enabled()) 680 if (!pgtable_l5_enabled())
679 return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end, 681 return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end,
680 page_size_mask, init); 682 page_size_mask, init);
681 683
682 for (; i < PTRS_PER_P4D; i++, paddr = paddr_next) { 684 for (; vaddr < vaddr_end; vaddr = vaddr_next) {
683 p4d_t *p4d; 685 p4d_t *p4d = p4d_page + p4d_index(vaddr);
684 pud_t *pud; 686 pud_t *pud;
685 687
686 vaddr = (unsigned long)__va(paddr); 688 vaddr_next = (vaddr & P4D_MASK) + P4D_SIZE;
687 p4d = p4d_page + p4d_index(vaddr); 689 paddr = __pa(vaddr);
688 paddr_next = (paddr & P4D_MASK) + P4D_SIZE;
689 690
690 if (paddr >= paddr_end) { 691 if (paddr >= paddr_end) {
692 paddr_next = __pa(vaddr_next);
691 if (!after_bootmem && 693 if (!after_bootmem &&
692 !e820__mapped_any(paddr & P4D_MASK, paddr_next, 694 !e820__mapped_any(paddr & P4D_MASK, paddr_next,
693 E820_TYPE_RAM) && 695 E820_TYPE_RAM) &&
@@ -699,13 +701,13 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end,
699 701
700 if (!p4d_none(*p4d)) { 702 if (!p4d_none(*p4d)) {
701 pud = pud_offset(p4d, 0); 703 pud = pud_offset(p4d, 0);
702 paddr_last = phys_pud_init(pud, paddr, paddr_end, 704 paddr_last = phys_pud_init(pud, paddr, __pa(vaddr_end),
703 page_size_mask, init); 705 page_size_mask, init);
704 continue; 706 continue;
705 } 707 }
706 708
707 pud = alloc_low_page(); 709 pud = alloc_low_page();
708 paddr_last = phys_pud_init(pud, paddr, paddr_end, 710 paddr_last = phys_pud_init(pud, paddr, __pa(vaddr_end),
709 page_size_mask, init); 711 page_size_mask, init);
710 712
711 spin_lock(&init_mm.page_table_lock); 713 spin_lock(&init_mm.page_table_lock);
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index 632b83885867..3b9fd679cea9 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -728,7 +728,7 @@ void efi_recover_from_page_fault(unsigned long phys_addr)
728 * Address range 0x0000 - 0x0fff is always mapped in the efi_pgd, so 728 * Address range 0x0000 - 0x0fff is always mapped in the efi_pgd, so
729 * page faulting on these addresses isn't expected. 729 * page faulting on these addresses isn't expected.
730 */ 730 */
731 if (phys_addr >= 0x0000 && phys_addr <= 0x0fff) 731 if (phys_addr <= 0x0fff)
732 return; 732 return;
733 733
734 /* 734 /*
diff --git a/arch/x86/ras/Kconfig b/arch/x86/ras/Kconfig
index a9c3db125222..9ad6842de4b4 100644
--- a/arch/x86/ras/Kconfig
+++ b/arch/x86/ras/Kconfig
@@ -11,3 +11,13 @@ config RAS_CEC
11 11
12 Bear in mind that this is absolutely useless if your platform doesn't 12 Bear in mind that this is absolutely useless if your platform doesn't
13 have ECC DIMMs and doesn't have DRAM ECC checking enabled in the BIOS. 13 have ECC DIMMs and doesn't have DRAM ECC checking enabled in the BIOS.
14
15config RAS_CEC_DEBUG
16 bool "CEC debugging machinery"
17 default n
18 depends on RAS_CEC
19 help
20 Add extra files to (debugfs)/ras/cec to test the correctable error
 21 collector feature. "pfn" is a writable file that allows the user to
22 simulate an error in a particular page frame. "array" is a read-only
23 file that dumps out the current state of all pages logged so far.
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index f8d430f88d25..f9269ae6da9c 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -240,7 +240,7 @@ static struct kmem_cache *bfq_pool;
240 * containing only random (seeky) I/O are prevented from being tagged 240 * containing only random (seeky) I/O are prevented from being tagged
241 * as soft real-time. 241 * as soft real-time.
242 */ 242 */
243#define BFQQ_TOTALLY_SEEKY(bfqq) (bfqq->seek_history & -1) 243#define BFQQ_TOTALLY_SEEKY(bfqq) (bfqq->seek_history == -1)
244 244
245/* Min number of samples required to perform peak-rate update */ 245/* Min number of samples required to perform peak-rate update */
246#define BFQ_RATE_MIN_SAMPLES 32 246#define BFQ_RATE_MIN_SAMPLES 32
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 2489ddbb21db..3afe327f816f 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -934,6 +934,13 @@ void blk_mq_debugfs_register_sched(struct request_queue *q)
934{ 934{
935 struct elevator_type *e = q->elevator->type; 935 struct elevator_type *e = q->elevator->type;
936 936
937 /*
 938 * If the parent directory has not been created yet, return; we will be
 939 * called again later and the directory/files will be created then.
940 */
941 if (!q->debugfs_dir)
942 return;
943
937 if (!e->queue_debugfs_attrs) 944 if (!e->queue_debugfs_attrs)
938 return; 945 return;
939 946
diff --git a/crypto/cryptd.c b/crypto/cryptd.c
index 1ce1bf6d3bab..5f76c6e222c6 100644
--- a/crypto/cryptd.c
+++ b/crypto/cryptd.c
@@ -388,6 +388,7 @@ static void cryptd_skcipher_free(struct skcipher_instance *inst)
388 struct skcipherd_instance_ctx *ctx = skcipher_instance_ctx(inst); 388 struct skcipherd_instance_ctx *ctx = skcipher_instance_ctx(inst);
389 389
390 crypto_drop_skcipher(&ctx->spawn); 390 crypto_drop_skcipher(&ctx->spawn);
391 kfree(inst);
391} 392}
392 393
393static int cryptd_create_skcipher(struct crypto_template *tmpl, 394static int cryptd_create_skcipher(struct crypto_template *tmpl,
diff --git a/crypto/crypto_user_base.c b/crypto/crypto_user_base.c
index d5d5d155340b..c65e39005ce2 100644
--- a/crypto/crypto_user_base.c
+++ b/crypto/crypto_user_base.c
@@ -44,6 +44,9 @@ struct crypto_alg *crypto_alg_match(struct crypto_user_alg *p, int exact)
44 list_for_each_entry(q, &crypto_alg_list, cra_list) { 44 list_for_each_entry(q, &crypto_alg_list, cra_list) {
45 int match = 0; 45 int match = 0;
46 46
47 if (crypto_is_larval(q))
48 continue;
49
47 if ((q->cra_flags ^ p->cru_type) & p->cru_mask) 50 if ((q->cra_flags ^ p->cru_type) & p->cru_mask)
48 continue; 51 continue;
49 52
diff --git a/drivers/acpi/acpi_pad.c b/drivers/acpi/acpi_pad.c
index 6b3f1217a237..e7dc0133f817 100644
--- a/drivers/acpi/acpi_pad.c
+++ b/drivers/acpi/acpi_pad.c
@@ -64,6 +64,7 @@ static void power_saving_mwait_init(void)
64 case X86_VENDOR_HYGON: 64 case X86_VENDOR_HYGON:
65 case X86_VENDOR_AMD: 65 case X86_VENDOR_AMD:
66 case X86_VENDOR_INTEL: 66 case X86_VENDOR_INTEL:
67 case X86_VENDOR_ZHAOXIN:
67 /* 68 /*
68 * AMD Fam10h TSC will tick in all 69 * AMD Fam10h TSC will tick in all
69 * C/P/S0/S1 states when this bit is set. 70 * C/P/S0/S1 states when this bit is set.
diff --git a/drivers/acpi/irq.c b/drivers/acpi/irq.c
index 89690a471360..e209081d644b 100644
--- a/drivers/acpi/irq.c
+++ b/drivers/acpi/irq.c
@@ -292,3 +292,29 @@ void __init acpi_set_irq_model(enum acpi_irq_model_id model,
292 acpi_irq_model = model; 292 acpi_irq_model = model;
293 acpi_gsi_domain_id = fwnode; 293 acpi_gsi_domain_id = fwnode;
294} 294}
295
296/**
297 * acpi_irq_create_hierarchy - Create a hierarchical IRQ domain with the default
298 * GSI domain as its parent.
299 * @flags: Irq domain flags associated with the domain
300 * @size: Size of the domain.
301 * @fwnode: Optional fwnode of the interrupt controller
302 * @ops: Pointer to the interrupt domain callbacks
303 * @host_data: Controller private data pointer
304 */
305struct irq_domain *acpi_irq_create_hierarchy(unsigned int flags,
306 unsigned int size,
307 struct fwnode_handle *fwnode,
308 const struct irq_domain_ops *ops,
309 void *host_data)
310{
311 struct irq_domain *d = irq_find_matching_fwnode(acpi_gsi_domain_id,
312 DOMAIN_BUS_ANY);
313
314 if (!d)
315 return NULL;
316
317 return irq_domain_create_hierarchy(d, flags, size, fwnode, ops,
318 host_data);
319}
320EXPORT_SYMBOL_GPL(acpi_irq_create_hierarchy);
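A hypothetical use of the new helper from an ACPI interrupt controller driver, stacking a controller-specific domain on top of the default GSI domain. The names my_fic_domain_ops and my_fic_add_domain are illustrative, not part of the patch:

#include <linux/acpi.h>
#include <linux/errno.h>
#include <linux/irqdomain.h>

static const struct irq_domain_ops my_fic_domain_ops = {
        /* .alloc, .free, .translate ... elided for brevity */
};

static int my_fic_add_domain(struct fwnode_handle *fwnode, void *my_fic,
                             unsigned int nr_irqs)
{
        struct irq_domain *domain;

        domain = acpi_irq_create_hierarchy(0, nr_irqs, fwnode,
                                           &my_fic_domain_ops, my_fic);
        if (!domain)
                return -ENODEV;

        return 0;
}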
diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c
index b72e6afaa8fb..1e7ac0bd0d3a 100644
--- a/drivers/acpi/pptt.c
+++ b/drivers/acpi/pptt.c
@@ -432,17 +432,40 @@ static void cache_setup_acpi_cpu(struct acpi_table_header *table,
432 } 432 }
433} 433}
434 434
435static bool flag_identical(struct acpi_table_header *table_hdr,
436 struct acpi_pptt_processor *cpu)
437{
438 struct acpi_pptt_processor *next;
439
440 /* heterogeneous machines must use PPTT revision > 1 */
441 if (table_hdr->revision < 2)
442 return false;
443
444 /* Locate the last node in the tree with IDENTICAL set */
445 if (cpu->flags & ACPI_PPTT_ACPI_IDENTICAL) {
446 next = fetch_pptt_node(table_hdr, cpu->parent);
447 if (!(next && next->flags & ACPI_PPTT_ACPI_IDENTICAL))
448 return true;
449 }
450
451 return false;
452}
453
435/* Passing level values greater than this will result in search termination */ 454/* Passing level values greater than this will result in search termination */
436#define PPTT_ABORT_PACKAGE 0xFF 455#define PPTT_ABORT_PACKAGE 0xFF
437 456
438static struct acpi_pptt_processor *acpi_find_processor_package_id(struct acpi_table_header *table_hdr, 457static struct acpi_pptt_processor *acpi_find_processor_tag(struct acpi_table_header *table_hdr,
439 struct acpi_pptt_processor *cpu, 458 struct acpi_pptt_processor *cpu,
440 int level, int flag) 459 int level, int flag)
441{ 460{
442 struct acpi_pptt_processor *prev_node; 461 struct acpi_pptt_processor *prev_node;
443 462
444 while (cpu && level) { 463 while (cpu && level) {
445 if (cpu->flags & flag) 464 /* special case the identical flag to find last identical */
465 if (flag == ACPI_PPTT_ACPI_IDENTICAL) {
466 if (flag_identical(table_hdr, cpu))
467 break;
468 } else if (cpu->flags & flag)
446 break; 469 break;
447 pr_debug("level %d\n", level); 470 pr_debug("level %d\n", level);
448 prev_node = fetch_pptt_node(table_hdr, cpu->parent); 471 prev_node = fetch_pptt_node(table_hdr, cpu->parent);
@@ -480,8 +503,8 @@ static int topology_get_acpi_cpu_tag(struct acpi_table_header *table,
480 503
481 cpu_node = acpi_find_processor_node(table, acpi_cpu_id); 504 cpu_node = acpi_find_processor_node(table, acpi_cpu_id);
482 if (cpu_node) { 505 if (cpu_node) {
483 cpu_node = acpi_find_processor_package_id(table, cpu_node, 506 cpu_node = acpi_find_processor_tag(table, cpu_node,
484 level, flag); 507 level, flag);
485 /* 508 /*
486 * As per specification if the processor structure represents 509 * As per specification if the processor structure represents
487 * an actual processor, then ACPI processor ID must be valid. 510 * an actual processor, then ACPI processor ID must be valid.
@@ -660,3 +683,29 @@ int find_acpi_cpu_topology_package(unsigned int cpu)
660 return find_acpi_cpu_topology_tag(cpu, PPTT_ABORT_PACKAGE, 683 return find_acpi_cpu_topology_tag(cpu, PPTT_ABORT_PACKAGE,
661 ACPI_PPTT_PHYSICAL_PACKAGE); 684 ACPI_PPTT_PHYSICAL_PACKAGE);
662} 685}
686
687/**
688 * find_acpi_cpu_topology_hetero_id() - Get a core architecture tag
689 * @cpu: Kernel logical CPU number
690 *
691 * Determine a unique heterogeneous tag for the given CPU. CPUs with the same
692 * implementation should have matching tags.
693 *
694 * The returned tag can be used to group peers with identical implementation.
695 *
696 * The search terminates when a level is found with the identical implementation
697 * flag set or we reach a root node.
698 *
699 * Due to limitations in the PPTT data structure, there may be rare situations
 700 * where two cores in a heterogeneous machine are identical, but won't have
701 * the same tag.
702 *
703 * Return: -ENOENT if the PPTT doesn't exist, or the CPU cannot be found.
704 * Otherwise returns a value which represents a group of identical cores
705 * similar to this CPU.
706 */
707int find_acpi_cpu_topology_hetero_id(unsigned int cpu)
708{
709 return find_acpi_cpu_topology_tag(cpu, PPTT_ABORT_PACKAGE,
710 ACPI_PPTT_ACPI_IDENTICAL);
711}
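An illustrative consumer of the new helper: two CPUs can be treated as having an identical implementation when they report the same, non-negative tag. The wrapper below is hypothetical, not part of the patch:

#include <linux/acpi.h>
#include <linux/types.h>

static bool cpus_have_identical_impl(unsigned int cpu_a, unsigned int cpu_b)
{
        int tag_a = find_acpi_cpu_topology_hetero_id(cpu_a);
        int tag_b = find_acpi_cpu_topology_hetero_id(cpu_b);

        /* A negative return (no PPTT, CPU not found) never matches anything. */
        return tag_a >= 0 && tag_a == tag_b;
}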
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index e387a258d649..ed56c6d20b08 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -196,6 +196,7 @@ static void tsc_check_state(int state)
196 case X86_VENDOR_AMD: 196 case X86_VENDOR_AMD:
197 case X86_VENDOR_INTEL: 197 case X86_VENDOR_INTEL:
198 case X86_VENDOR_CENTAUR: 198 case X86_VENDOR_CENTAUR:
199 case X86_VENDOR_ZHAOXIN:
199 /* 200 /*
200 * AMD Fam10h TSC will tick in all 201 * AMD Fam10h TSC will tick in all
201 * C/P/S0/S1 states when this bit is set. 202 * C/P/S0/S1 states when this bit is set.
diff --git a/drivers/auxdisplay/cfag12864bfb.c b/drivers/auxdisplay/cfag12864bfb.c
index 40c8a552a478..4074886b7bc8 100644
--- a/drivers/auxdisplay/cfag12864bfb.c
+++ b/drivers/auxdisplay/cfag12864bfb.c
@@ -52,8 +52,9 @@ static const struct fb_var_screeninfo cfag12864bfb_var = {
52 52
53static int cfag12864bfb_mmap(struct fb_info *info, struct vm_area_struct *vma) 53static int cfag12864bfb_mmap(struct fb_info *info, struct vm_area_struct *vma)
54{ 54{
55 return vm_insert_page(vma, vma->vm_start, 55 struct page *pages = virt_to_page(cfag12864b_buffer);
56 virt_to_page(cfag12864b_buffer)); 56
57 return vm_map_pages_zero(vma, &pages, 1);
57} 58}
58 59
59static struct fb_ops cfag12864bfb_ops = { 60static struct fb_ops cfag12864bfb_ops = {
diff --git a/drivers/auxdisplay/ht16k33.c b/drivers/auxdisplay/ht16k33.c
index 21393ec3b9a4..9c0bb771751d 100644
--- a/drivers/auxdisplay/ht16k33.c
+++ b/drivers/auxdisplay/ht16k33.c
@@ -223,9 +223,9 @@ static const struct backlight_ops ht16k33_bl_ops = {
223static int ht16k33_mmap(struct fb_info *info, struct vm_area_struct *vma) 223static int ht16k33_mmap(struct fb_info *info, struct vm_area_struct *vma)
224{ 224{
225 struct ht16k33_priv *priv = info->par; 225 struct ht16k33_priv *priv = info->par;
226 struct page *pages = virt_to_page(priv->fbdev.buffer);
226 227
227 return vm_insert_page(vma, vma->vm_start, 228 return vm_map_pages_zero(vma, &pages, 1);
228 virt_to_page(priv->fbdev.buffer));
229} 229}
230 230
231static struct fb_ops ht16k33_fb_ops = { 231static struct fb_ops ht16k33_fb_ops = {
diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c
index a7359535caf5..8827c60f51e2 100644
--- a/drivers/base/cacheinfo.c
+++ b/drivers/base/cacheinfo.c
@@ -213,6 +213,8 @@ int __weak cache_setup_acpi(unsigned int cpu)
213 return -ENOTSUPP; 213 return -ENOTSUPP;
214} 214}
215 215
216unsigned int coherency_max_size;
217
216static int cache_shared_cpu_map_setup(unsigned int cpu) 218static int cache_shared_cpu_map_setup(unsigned int cpu)
217{ 219{
218 struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); 220 struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
@@ -251,6 +253,9 @@ static int cache_shared_cpu_map_setup(unsigned int cpu)
251 cpumask_set_cpu(i, &this_leaf->shared_cpu_map); 253 cpumask_set_cpu(i, &this_leaf->shared_cpu_map);
252 } 254 }
253 } 255 }
256 /* record the maximum cache line size */
257 if (this_leaf->coherency_line_size > coherency_max_size)
258 coherency_max_size = this_leaf->coherency_line_size;
254 } 259 }
255 260
256 return 0; 261 return 0;
diff --git a/drivers/char/agp/generic.c b/drivers/char/agp/generic.c
index 658664a5a5aa..df1edb5ec0ad 100644
--- a/drivers/char/agp/generic.c
+++ b/drivers/char/agp/generic.c
@@ -1311,8 +1311,7 @@ static void ipi_handler(void *null)
1311 1311
1312void global_cache_flush(void) 1312void global_cache_flush(void)
1313{ 1313{
1314 if (on_each_cpu(ipi_handler, NULL, 1) != 0) 1314 on_each_cpu(ipi_handler, NULL, 1);
1315 panic(PFX "timed out waiting for the other CPUs!\n");
1316} 1315}
1317EXPORT_SYMBOL(global_cache_flush); 1316EXPORT_SYMBOL(global_cache_flush);
1318 1317
diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index aa51756fd4d6..87b410d6e51d 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -368,7 +368,7 @@ static struct clk_core *clk_core_get(struct clk_core *core, u8 p_index)
368 const char *dev_id = dev ? dev_name(dev) : NULL; 368 const char *dev_id = dev ? dev_name(dev) : NULL;
369 struct device_node *np = core->of_node; 369 struct device_node *np = core->of_node;
370 370
371 if (np && index >= 0) 371 if (np && (name || index >= 0))
372 hw = of_clk_get_hw(np, index, name); 372 hw = of_clk_get_hw(np, index, name);
373 373
374 /* 374 /*
diff --git a/drivers/clk/meson/g12a.c b/drivers/clk/meson/g12a.c
index 739f64fdf1e3..206fafd299ea 100644
--- a/drivers/clk/meson/g12a.c
+++ b/drivers/clk/meson/g12a.c
@@ -2734,8 +2734,8 @@ static struct clk_hw_onecell_data g12a_hw_onecell_data = {
2734 [CLKID_MALI_1_DIV] = &g12a_mali_1_div.hw, 2734 [CLKID_MALI_1_DIV] = &g12a_mali_1_div.hw,
2735 [CLKID_MALI_1] = &g12a_mali_1.hw, 2735 [CLKID_MALI_1] = &g12a_mali_1.hw,
2736 [CLKID_MALI] = &g12a_mali.hw, 2736 [CLKID_MALI] = &g12a_mali.hw,
2737 [CLKID_MPLL_5OM_DIV] = &g12a_mpll_50m_div.hw, 2737 [CLKID_MPLL_50M_DIV] = &g12a_mpll_50m_div.hw,
2738 [CLKID_MPLL_5OM] = &g12a_mpll_50m.hw, 2738 [CLKID_MPLL_50M] = &g12a_mpll_50m.hw,
2739 [CLKID_SYS_PLL_DIV16_EN] = &g12a_sys_pll_div16_en.hw, 2739 [CLKID_SYS_PLL_DIV16_EN] = &g12a_sys_pll_div16_en.hw,
2740 [CLKID_SYS_PLL_DIV16] = &g12a_sys_pll_div16.hw, 2740 [CLKID_SYS_PLL_DIV16] = &g12a_sys_pll_div16.hw,
2741 [CLKID_CPU_CLK_DYN0_SEL] = &g12a_cpu_clk_premux0.hw, 2741 [CLKID_CPU_CLK_DYN0_SEL] = &g12a_cpu_clk_premux0.hw,
diff --git a/drivers/clk/meson/g12a.h b/drivers/clk/meson/g12a.h
index 39c41af70804..bcc05cd9882f 100644
--- a/drivers/clk/meson/g12a.h
+++ b/drivers/clk/meson/g12a.h
@@ -166,7 +166,7 @@
166#define CLKID_HDMI_DIV 167 166#define CLKID_HDMI_DIV 167
167#define CLKID_MALI_0_DIV 170 167#define CLKID_MALI_0_DIV 170
168#define CLKID_MALI_1_DIV 173 168#define CLKID_MALI_1_DIV 173
169#define CLKID_MPLL_5OM_DIV 176 169#define CLKID_MPLL_50M_DIV 176
170#define CLKID_SYS_PLL_DIV16_EN 178 170#define CLKID_SYS_PLL_DIV16_EN 178
171#define CLKID_SYS_PLL_DIV16 179 171#define CLKID_SYS_PLL_DIV16 179
172#define CLKID_CPU_CLK_DYN0_SEL 180 172#define CLKID_CPU_CLK_DYN0_SEL 180
diff --git a/drivers/clk/meson/meson8b.c b/drivers/clk/meson/meson8b.c
index 37cf0f01bb5d..62cd3a7f1f65 100644
--- a/drivers/clk/meson/meson8b.c
+++ b/drivers/clk/meson/meson8b.c
@@ -1761,7 +1761,7 @@ static struct clk_regmap meson8m2_gp_pll = {
1761 }, 1761 },
1762}; 1762};
1763 1763
1764static const char * const mmeson8b_vpu_0_1_parent_names[] = { 1764static const char * const meson8b_vpu_0_1_parent_names[] = {
1765 "fclk_div4", "fclk_div3", "fclk_div5", "fclk_div7" 1765 "fclk_div4", "fclk_div3", "fclk_div5", "fclk_div7"
1766}; 1766};
1767 1767
@@ -1778,8 +1778,8 @@ static struct clk_regmap meson8b_vpu_0_sel = {
1778 .hw.init = &(struct clk_init_data){ 1778 .hw.init = &(struct clk_init_data){
1779 .name = "vpu_0_sel", 1779 .name = "vpu_0_sel",
1780 .ops = &clk_regmap_mux_ops, 1780 .ops = &clk_regmap_mux_ops,
1781 .parent_names = mmeson8b_vpu_0_1_parent_names, 1781 .parent_names = meson8b_vpu_0_1_parent_names,
1782 .num_parents = ARRAY_SIZE(mmeson8b_vpu_0_1_parent_names), 1782 .num_parents = ARRAY_SIZE(meson8b_vpu_0_1_parent_names),
1783 .flags = CLK_SET_RATE_PARENT, 1783 .flags = CLK_SET_RATE_PARENT,
1784 }, 1784 },
1785}; 1785};
@@ -1837,8 +1837,8 @@ static struct clk_regmap meson8b_vpu_1_sel = {
1837 .hw.init = &(struct clk_init_data){ 1837 .hw.init = &(struct clk_init_data){
1838 .name = "vpu_1_sel", 1838 .name = "vpu_1_sel",
1839 .ops = &clk_regmap_mux_ops, 1839 .ops = &clk_regmap_mux_ops,
1840 .parent_names = mmeson8b_vpu_0_1_parent_names, 1840 .parent_names = meson8b_vpu_0_1_parent_names,
1841 .num_parents = ARRAY_SIZE(mmeson8b_vpu_0_1_parent_names), 1841 .num_parents = ARRAY_SIZE(meson8b_vpu_0_1_parent_names),
1842 .flags = CLK_SET_RATE_PARENT, 1842 .flags = CLK_SET_RATE_PARENT,
1843 }, 1843 },
1844}; 1844};
diff --git a/drivers/clk/socfpga/clk-s10.c b/drivers/clk/socfpga/clk-s10.c
index 8281dfbf38c2..5bed36e12951 100644
--- a/drivers/clk/socfpga/clk-s10.c
+++ b/drivers/clk/socfpga/clk-s10.c
@@ -103,9 +103,9 @@ static const struct stratix10_perip_cnt_clock s10_main_perip_cnt_clks[] = {
103 { STRATIX10_NOC_CLK, "noc_clk", NULL, noc_mux, ARRAY_SIZE(noc_mux), 103 { STRATIX10_NOC_CLK, "noc_clk", NULL, noc_mux, ARRAY_SIZE(noc_mux),
104 0, 0, 0, 0x3C, 1}, 104 0, 0, 0, 0x3C, 1},
105 { STRATIX10_EMAC_A_FREE_CLK, "emaca_free_clk", NULL, emaca_free_mux, ARRAY_SIZE(emaca_free_mux), 105 { STRATIX10_EMAC_A_FREE_CLK, "emaca_free_clk", NULL, emaca_free_mux, ARRAY_SIZE(emaca_free_mux),
106 0, 0, 4, 0xB0, 0}, 106 0, 0, 2, 0xB0, 0},
107 { STRATIX10_EMAC_B_FREE_CLK, "emacb_free_clk", NULL, emacb_free_mux, ARRAY_SIZE(emacb_free_mux), 107 { STRATIX10_EMAC_B_FREE_CLK, "emacb_free_clk", NULL, emacb_free_mux, ARRAY_SIZE(emacb_free_mux),
108 0, 0, 4, 0xB0, 1}, 108 0, 0, 2, 0xB0, 1},
109 { STRATIX10_EMAC_PTP_FREE_CLK, "emac_ptp_free_clk", NULL, emac_ptp_free_mux, 109 { STRATIX10_EMAC_PTP_FREE_CLK, "emac_ptp_free_clk", NULL, emac_ptp_free_mux,
110 ARRAY_SIZE(emac_ptp_free_mux), 0, 0, 4, 0xB0, 2}, 110 ARRAY_SIZE(emac_ptp_free_mux), 0, 0, 4, 0xB0, 2},
111 { STRATIX10_GPIO_DB_FREE_CLK, "gpio_db_free_clk", NULL, gpio_db_free_mux, 111 { STRATIX10_GPIO_DB_FREE_CLK, "gpio_db_free_clk", NULL, gpio_db_free_mux,
diff --git a/drivers/clk/tegra/clk-tegra210.c b/drivers/clk/tegra/clk-tegra210.c
index e1ba62d2b1a0..ac1d27a8c650 100644
--- a/drivers/clk/tegra/clk-tegra210.c
+++ b/drivers/clk/tegra/clk-tegra210.c
@@ -3366,6 +3366,8 @@ static struct tegra_clk_init_table init_table[] __initdata = {
3366 { TEGRA210_CLK_I2S3_SYNC, TEGRA210_CLK_CLK_MAX, 24576000, 0 }, 3366 { TEGRA210_CLK_I2S3_SYNC, TEGRA210_CLK_CLK_MAX, 24576000, 0 },
3367 { TEGRA210_CLK_I2S4_SYNC, TEGRA210_CLK_CLK_MAX, 24576000, 0 }, 3367 { TEGRA210_CLK_I2S4_SYNC, TEGRA210_CLK_CLK_MAX, 24576000, 0 },
3368 { TEGRA210_CLK_VIMCLK_SYNC, TEGRA210_CLK_CLK_MAX, 24576000, 0 }, 3368 { TEGRA210_CLK_VIMCLK_SYNC, TEGRA210_CLK_CLK_MAX, 24576000, 0 },
3369 { TEGRA210_CLK_HDA, TEGRA210_CLK_PLL_P, 51000000, 0 },
3370 { TEGRA210_CLK_HDA2CODEC_2X, TEGRA210_CLK_PLL_P, 48000000, 0 },
3369 /* This MUST be the last entry. */ 3371 /* This MUST be the last entry. */
3370 { TEGRA210_CLK_CLK_MAX, TEGRA210_CLK_CLK_MAX, 0, 0 }, 3372 { TEGRA210_CLK_CLK_MAX, TEGRA210_CLK_CLK_MAX, 0, 0 },
3371}; 3373};
diff --git a/drivers/clk/ti/clkctrl.c b/drivers/clk/ti/clkctrl.c
index 8e834317c97d..975995eea15c 100644
--- a/drivers/clk/ti/clkctrl.c
+++ b/drivers/clk/ti/clkctrl.c
@@ -229,6 +229,7 @@ static struct clk_hw *_ti_omap4_clkctrl_xlate(struct of_phandle_args *clkspec,
229{ 229{
230 struct omap_clkctrl_provider *provider = data; 230 struct omap_clkctrl_provider *provider = data;
231 struct omap_clkctrl_clk *entry; 231 struct omap_clkctrl_clk *entry;
232 bool found = false;
232 233
233 if (clkspec->args_count != 2) 234 if (clkspec->args_count != 2)
234 return ERR_PTR(-EINVAL); 235 return ERR_PTR(-EINVAL);
@@ -238,11 +239,13 @@ static struct clk_hw *_ti_omap4_clkctrl_xlate(struct of_phandle_args *clkspec,
238 239
239 list_for_each_entry(entry, &provider->clocks, node) { 240 list_for_each_entry(entry, &provider->clocks, node) {
240 if (entry->reg_offset == clkspec->args[0] && 241 if (entry->reg_offset == clkspec->args[0] &&
241 entry->bit_offset == clkspec->args[1]) 242 entry->bit_offset == clkspec->args[1]) {
243 found = true;
242 break; 244 break;
245 }
243 } 246 }
244 247
245 if (!entry) 248 if (!found)
246 return ERR_PTR(-EINVAL); 249 return ERR_PTR(-EINVAL);
247 250
248 return entry->clk; 251 return entry->clk;
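
The clkctrl change above works around a common list_for_each_entry() pitfall: after a full walk with no break, the iterator does not end up NULL, it points at an offset of the list head, so a "!entry" test never detects the not-found case and an explicit flag (or separate result pointer) is needed. A minimal user-space sketch of the same pattern, using a hand-rolled singly linked list rather than the kernel list API (find_node and the node layout are hypothetical), might look like:

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>

    struct node {
            int reg_offset;
            struct node *next;
    };

    /* Hypothetical lookup mirroring the found-flag pattern from the patch. */
    static struct node *find_node(struct node *head, int reg_offset)
    {
            struct node *n;
            bool found = false;

            for (n = head; n; n = n->next) {
                    if (n->reg_offset == reg_offset) {
                            found = true;
                            break;
                    }
            }

            /*
             * With the kernel's list_for_each_entry() the cursor is never
             * NULL after a complete walk, so a flag like this (or a separate
             * result pointer) is what distinguishes "found" from "ran off
             * the end of the list".
             */
            return found ? n : NULL;
    }

    int main(void)
    {
            struct node b = { .reg_offset = 8, .next = NULL };
            struct node a = { .reg_offset = 4, .next = &b };

            printf("%p\n", (void *)find_node(&a, 8));   /* prints &b */
            printf("%p\n", (void *)find_node(&a, 16));  /* prints NULL */
            return 0;
    }
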
diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index 3300739edce4..5e9317dc3d39 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -43,6 +43,11 @@ config BCM_KONA_TIMER
43 help 43 help
44 Enables the support for the BCM Kona mobile timer driver. 44 Enables the support for the BCM Kona mobile timer driver.
45 45
46config DAVINCI_TIMER
47 bool "Texas Instruments DaVinci timer driver" if COMPILE_TEST
48 help
49 Enables the support for the TI DaVinci timer driver.
50
46config DIGICOLOR_TIMER 51config DIGICOLOR_TIMER
47 bool "Digicolor timer driver" if COMPILE_TEST 52 bool "Digicolor timer driver" if COMPILE_TEST
48 select CLKSRC_MMIO 53 select CLKSRC_MMIO
@@ -140,7 +145,7 @@ config TEGRA_TIMER
140 bool "Tegra timer driver" if COMPILE_TEST 145 bool "Tegra timer driver" if COMPILE_TEST
141 select CLKSRC_MMIO 146 select CLKSRC_MMIO
142 select TIMER_OF 147 select TIMER_OF
143 depends on ARM || ARM64 148 depends on ARCH_TEGRA || COMPILE_TEST
144 help 149 help
145 Enables support for the Tegra driver. 150 Enables support for the Tegra driver.
146 151
@@ -617,6 +622,13 @@ config CLKSRC_IMX_TPM
617 Enable this option to use IMX Timer/PWM Module (TPM) timer as 622 Enable this option to use IMX Timer/PWM Module (TPM) timer as
618 clocksource. 623 clocksource.
619 624
625config TIMER_IMX_SYS_CTR
626 bool "i.MX system counter timer" if COMPILE_TEST
627 select TIMER_OF
628 help
629 Enable this option to use i.MX system counter timer as a
630 clockevent.
631
620config CLKSRC_ST_LPC 632config CLKSRC_ST_LPC
621 bool "Low power clocksource found in the LPC" if COMPILE_TEST 633 bool "Low power clocksource found in the LPC" if COMPILE_TEST
622 select TIMER_OF if OF 634 select TIMER_OF if OF
diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile
index 236858fa7fbf..2e7936e7833f 100644
--- a/drivers/clocksource/Makefile
+++ b/drivers/clocksource/Makefile
@@ -15,6 +15,7 @@ obj-$(CONFIG_SH_TIMER_TMU) += sh_tmu.o
15obj-$(CONFIG_EM_TIMER_STI) += em_sti.o 15obj-$(CONFIG_EM_TIMER_STI) += em_sti.o
16obj-$(CONFIG_CLKBLD_I8253) += i8253.o 16obj-$(CONFIG_CLKBLD_I8253) += i8253.o
17obj-$(CONFIG_CLKSRC_MMIO) += mmio.o 17obj-$(CONFIG_CLKSRC_MMIO) += mmio.o
18obj-$(CONFIG_DAVINCI_TIMER) += timer-davinci.o
18obj-$(CONFIG_DIGICOLOR_TIMER) += timer-digicolor.o 19obj-$(CONFIG_DIGICOLOR_TIMER) += timer-digicolor.o
19obj-$(CONFIG_OMAP_DM_TIMER) += timer-ti-dm.o 20obj-$(CONFIG_OMAP_DM_TIMER) += timer-ti-dm.o
20obj-$(CONFIG_DW_APB_TIMER) += dw_apb_timer.o 21obj-$(CONFIG_DW_APB_TIMER) += dw_apb_timer.o
@@ -36,7 +37,7 @@ obj-$(CONFIG_U300_TIMER) += timer-u300.o
36obj-$(CONFIG_SUN4I_TIMER) += timer-sun4i.o 37obj-$(CONFIG_SUN4I_TIMER) += timer-sun4i.o
37obj-$(CONFIG_SUN5I_HSTIMER) += timer-sun5i.o 38obj-$(CONFIG_SUN5I_HSTIMER) += timer-sun5i.o
38obj-$(CONFIG_MESON6_TIMER) += timer-meson6.o 39obj-$(CONFIG_MESON6_TIMER) += timer-meson6.o
39obj-$(CONFIG_TEGRA_TIMER) += timer-tegra20.o 40obj-$(CONFIG_TEGRA_TIMER) += timer-tegra.o
40obj-$(CONFIG_VT8500_TIMER) += timer-vt8500.o 41obj-$(CONFIG_VT8500_TIMER) += timer-vt8500.o
41obj-$(CONFIG_NSPIRE_TIMER) += timer-zevio.o 42obj-$(CONFIG_NSPIRE_TIMER) += timer-zevio.o
42obj-$(CONFIG_BCM_KONA_TIMER) += bcm_kona_timer.o 43obj-$(CONFIG_BCM_KONA_TIMER) += bcm_kona_timer.o
@@ -74,6 +75,7 @@ obj-$(CONFIG_CLKSRC_MIPS_GIC) += mips-gic-timer.o
74obj-$(CONFIG_CLKSRC_TANGO_XTAL) += timer-tango-xtal.o 75obj-$(CONFIG_CLKSRC_TANGO_XTAL) += timer-tango-xtal.o
75obj-$(CONFIG_CLKSRC_IMX_GPT) += timer-imx-gpt.o 76obj-$(CONFIG_CLKSRC_IMX_GPT) += timer-imx-gpt.o
76obj-$(CONFIG_CLKSRC_IMX_TPM) += timer-imx-tpm.o 77obj-$(CONFIG_CLKSRC_IMX_TPM) += timer-imx-tpm.o
78obj-$(CONFIG_TIMER_IMX_SYS_CTR) += timer-imx-sysctr.o
77obj-$(CONFIG_ASM9260_TIMER) += asm9260_timer.o 79obj-$(CONFIG_ASM9260_TIMER) += asm9260_timer.o
78obj-$(CONFIG_H8300_TMR8) += h8300_timer8.o 80obj-$(CONFIG_H8300_TMR8) += h8300_timer8.o
79obj-$(CONFIG_H8300_TMR16) += h8300_timer16.o 81obj-$(CONFIG_H8300_TMR16) += h8300_timer16.o
@@ -84,3 +86,4 @@ obj-$(CONFIG_ATCPIT100_TIMER) += timer-atcpit100.o
84obj-$(CONFIG_RISCV_TIMER) += timer-riscv.o 86obj-$(CONFIG_RISCV_TIMER) += timer-riscv.o
85obj-$(CONFIG_CSKY_MP_TIMER) += timer-mp-csky.o 87obj-$(CONFIG_CSKY_MP_TIMER) += timer-mp-csky.o
86obj-$(CONFIG_GX6605S_TIMER) += timer-gx6605s.o 88obj-$(CONFIG_GX6605S_TIMER) += timer-gx6605s.o
89obj-$(CONFIG_HYPERV_TIMER) += hyperv_timer.o
diff --git a/drivers/clocksource/arc_timer.c b/drivers/clocksource/arc_timer.c
index ebfbccefc7b3..b29b5a75333e 100644
--- a/drivers/clocksource/arc_timer.c
+++ b/drivers/clocksource/arc_timer.c
@@ -13,6 +13,7 @@
13 */ 13 */
14 14
15#include <linux/interrupt.h> 15#include <linux/interrupt.h>
16#include <linux/bits.h>
16#include <linux/clk.h> 17#include <linux/clk.h>
17#include <linux/clk-provider.h> 18#include <linux/clk-provider.h>
18#include <linux/clocksource.h> 19#include <linux/clocksource.h>
@@ -139,7 +140,7 @@ static u64 arc_read_rtc(struct clocksource *cs)
139 l = read_aux_reg(AUX_RTC_LOW); 140 l = read_aux_reg(AUX_RTC_LOW);
140 h = read_aux_reg(AUX_RTC_HIGH); 141 h = read_aux_reg(AUX_RTC_HIGH);
141 status = read_aux_reg(AUX_RTC_CTRL); 142 status = read_aux_reg(AUX_RTC_CTRL);
142 } while (!(status & _BITUL(31))); 143 } while (!(status & BIT(31)));
143 144
144 return (((u64)h) << 32) | l; 145 return (((u64)h) << 32) | l;
145} 146}
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index 07e57a49d1e8..9a5464c625b4 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -801,14 +801,7 @@ static void arch_timer_evtstrm_enable(int divider)
801 cntkctl |= (divider << ARCH_TIMER_EVT_TRIGGER_SHIFT) 801 cntkctl |= (divider << ARCH_TIMER_EVT_TRIGGER_SHIFT)
802 | ARCH_TIMER_VIRT_EVT_EN; 802 | ARCH_TIMER_VIRT_EVT_EN;
803 arch_timer_set_cntkctl(cntkctl); 803 arch_timer_set_cntkctl(cntkctl);
804#ifdef CONFIG_ARM64 804 arch_timer_set_evtstrm_feature();
805 cpu_set_named_feature(EVTSTRM);
806#else
807 elf_hwcap |= HWCAP_EVTSTRM;
808#endif
809#ifdef CONFIG_COMPAT
810 compat_elf_hwcap |= COMPAT_HWCAP_EVTSTRM;
811#endif
812 cpumask_set_cpu(smp_processor_id(), &evtstrm_available); 805 cpumask_set_cpu(smp_processor_id(), &evtstrm_available);
813} 806}
814 807
@@ -1037,11 +1030,7 @@ static int arch_timer_cpu_pm_notify(struct notifier_block *self,
1037 } else if (action == CPU_PM_ENTER_FAILED || action == CPU_PM_EXIT) { 1030 } else if (action == CPU_PM_ENTER_FAILED || action == CPU_PM_EXIT) {
1038 arch_timer_set_cntkctl(__this_cpu_read(saved_cntkctl)); 1031 arch_timer_set_cntkctl(__this_cpu_read(saved_cntkctl));
1039 1032
1040#ifdef CONFIG_ARM64 1033 if (arch_timer_have_evtstrm_feature())
1041 if (cpu_have_named_feature(EVTSTRM))
1042#else
1043 if (elf_hwcap & HWCAP_EVTSTRM)
1044#endif
1045 cpumask_set_cpu(smp_processor_id(), &evtstrm_available); 1034 cpumask_set_cpu(smp_processor_id(), &evtstrm_available);
1046 } 1035 }
1047 return NOTIFY_OK; 1036 return NOTIFY_OK;
diff --git a/drivers/clocksource/exynos_mct.c b/drivers/clocksource/exynos_mct.c
index e8eab16b154b..74cb299f5089 100644
--- a/drivers/clocksource/exynos_mct.c
+++ b/drivers/clocksource/exynos_mct.c
@@ -206,7 +206,7 @@ static void exynos4_frc_resume(struct clocksource *cs)
206 206
207static struct clocksource mct_frc = { 207static struct clocksource mct_frc = {
208 .name = "mct-frc", 208 .name = "mct-frc",
209 .rating = 400, 209 .rating = 450, /* use value higher than ARM arch timer */
210 .read = exynos4_frc_read, 210 .read = exynos4_frc_read,
211 .mask = CLOCKSOURCE_MASK(32), 211 .mask = CLOCKSOURCE_MASK(32),
212 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 212 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
@@ -461,7 +461,7 @@ static int exynos4_mct_starting_cpu(unsigned int cpu)
461 evt->set_state_oneshot_stopped = set_state_shutdown; 461 evt->set_state_oneshot_stopped = set_state_shutdown;
462 evt->tick_resume = set_state_shutdown; 462 evt->tick_resume = set_state_shutdown;
463 evt->features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT; 463 evt->features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT;
464 evt->rating = 450; 464 evt->rating = 500; /* use value higher than ARM arch timer */
465 465
466 exynos4_mct_write(TICK_BASE_CNT, mevt->base + MCT_L_TCNTB_OFFSET); 466 exynos4_mct_write(TICK_BASE_CNT, mevt->base + MCT_L_TCNTB_OFFSET);
467 467
diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c
new file mode 100644
index 000000000000..ba2c79e6a0ee
--- /dev/null
+++ b/drivers/clocksource/hyperv_timer.c
@@ -0,0 +1,339 @@
1// SPDX-License-Identifier: GPL-2.0
2
3/*
4 * Clocksource driver for the synthetic counter and timers
5 * provided by the Hyper-V hypervisor to guest VMs, as described
6 * in the Hyper-V Top Level Functional Spec (TLFS). This driver
7 * is instruction set architecture independent.
8 *
9 * Copyright (C) 2019, Microsoft, Inc.
10 *
11 * Author: Michael Kelley <mikelley@microsoft.com>
12 */
13
14#include <linux/percpu.h>
15#include <linux/cpumask.h>
16#include <linux/clockchips.h>
17#include <linux/clocksource.h>
18#include <linux/sched_clock.h>
19#include <linux/mm.h>
20#include <clocksource/hyperv_timer.h>
21#include <asm/hyperv-tlfs.h>
22#include <asm/mshyperv.h>
23
24static struct clock_event_device __percpu *hv_clock_event;
25
26/*
27 * If false, we're using the old mechanism for stimer0 interrupts
28 * where it sends a VMbus message when it expires. The old
29 * mechanism is used when running on older versions of Hyper-V
30 * that don't support Direct Mode. While Hyper-V provides
31 * four stimers per CPU, Linux uses only stimer0.
32 */
33static bool direct_mode_enabled;
34
35static int stimer0_irq;
36static int stimer0_vector;
37static int stimer0_message_sint;
38
39/*
40 * ISR for when stimer0 is operating in Direct Mode. Direct Mode
41 * does not use VMbus or any VMbus messages, so process here and not
42 * in the VMbus driver code.
43 */
44void hv_stimer0_isr(void)
45{
46 struct clock_event_device *ce;
47
48 ce = this_cpu_ptr(hv_clock_event);
49 ce->event_handler(ce);
50}
51EXPORT_SYMBOL_GPL(hv_stimer0_isr);
52
53static int hv_ce_set_next_event(unsigned long delta,
54 struct clock_event_device *evt)
55{
56 u64 current_tick;
57
58 current_tick = hyperv_cs->read(NULL);
59 current_tick += delta;
60 hv_init_timer(0, current_tick);
61 return 0;
62}
63
64static int hv_ce_shutdown(struct clock_event_device *evt)
65{
66 hv_init_timer(0, 0);
67 hv_init_timer_config(0, 0);
68 if (direct_mode_enabled)
69 hv_disable_stimer0_percpu_irq(stimer0_irq);
70
71 return 0;
72}
73
74static int hv_ce_set_oneshot(struct clock_event_device *evt)
75{
76 union hv_stimer_config timer_cfg;
77
78 timer_cfg.as_uint64 = 0;
79 timer_cfg.enable = 1;
80 timer_cfg.auto_enable = 1;
81 if (direct_mode_enabled) {
82 /*
83 * When it expires, the timer will directly interrupt
84 * on the specified hardware vector/IRQ.
85 */
86 timer_cfg.direct_mode = 1;
87 timer_cfg.apic_vector = stimer0_vector;
88 hv_enable_stimer0_percpu_irq(stimer0_irq);
89 } else {
90 /*
91 * When it expires, the timer will generate a VMbus message,
92 * to be handled by the normal VMbus interrupt handler.
93 */
94 timer_cfg.direct_mode = 0;
95 timer_cfg.sintx = stimer0_message_sint;
96 }
97 hv_init_timer_config(0, timer_cfg.as_uint64);
98 return 0;
99}
100
101/*
102 * hv_stimer_init - Per-cpu initialization of the clockevent
103 */
104void hv_stimer_init(unsigned int cpu)
105{
106 struct clock_event_device *ce;
107
108 /*
109 * Synthetic timers are always available except on old versions of
110 * Hyper-V on x86. In that case, just return as Linux will use a
111 * clocksource based on emulated PIT or LAPIC timer hardware.
112 */
113 if (!(ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE))
114 return;
115
116 ce = per_cpu_ptr(hv_clock_event, cpu);
117 ce->name = "Hyper-V clockevent";
118 ce->features = CLOCK_EVT_FEAT_ONESHOT;
119 ce->cpumask = cpumask_of(cpu);
120 ce->rating = 1000;
121 ce->set_state_shutdown = hv_ce_shutdown;
122 ce->set_state_oneshot = hv_ce_set_oneshot;
123 ce->set_next_event = hv_ce_set_next_event;
124
125 clockevents_config_and_register(ce,
126 HV_CLOCK_HZ,
127 HV_MIN_DELTA_TICKS,
128 HV_MAX_MAX_DELTA_TICKS);
129}
130EXPORT_SYMBOL_GPL(hv_stimer_init);
131
132/*
133 * hv_stimer_cleanup - Per-cpu cleanup of the clockevent
134 */
135void hv_stimer_cleanup(unsigned int cpu)
136{
137 struct clock_event_device *ce;
138
139 /* Turn off clockevent device */
140 if (ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE) {
141 ce = per_cpu_ptr(hv_clock_event, cpu);
142 hv_ce_shutdown(ce);
143 }
144}
145EXPORT_SYMBOL_GPL(hv_stimer_cleanup);
146
147/* hv_stimer_alloc - Global initialization of the clockevent and stimer0 */
148int hv_stimer_alloc(int sint)
149{
150 int ret;
151
152 hv_clock_event = alloc_percpu(struct clock_event_device);
153 if (!hv_clock_event)
154 return -ENOMEM;
155
156 direct_mode_enabled = ms_hyperv.misc_features &
157 HV_STIMER_DIRECT_MODE_AVAILABLE;
158 if (direct_mode_enabled) {
159 ret = hv_setup_stimer0_irq(&stimer0_irq, &stimer0_vector,
160 hv_stimer0_isr);
161 if (ret) {
162 free_percpu(hv_clock_event);
163 hv_clock_event = NULL;
164 return ret;
165 }
166 }
167
168 stimer0_message_sint = sint;
169 return 0;
170}
171EXPORT_SYMBOL_GPL(hv_stimer_alloc);
172
173/* hv_stimer_free - Free global resources allocated by hv_stimer_alloc() */
174void hv_stimer_free(void)
175{
176 if (direct_mode_enabled && (stimer0_irq != 0)) {
177 hv_remove_stimer0_irq(stimer0_irq);
178 stimer0_irq = 0;
179 }
180 free_percpu(hv_clock_event);
181 hv_clock_event = NULL;
182}
183EXPORT_SYMBOL_GPL(hv_stimer_free);
184
185/*
186 * Do a global cleanup of clockevents for the cases of kexec and
187 * vmbus exit
188 */
189void hv_stimer_global_cleanup(void)
190{
191 int cpu;
192 struct clock_event_device *ce;
193
194 if (ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE) {
195 for_each_present_cpu(cpu) {
196 ce = per_cpu_ptr(hv_clock_event, cpu);
197 clockevents_unbind_device(ce, cpu);
198 }
199 }
200 hv_stimer_free();
201}
202EXPORT_SYMBOL_GPL(hv_stimer_global_cleanup);
203
204/*
205 * Code and definitions for the Hyper-V clocksources. Two
206 * clocksources are defined: one that reads the Hyper-V defined MSR, and
207 * the other that uses the TSC reference page feature as defined in the
208 * TLFS. The MSR version is for compatibility with old versions of
209 * Hyper-V and 32-bit x86. The TSC reference page version is preferred.
210 */
211
212struct clocksource *hyperv_cs;
213EXPORT_SYMBOL_GPL(hyperv_cs);
214
215#ifdef CONFIG_HYPERV_TSCPAGE
216
217static struct ms_hyperv_tsc_page *tsc_pg;
218
219struct ms_hyperv_tsc_page *hv_get_tsc_page(void)
220{
221 return tsc_pg;
222}
223EXPORT_SYMBOL_GPL(hv_get_tsc_page);
224
225static u64 notrace read_hv_sched_clock_tsc(void)
226{
227 u64 current_tick = hv_read_tsc_page(tsc_pg);
228
229 if (current_tick == U64_MAX)
230 hv_get_time_ref_count(current_tick);
231
232 return current_tick;
233}
234
235static u64 read_hv_clock_tsc(struct clocksource *arg)
236{
237 return read_hv_sched_clock_tsc();
238}
239
240static struct clocksource hyperv_cs_tsc = {
241 .name = "hyperv_clocksource_tsc_page",
242 .rating = 400,
243 .read = read_hv_clock_tsc,
244 .mask = CLOCKSOURCE_MASK(64),
245 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
246};
247#endif
248
249static u64 notrace read_hv_sched_clock_msr(void)
250{
251 u64 current_tick;
252 /*
253 * Read the partition counter to get the current tick count. This count
254 * is set to 0 when the partition is created and is incremented in
255 * 100 nanosecond units.
256 */
257 hv_get_time_ref_count(current_tick);
258 return current_tick;
259}
260
261static u64 read_hv_clock_msr(struct clocksource *arg)
262{
263 return read_hv_sched_clock_msr();
264}
265
266static struct clocksource hyperv_cs_msr = {
267 .name = "hyperv_clocksource_msr",
268 .rating = 400,
269 .read = read_hv_clock_msr,
270 .mask = CLOCKSOURCE_MASK(64),
271 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
272};
273
274#ifdef CONFIG_HYPERV_TSCPAGE
275static bool __init hv_init_tsc_clocksource(void)
276{
277 u64 tsc_msr;
278 phys_addr_t phys_addr;
279
280 if (!(ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE))
281 return false;
282
283 tsc_pg = vmalloc(PAGE_SIZE);
284 if (!tsc_pg)
285 return false;
286
287 hyperv_cs = &hyperv_cs_tsc;
288 phys_addr = page_to_phys(vmalloc_to_page(tsc_pg));
289
290 /*
291 * The Hyper-V TLFS specifies to preserve the value of reserved
292 * bits in registers. So read the existing value, preserve the
293 * low order 12 bits, and add in the guest physical address
294 * (which already has at least the low 12 bits set to zero since
295 * it is page aligned). Also set the "enable" bit, which is bit 0.
296 */
297 hv_get_reference_tsc(tsc_msr);
298 tsc_msr &= GENMASK_ULL(11, 0);
299 tsc_msr = tsc_msr | 0x1 | (u64)phys_addr;
300 hv_set_reference_tsc(tsc_msr);
301
302 hv_set_clocksource_vdso(hyperv_cs_tsc);
303 clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100);
304
305 /* sched_clock_register is needed on ARM64 but is a no-op on x86 */
306 sched_clock_register(read_hv_sched_clock_tsc, 64, HV_CLOCK_HZ);
307 return true;
308}
309#else
310static bool __init hv_init_tsc_clocksource(void)
311{
312 return false;
313}
314#endif
315
316
317void __init hv_init_clocksource(void)
318{
319 /*
320 * Try to set up the TSC page clocksource. If it succeeds, we're
321 * done. Otherwise, set up the MSR clocksource. At least one of
322 * these will always be available except on very old versions of
323 * Hyper-V on x86. In that case we won't have a Hyper-V
324 * clocksource, but Linux will still run with a clocksource based
325 * on the emulated PIT or LAPIC timer.
326 */
327 if (hv_init_tsc_clocksource())
328 return;
329
330 if (!(ms_hyperv.features & HV_MSR_TIME_REF_COUNT_AVAILABLE))
331 return;
332
333 hyperv_cs = &hyperv_cs_msr;
334 clocksource_register_hz(&hyperv_cs_msr, NSEC_PER_SEC/100);
335
336 /* sched_clock_register is needed on ARM64 but is a no-op on x86 */
337 sched_clock_register(read_hv_sched_clock_msr, 64, HV_CLOCK_HZ);
338}
339EXPORT_SYMBOL_GPL(hv_init_clocksource);
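
A side note on the reference-TSC MSR write in hv_init_tsc_clocksource() above: the existing MSR value keeps only its low 12 (reserved) bits, bit 0 is the enable bit, and the remaining bits carry the page-aligned guest physical address of the TSC page, which already has its low 12 bits clear. A stand-alone sketch of that composition, with a local GENMASK_ULL macro standing in for the kernel's and a hypothetical compose_tsc_msr() helper, is:

    #include <stdint.h>
    #include <stdio.h>

    /* Local stand-in for the kernel's GENMASK_ULL(h, l). */
    #define GENMASK_ULL(h, l) \
            (((~0ULL) << (l)) & (~0ULL >> (63 - (h))))

    /*
     * Compose the reference-TSC MSR value: preserve the reserved low 12 bits
     * of the old value, set the enable bit (bit 0), and OR in the page-aligned
     * physical address of the TSC page. Hypothetical helper, not kernel API.
     */
    static uint64_t compose_tsc_msr(uint64_t old_msr, uint64_t phys_addr)
    {
            uint64_t msr = old_msr & GENMASK_ULL(11, 0);

            return msr | 0x1 | phys_addr;   /* phys_addr is page aligned */
    }

    int main(void)
    {
            uint64_t old = 0xdeadbeef00000ffeULL;   /* only the low 12 bits survive */
            uint64_t pa  = 0x12345000ULL;           /* 4 KiB aligned */

            /* Prints 0x12345fff: address | preserved bits | enable bit. */
            printf("%#llx\n", (unsigned long long)compose_tsc_msr(old, pa));
            return 0;
    }
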
diff --git a/drivers/clocksource/timer-davinci.c b/drivers/clocksource/timer-davinci.c
new file mode 100644
index 000000000000..62745c962049
--- /dev/null
+++ b/drivers/clocksource/timer-davinci.c
@@ -0,0 +1,369 @@
1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * TI DaVinci clocksource driver
4 *
5 * Copyright (C) 2019 Texas Instruments
6 * Author: Bartosz Golaszewski <bgolaszewski@baylibre.com>
7 * (with tiny parts adapted from code by Kevin Hilman <khilman@baylibre.com>)
8 */
9
10#include <linux/clk.h>
11#include <linux/clockchips.h>
12#include <linux/interrupt.h>
13#include <linux/kernel.h>
14#include <linux/of_address.h>
15#include <linux/of_irq.h>
16#include <linux/sched_clock.h>
17
18#include <clocksource/timer-davinci.h>
19
20#undef pr_fmt
21#define pr_fmt(fmt) "%s: " fmt "\n", __func__
22
23#define DAVINCI_TIMER_REG_TIM12 0x10
24#define DAVINCI_TIMER_REG_TIM34 0x14
25#define DAVINCI_TIMER_REG_PRD12 0x18
26#define DAVINCI_TIMER_REG_PRD34 0x1c
27#define DAVINCI_TIMER_REG_TCR 0x20
28#define DAVINCI_TIMER_REG_TGCR 0x24
29
30#define DAVINCI_TIMER_TIMMODE_MASK GENMASK(3, 2)
31#define DAVINCI_TIMER_RESET_MASK GENMASK(1, 0)
32#define DAVINCI_TIMER_TIMMODE_32BIT_UNCHAINED BIT(2)
33#define DAVINCI_TIMER_UNRESET GENMASK(1, 0)
34
35#define DAVINCI_TIMER_ENAMODE_MASK GENMASK(1, 0)
36#define DAVINCI_TIMER_ENAMODE_DISABLED 0x00
37#define DAVINCI_TIMER_ENAMODE_ONESHOT BIT(0)
38#define DAVINCI_TIMER_ENAMODE_PERIODIC BIT(1)
39
40#define DAVINCI_TIMER_ENAMODE_SHIFT_TIM12 6
41#define DAVINCI_TIMER_ENAMODE_SHIFT_TIM34 22
42
43#define DAVINCI_TIMER_MIN_DELTA 0x01
44#define DAVINCI_TIMER_MAX_DELTA 0xfffffffe
45
46#define DAVINCI_TIMER_CLKSRC_BITS 32
47
48#define DAVINCI_TIMER_TGCR_DEFAULT \
49 (DAVINCI_TIMER_TIMMODE_32BIT_UNCHAINED | DAVINCI_TIMER_UNRESET)
50
51struct davinci_clockevent {
52 struct clock_event_device dev;
53 void __iomem *base;
54 unsigned int cmp_off;
55};
56
57/*
58 * This must be globally accessible by davinci_timer_read_sched_clock(), so
59 * let's keep it here.
60 */
61static struct {
62 struct clocksource dev;
63 void __iomem *base;
64 unsigned int tim_off;
65} davinci_clocksource;
66
67static struct davinci_clockevent *
68to_davinci_clockevent(struct clock_event_device *clockevent)
69{
70 return container_of(clockevent, struct davinci_clockevent, dev);
71}
72
73static unsigned int
74davinci_clockevent_read(struct davinci_clockevent *clockevent,
75 unsigned int reg)
76{
77 return readl_relaxed(clockevent->base + reg);
78}
79
80static void davinci_clockevent_write(struct davinci_clockevent *clockevent,
81 unsigned int reg, unsigned int val)
82{
83 writel_relaxed(val, clockevent->base + reg);
84}
85
86static void davinci_tim12_shutdown(void __iomem *base)
87{
88 unsigned int tcr;
89
90 tcr = DAVINCI_TIMER_ENAMODE_DISABLED <<
91 DAVINCI_TIMER_ENAMODE_SHIFT_TIM12;
92 /*
93 * This function is only ever called if we're using both timer
94 * halves. In this case TIM34 runs in periodic mode and we must
95 * not modify it.
96 */
97 tcr |= DAVINCI_TIMER_ENAMODE_PERIODIC <<
98 DAVINCI_TIMER_ENAMODE_SHIFT_TIM34;
99
100 writel_relaxed(tcr, base + DAVINCI_TIMER_REG_TCR);
101}
102
103static void davinci_tim12_set_oneshot(void __iomem *base)
104{
105 unsigned int tcr;
106
107 tcr = DAVINCI_TIMER_ENAMODE_ONESHOT <<
108 DAVINCI_TIMER_ENAMODE_SHIFT_TIM12;
109 /* Same as above. */
110 tcr |= DAVINCI_TIMER_ENAMODE_PERIODIC <<
111 DAVINCI_TIMER_ENAMODE_SHIFT_TIM34;
112
113 writel_relaxed(tcr, base + DAVINCI_TIMER_REG_TCR);
114}
115
116static int davinci_clockevent_shutdown(struct clock_event_device *dev)
117{
118 struct davinci_clockevent *clockevent;
119
120 clockevent = to_davinci_clockevent(dev);
121
122 davinci_tim12_shutdown(clockevent->base);
123
124 return 0;
125}
126
127static int davinci_clockevent_set_oneshot(struct clock_event_device *dev)
128{
129 struct davinci_clockevent *clockevent = to_davinci_clockevent(dev);
130
131 davinci_clockevent_write(clockevent, DAVINCI_TIMER_REG_TIM12, 0x0);
132
133 davinci_tim12_set_oneshot(clockevent->base);
134
135 return 0;
136}
137
138static int
139davinci_clockevent_set_next_event_std(unsigned long cycles,
140 struct clock_event_device *dev)
141{
142 struct davinci_clockevent *clockevent = to_davinci_clockevent(dev);
143
144 davinci_clockevent_shutdown(dev);
145
146 davinci_clockevent_write(clockevent, DAVINCI_TIMER_REG_TIM12, 0x0);
147 davinci_clockevent_write(clockevent, DAVINCI_TIMER_REG_PRD12, cycles);
148
149 davinci_clockevent_set_oneshot(dev);
150
151 return 0;
152}
153
154static int
155davinci_clockevent_set_next_event_cmp(unsigned long cycles,
156 struct clock_event_device *dev)
157{
158 struct davinci_clockevent *clockevent = to_davinci_clockevent(dev);
159 unsigned int curr_time;
160
161 curr_time = davinci_clockevent_read(clockevent,
162 DAVINCI_TIMER_REG_TIM12);
163 davinci_clockevent_write(clockevent,
164 clockevent->cmp_off, curr_time + cycles);
165
166 return 0;
167}
168
169static irqreturn_t davinci_timer_irq_timer(int irq, void *data)
170{
171 struct davinci_clockevent *clockevent = data;
172
173 if (!clockevent_state_oneshot(&clockevent->dev))
174 davinci_tim12_shutdown(clockevent->base);
175
176 clockevent->dev.event_handler(&clockevent->dev);
177
178 return IRQ_HANDLED;
179}
180
181static u64 notrace davinci_timer_read_sched_clock(void)
182{
183 return readl_relaxed(davinci_clocksource.base +
184 davinci_clocksource.tim_off);
185}
186
187static u64 davinci_clocksource_read(struct clocksource *dev)
188{
189 return davinci_timer_read_sched_clock();
190}
191
192/*
193 * Standard use-case: we're using tim12 for clockevent and tim34 for
194 * clocksource. The default is making the former run in oneshot mode
195 * and the latter in periodic mode.
196 */
197static void davinci_clocksource_init_tim34(void __iomem *base)
198{
199 int tcr;
200
201 tcr = DAVINCI_TIMER_ENAMODE_PERIODIC <<
202 DAVINCI_TIMER_ENAMODE_SHIFT_TIM34;
203 tcr |= DAVINCI_TIMER_ENAMODE_ONESHOT <<
204 DAVINCI_TIMER_ENAMODE_SHIFT_TIM12;
205
206 writel_relaxed(0x0, base + DAVINCI_TIMER_REG_TIM34);
207 writel_relaxed(UINT_MAX, base + DAVINCI_TIMER_REG_PRD34);
208 writel_relaxed(tcr, base + DAVINCI_TIMER_REG_TCR);
209}
210
211/*
212 * Special use-case on da830: the DSP may use tim34. We're using tim12 for
213 * both clocksource and clockevent. We set tim12 to periodic and don't touch
214 * tim34.
215 */
216static void davinci_clocksource_init_tim12(void __iomem *base)
217{
218 unsigned int tcr;
219
220 tcr = DAVINCI_TIMER_ENAMODE_PERIODIC <<
221 DAVINCI_TIMER_ENAMODE_SHIFT_TIM12;
222
223 writel_relaxed(0x0, base + DAVINCI_TIMER_REG_TIM12);
224 writel_relaxed(UINT_MAX, base + DAVINCI_TIMER_REG_PRD12);
225 writel_relaxed(tcr, base + DAVINCI_TIMER_REG_TCR);
226}
227
228static void davinci_timer_init(void __iomem *base)
229{
230 /* Set clock to internal mode and disable it. */
231 writel_relaxed(0x0, base + DAVINCI_TIMER_REG_TCR);
232 /*
233 * Reset both 32-bit timers, set no prescaler for timer 34, set the
234 * timer to dual 32-bit unchained mode, unreset both 32-bit timers.
235 */
236 writel_relaxed(DAVINCI_TIMER_TGCR_DEFAULT,
237 base + DAVINCI_TIMER_REG_TGCR);
238 /* Init both counters to zero. */
239 writel_relaxed(0x0, base + DAVINCI_TIMER_REG_TIM12);
240 writel_relaxed(0x0, base + DAVINCI_TIMER_REG_TIM34);
241}
242
243int __init davinci_timer_register(struct clk *clk,
244 const struct davinci_timer_cfg *timer_cfg)
245{
246 struct davinci_clockevent *clockevent;
247 unsigned int tick_rate;
248 void __iomem *base;
249 int rv;
250
251 rv = clk_prepare_enable(clk);
252 if (rv) {
253 pr_err("Unable to prepare and enable the timer clock");
254 return rv;
255 }
256
257 if (!request_mem_region(timer_cfg->reg.start,
258 resource_size(&timer_cfg->reg),
259 "davinci-timer")) {
260 pr_err("Unable to request memory region");
261 return -EBUSY;
262 }
263
264 base = ioremap(timer_cfg->reg.start, resource_size(&timer_cfg->reg));
265 if (!base) {
266 pr_err("Unable to map the register range");
267 return -ENOMEM;
268 }
269
270 davinci_timer_init(base);
271 tick_rate = clk_get_rate(clk);
272
273 clockevent = kzalloc(sizeof(*clockevent), GFP_KERNEL | __GFP_NOFAIL);
274 if (!clockevent) {
275 pr_err("Error allocating memory for clockevent data");
276 return -ENOMEM;
277 }
278
279 clockevent->dev.name = "tim12";
280 clockevent->dev.features = CLOCK_EVT_FEAT_ONESHOT;
281 clockevent->dev.cpumask = cpumask_of(0);
282 clockevent->base = base;
283
284 if (timer_cfg->cmp_off) {
285 clockevent->cmp_off = timer_cfg->cmp_off;
286 clockevent->dev.set_next_event =
287 davinci_clockevent_set_next_event_cmp;
288 } else {
289 clockevent->dev.set_next_event =
290 davinci_clockevent_set_next_event_std;
291 clockevent->dev.set_state_oneshot =
292 davinci_clockevent_set_oneshot;
293 clockevent->dev.set_state_shutdown =
294 davinci_clockevent_shutdown;
295 }
296
297 rv = request_irq(timer_cfg->irq[DAVINCI_TIMER_CLOCKEVENT_IRQ].start,
298 davinci_timer_irq_timer, IRQF_TIMER,
299 "clockevent/tim12", clockevent);
300 if (rv) {
301 pr_err("Unable to request the clockevent interrupt");
302 return rv;
303 }
304
305 clockevents_config_and_register(&clockevent->dev, tick_rate,
306 DAVINCI_TIMER_MIN_DELTA,
307 DAVINCI_TIMER_MAX_DELTA);
308
309 davinci_clocksource.dev.rating = 300;
310 davinci_clocksource.dev.read = davinci_clocksource_read;
311 davinci_clocksource.dev.mask =
312 CLOCKSOURCE_MASK(DAVINCI_TIMER_CLKSRC_BITS);
313 davinci_clocksource.dev.flags = CLOCK_SOURCE_IS_CONTINUOUS;
314 davinci_clocksource.base = base;
315
316 if (timer_cfg->cmp_off) {
317 davinci_clocksource.dev.name = "tim12";
318 davinci_clocksource.tim_off = DAVINCI_TIMER_REG_TIM12;
319 davinci_clocksource_init_tim12(base);
320 } else {
321 davinci_clocksource.dev.name = "tim34";
322 davinci_clocksource.tim_off = DAVINCI_TIMER_REG_TIM34;
323 davinci_clocksource_init_tim34(base);
324 }
325
326 rv = clocksource_register_hz(&davinci_clocksource.dev, tick_rate);
327 if (rv) {
328 pr_err("Unable to register clocksource");
329 return rv;
330 }
331
332 sched_clock_register(davinci_timer_read_sched_clock,
333 DAVINCI_TIMER_CLKSRC_BITS, tick_rate);
334
335 return 0;
336}
337
338static int __init of_davinci_timer_register(struct device_node *np)
339{
340 struct davinci_timer_cfg timer_cfg = { };
341 struct clk *clk;
342 int rv;
343
344 rv = of_address_to_resource(np, 0, &timer_cfg.reg);
345 if (rv) {
346 pr_err("Unable to get the register range for timer");
347 return rv;
348 }
349
350 rv = of_irq_to_resource_table(np, timer_cfg.irq,
351 DAVINCI_TIMER_NUM_IRQS);
352 if (rv != DAVINCI_TIMER_NUM_IRQS) {
353 pr_err("Unable to get the interrupts for timer");
354 return rv;
355 }
356
357 clk = of_clk_get(np, 0);
358 if (IS_ERR(clk)) {
359 pr_err("Unable to get the timer clock");
360 return PTR_ERR(clk);
361 }
362
363 rv = davinci_timer_register(clk, &timer_cfg);
364 if (rv)
365 clk_put(clk);
366
367 return rv;
368}
369TIMER_OF_DECLARE(davinci_timer, "ti,da830-timer", of_davinci_timer_register);
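
One detail worth spelling out in davinci_clockevent_set_next_event_cmp() above: the compare value is just the current 32-bit counter plus the requested delta, and unsigned modular arithmetic makes the wraparound come out right, since the free-running counter is matched against the compare register for equality. A tiny sketch of that arithmetic with hypothetical values (plain C, not driver code) is:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint32_t curr  = 0xfffffff0u;   /* counter close to wrapping */
            uint32_t delta = 0x40u;         /* ticks until the next event */
            uint32_t cmp   = curr + delta;  /* wraps modulo 2^32 */

            /*
             * The counter passes 0xffffffff, wraps to 0 and eventually hits
             * the compare value; the elapsed tick count still equals delta.
             */
            printf("cmp = %#x, elapsed = %u\n", cmp, cmp - curr);
            return 0;
    }
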
diff --git a/drivers/clocksource/timer-imx-sysctr.c b/drivers/clocksource/timer-imx-sysctr.c
new file mode 100644
index 000000000000..fd7d68066efb
--- /dev/null
+++ b/drivers/clocksource/timer-imx-sysctr.c
@@ -0,0 +1,145 @@
1// SPDX-License-Identifier: GPL-2.0+
2//
3// Copyright 2017-2019 NXP
4
5#include <linux/interrupt.h>
6#include <linux/clockchips.h>
7#include <linux/of_address.h>
8#include <linux/of_irq.h>
9
10#include "timer-of.h"
11
12#define CMP_OFFSET 0x10000
13
14#define CNTCV_LO 0x8
15#define CNTCV_HI 0xc
16#define CMPCV_LO (CMP_OFFSET + 0x20)
17#define CMPCV_HI (CMP_OFFSET + 0x24)
18#define CMPCR (CMP_OFFSET + 0x2c)
19
20#define SYS_CTR_EN 0x1
21#define SYS_CTR_IRQ_MASK 0x2
22
23static void __iomem *sys_ctr_base;
24static u32 cmpcr;
25
26static void sysctr_timer_enable(bool enable)
27{
28 writel(enable ? cmpcr | SYS_CTR_EN : cmpcr, sys_ctr_base + CMPCR);
29}
30
31static void sysctr_irq_acknowledge(void)
32{
33 /*
34 * Clearing the enable bit (EN = 0) also clears
35 * the status bit (ISTAT = 0), which negates
36 * (acknowledges) the interrupt signal.
37 */
38 sysctr_timer_enable(false);
39}
40
41static inline u64 sysctr_read_counter(void)
42{
43 u32 cnt_hi, tmp_hi, cnt_lo;
44
45 do {
46 cnt_hi = readl_relaxed(sys_ctr_base + CNTCV_HI);
47 cnt_lo = readl_relaxed(sys_ctr_base + CNTCV_LO);
48 tmp_hi = readl_relaxed(sys_ctr_base + CNTCV_HI);
49 } while (tmp_hi != cnt_hi);
50
51 return ((u64) cnt_hi << 32) | cnt_lo;
52}
53
54static int sysctr_set_next_event(unsigned long delta,
55 struct clock_event_device *evt)
56{
57 u32 cmp_hi, cmp_lo;
58 u64 next;
59
60 sysctr_timer_enable(false);
61
62 next = sysctr_read_counter();
63
64 next += delta;
65
66 cmp_hi = (next >> 32) & 0x00fffff;
67 cmp_lo = next & 0xffffffff;
68
69 writel_relaxed(cmp_hi, sys_ctr_base + CMPCV_HI);
70 writel_relaxed(cmp_lo, sys_ctr_base + CMPCV_LO);
71
72 sysctr_timer_enable(true);
73
74 return 0;
75}
76
77static int sysctr_set_state_oneshot(struct clock_event_device *evt)
78{
79 return 0;
80}
81
82static int sysctr_set_state_shutdown(struct clock_event_device *evt)
83{
84 sysctr_timer_enable(false);
85
86 return 0;
87}
88
89static irqreturn_t sysctr_timer_interrupt(int irq, void *dev_id)
90{
91 struct clock_event_device *evt = dev_id;
92
93 sysctr_irq_acknowledge();
94
95 evt->event_handler(evt);
96
97 return IRQ_HANDLED;
98}
99
100static struct timer_of to_sysctr = {
101 .flags = TIMER_OF_IRQ | TIMER_OF_CLOCK | TIMER_OF_BASE,
102 .clkevt = {
103 .name = "i.MX system counter timer",
104 .features = CLOCK_EVT_FEAT_ONESHOT |
105 CLOCK_EVT_FEAT_DYNIRQ,
106 .set_state_oneshot = sysctr_set_state_oneshot,
107 .set_next_event = sysctr_set_next_event,
108 .set_state_shutdown = sysctr_set_state_shutdown,
109 .rating = 200,
110 },
111 .of_irq = {
112 .handler = sysctr_timer_interrupt,
113 .flags = IRQF_TIMER | IRQF_IRQPOLL,
114 },
115 .of_clk = {
116 .name = "per",
117 },
118};
119
120static void __init sysctr_clockevent_init(void)
121{
122 to_sysctr.clkevt.cpumask = cpumask_of(0);
123
124 clockevents_config_and_register(&to_sysctr.clkevt,
125 timer_of_rate(&to_sysctr),
126 0xff, 0x7fffffff);
127}
128
129static int __init sysctr_timer_init(struct device_node *np)
130{
131 int ret = 0;
132
133 ret = timer_of_init(np, &to_sysctr);
134 if (ret)
135 return ret;
136
137 sys_ctr_base = timer_of_base(&to_sysctr);
138 cmpcr = readl(sys_ctr_base + CMPCR);
139 cmpcr &= ~SYS_CTR_EN;
140
141 sysctr_clockevent_init();
142
143 return 0;
144}
145TIMER_OF_DECLARE(sysctr_timer, "nxp,sysctr-timer", sysctr_timer_init);
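
The sysctr_read_counter() loop above is the standard way to read a 64-bit free-running counter exposed as two 32-bit registers: read the high word, then the low word, then the high word again, and retry if the high word changed in between (a low-to-high carry happened mid-read). A self-contained sketch of the pattern against in-memory variables standing in for the CNTCV_HI/CNTCV_LO MMIO registers (in the driver these would be readl_relaxed() accesses) is:

    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical split counter standing in for the MMIO registers. */
    static volatile uint32_t cnt_hi_reg, cnt_lo_reg;

    static uint64_t read_split_counter(void)
    {
            uint32_t hi, lo, tmp;

            do {
                    hi  = cnt_hi_reg;
                    lo  = cnt_lo_reg;
                    tmp = cnt_hi_reg;
            } while (tmp != hi);    /* retry if the low word wrapped mid-read */

            return ((uint64_t)hi << 32) | lo;
    }

    int main(void)
    {
            cnt_hi_reg = 0x2;
            cnt_lo_reg = 0xdeadbeef;
            printf("%#llx\n", (unsigned long long)read_split_counter());
            return 0;
    }
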
diff --git a/drivers/clocksource/timer-ixp4xx.c b/drivers/clocksource/timer-ixp4xx.c
index 5c2190b654cd..9396745e1c17 100644
--- a/drivers/clocksource/timer-ixp4xx.c
+++ b/drivers/clocksource/timer-ixp4xx.c
@@ -75,14 +75,19 @@ to_ixp4xx_timer(struct clock_event_device *evt)
75 return container_of(evt, struct ixp4xx_timer, clkevt); 75 return container_of(evt, struct ixp4xx_timer, clkevt);
76} 76}
77 77
78static u64 notrace ixp4xx_read_sched_clock(void) 78static unsigned long ixp4xx_read_timer(void)
79{ 79{
80 return __raw_readl(local_ixp4xx_timer->base + IXP4XX_OSTS_OFFSET); 80 return __raw_readl(local_ixp4xx_timer->base + IXP4XX_OSTS_OFFSET);
81} 81}
82 82
83static u64 notrace ixp4xx_read_sched_clock(void)
84{
85 return ixp4xx_read_timer();
86}
87
83static u64 ixp4xx_clocksource_read(struct clocksource *c) 88static u64 ixp4xx_clocksource_read(struct clocksource *c)
84{ 89{
85 return __raw_readl(local_ixp4xx_timer->base + IXP4XX_OSTS_OFFSET); 90 return ixp4xx_read_timer();
86} 91}
87 92
88static irqreturn_t ixp4xx_timer_interrupt(int irq, void *dev_id) 93static irqreturn_t ixp4xx_timer_interrupt(int irq, void *dev_id)
@@ -224,6 +229,13 @@ static __init int ixp4xx_timer_register(void __iomem *base,
224 229
225 sched_clock_register(ixp4xx_read_sched_clock, 32, timer_freq); 230 sched_clock_register(ixp4xx_read_sched_clock, 32, timer_freq);
226 231
232#ifdef CONFIG_ARM
233 /* Also use this timer for delays */
234 tmr->delay_timer.read_current_timer = ixp4xx_read_timer;
235 tmr->delay_timer.freq = timer_freq;
236 register_current_timer_delay(&tmr->delay_timer);
237#endif
238
227 return 0; 239 return 0;
228} 240}
229 241
diff --git a/drivers/clocksource/timer-meson6.c b/drivers/clocksource/timer-meson6.c
index 84bd9479c3f8..9e8b467c71da 100644
--- a/drivers/clocksource/timer-meson6.c
+++ b/drivers/clocksource/timer-meson6.c
@@ -1,13 +1,10 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * Amlogic Meson6 SoCs timer handling. 3 * Amlogic Meson6 SoCs timer handling.
3 * 4 *
4 * Copyright (C) 2014 Carlo Caione <carlo@caione.org> 5 * Copyright (C) 2014 Carlo Caione <carlo@caione.org>
5 * 6 *
6 * Based on code from Amlogic, Inc 7 * Based on code from Amlogic, Inc
7 *
8 * This file is licensed under the terms of the GNU General Public
9 * License version 2. This program is licensed "as is" without any
10 * warranty of any kind, whether express or implied.
11 */ 8 */
12 9
13#include <linux/bitfield.h> 10#include <linux/bitfield.h>
diff --git a/drivers/clocksource/timer-tegra.c b/drivers/clocksource/timer-tegra.c
new file mode 100644
index 000000000000..e9635c25eef4
--- /dev/null
+++ b/drivers/clocksource/timer-tegra.c
@@ -0,0 +1,416 @@
1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Copyright (C) 2010 Google, Inc.
4 *
5 * Author:
6 * Colin Cross <ccross@google.com>
7 */
8
9#define pr_fmt(fmt) "tegra-timer: " fmt
10
11#include <linux/clk.h>
12#include <linux/clockchips.h>
13#include <linux/cpu.h>
14#include <linux/cpumask.h>
15#include <linux/delay.h>
16#include <linux/err.h>
17#include <linux/interrupt.h>
18#include <linux/of_address.h>
19#include <linux/of_irq.h>
20#include <linux/percpu.h>
21#include <linux/sched_clock.h>
22#include <linux/time.h>
23
24#include "timer-of.h"
25
26#define RTC_SECONDS 0x08
27#define RTC_SHADOW_SECONDS 0x0c
28#define RTC_MILLISECONDS 0x10
29
30#define TIMERUS_CNTR_1US 0x10
31#define TIMERUS_USEC_CFG 0x14
32#define TIMERUS_CNTR_FREEZE 0x4c
33
34#define TIMER_PTV 0x0
35#define TIMER_PTV_EN BIT(31)
36#define TIMER_PTV_PER BIT(30)
37#define TIMER_PCR 0x4
38#define TIMER_PCR_INTR_CLR BIT(30)
39
40#define TIMER1_BASE 0x00
41#define TIMER2_BASE 0x08
42#define TIMER3_BASE 0x50
43#define TIMER4_BASE 0x58
44#define TIMER10_BASE 0x90
45
46#define TIMER1_IRQ_IDX 0
47#define TIMER10_IRQ_IDX 10
48
49#define TIMER_1MHz 1000000
50
51static u32 usec_config;
52static void __iomem *timer_reg_base;
53
54static int tegra_timer_set_next_event(unsigned long cycles,
55 struct clock_event_device *evt)
56{
57 void __iomem *reg_base = timer_of_base(to_timer_of(evt));
58
59 /*
60 * Tegra's timer uses an n+1 scheme for the counter, i.e. the timer
61 * will fire after one tick if 0 is loaded.
62 *
63 * The minimum and maximum numbers of oneshot ticks are defined by
64 * the clockevents_config_and_register(1, 0x1fffffff + 1) invocation
65 * below in the code, so the cycles (ticks) can't fall outside the
66 * range the hardware supports.
67 */
68 writel_relaxed(TIMER_PTV_EN | (cycles - 1), reg_base + TIMER_PTV);
69
70 return 0;
71}
72
73static int tegra_timer_shutdown(struct clock_event_device *evt)
74{
75 void __iomem *reg_base = timer_of_base(to_timer_of(evt));
76
77 writel_relaxed(0, reg_base + TIMER_PTV);
78
79 return 0;
80}
81
82static int tegra_timer_set_periodic(struct clock_event_device *evt)
83{
84 void __iomem *reg_base = timer_of_base(to_timer_of(evt));
85 unsigned long period = timer_of_period(to_timer_of(evt));
86
87 writel_relaxed(TIMER_PTV_EN | TIMER_PTV_PER | (period - 1),
88 reg_base + TIMER_PTV);
89
90 return 0;
91}
92
93static irqreturn_t tegra_timer_isr(int irq, void *dev_id)
94{
95 struct clock_event_device *evt = dev_id;
96 void __iomem *reg_base = timer_of_base(to_timer_of(evt));
97
98 writel_relaxed(TIMER_PCR_INTR_CLR, reg_base + TIMER_PCR);
99 evt->event_handler(evt);
100
101 return IRQ_HANDLED;
102}
103
104static void tegra_timer_suspend(struct clock_event_device *evt)
105{
106 void __iomem *reg_base = timer_of_base(to_timer_of(evt));
107
108 writel_relaxed(TIMER_PCR_INTR_CLR, reg_base + TIMER_PCR);
109}
110
111static void tegra_timer_resume(struct clock_event_device *evt)
112{
113 writel_relaxed(usec_config, timer_reg_base + TIMERUS_USEC_CFG);
114}
115
116static DEFINE_PER_CPU(struct timer_of, tegra_to) = {
117 .flags = TIMER_OF_CLOCK | TIMER_OF_BASE,
118
119 .clkevt = {
120 .name = "tegra_timer",
121 .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC,
122 .set_next_event = tegra_timer_set_next_event,
123 .set_state_shutdown = tegra_timer_shutdown,
124 .set_state_periodic = tegra_timer_set_periodic,
125 .set_state_oneshot = tegra_timer_shutdown,
126 .tick_resume = tegra_timer_shutdown,
127 .suspend = tegra_timer_suspend,
128 .resume = tegra_timer_resume,
129 },
130};
131
132static int tegra_timer_setup(unsigned int cpu)
133{
134 struct timer_of *to = per_cpu_ptr(&tegra_to, cpu);
135
136 writel_relaxed(0, timer_of_base(to) + TIMER_PTV);
137 writel_relaxed(TIMER_PCR_INTR_CLR, timer_of_base(to) + TIMER_PCR);
138
139 irq_force_affinity(to->clkevt.irq, cpumask_of(cpu));
140 enable_irq(to->clkevt.irq);
141
142 /*
143 * Tegra's timer uses an n+1 scheme for the counter, i.e. the timer
144 * will fire after one tick if 0 is loaded, so the minimum number of
145 * ticks is 1. As a result, both tick limits passed here are one
146 * higher than the minimum and maximum the hardware register can
147 * take; this is then taken into account by the set_next_event
148 * callback.
149 */
150 clockevents_config_and_register(&to->clkevt, timer_of_rate(to),
151 1, /* min */
152 0x1fffffff + 1); /* max 29 bits + 1 */
153
154 return 0;
155}
156
157static int tegra_timer_stop(unsigned int cpu)
158{
159 struct timer_of *to = per_cpu_ptr(&tegra_to, cpu);
160
161 to->clkevt.set_state_shutdown(&to->clkevt);
162 disable_irq_nosync(to->clkevt.irq);
163
164 return 0;
165}
166
167static u64 notrace tegra_read_sched_clock(void)
168{
169 return readl_relaxed(timer_reg_base + TIMERUS_CNTR_1US);
170}
171
172#ifdef CONFIG_ARM
173static unsigned long tegra_delay_timer_read_counter_long(void)
174{
175 return readl_relaxed(timer_reg_base + TIMERUS_CNTR_1US);
176}
177
178static struct delay_timer tegra_delay_timer = {
179 .read_current_timer = tegra_delay_timer_read_counter_long,
180 .freq = TIMER_1MHz,
181};
182#endif
183
184static struct timer_of suspend_rtc_to = {
185 .flags = TIMER_OF_BASE | TIMER_OF_CLOCK,
186};
187
188/*
189 * tegra_rtc_read - Reads the Tegra RTC registers
190 * Care must be taken that this function is not called while the
191 * tegra_rtc driver could be executing, in order to avoid race
192 * conditions on the RTC shadow register.
193 */
194static u64 tegra_rtc_read_ms(struct clocksource *cs)
195{
196 void __iomem *reg_base = timer_of_base(&suspend_rtc_to);
197
198 u32 ms = readl_relaxed(reg_base + RTC_MILLISECONDS);
199 u32 s = readl_relaxed(reg_base + RTC_SHADOW_SECONDS);
200
201 return (u64)s * MSEC_PER_SEC + ms;
202}
203
204static struct clocksource suspend_rtc_clocksource = {
205 .name = "tegra_suspend_timer",
206 .rating = 200,
207 .read = tegra_rtc_read_ms,
208 .mask = CLOCKSOURCE_MASK(32),
209 .flags = CLOCK_SOURCE_IS_CONTINUOUS | CLOCK_SOURCE_SUSPEND_NONSTOP,
210};
211
212static inline unsigned int tegra_base_for_cpu(int cpu, bool tegra20)
213{
214 if (tegra20) {
215 switch (cpu) {
216 case 0:
217 return TIMER1_BASE;
218 case 1:
219 return TIMER2_BASE;
220 case 2:
221 return TIMER3_BASE;
222 default:
223 return TIMER4_BASE;
224 }
225 }
226
227 return TIMER10_BASE + cpu * 8;
228}
229
230static inline unsigned int tegra_irq_idx_for_cpu(int cpu, bool tegra20)
231{
232 if (tegra20)
233 return TIMER1_IRQ_IDX + cpu;
234
235 return TIMER10_IRQ_IDX + cpu;
236}
237
238static inline unsigned long tegra_rate_for_timer(struct timer_of *to,
239 bool tegra20)
240{
241 /*
242 * TIMER1-9 are fixed to 1MHz, TIMER10-13 are running off the
243 * parent clock.
244 */
245 if (tegra20)
246 return TIMER_1MHz;
247
248 return timer_of_rate(to);
249}
250
251static int __init tegra_init_timer(struct device_node *np, bool tegra20,
252 int rating)
253{
254 struct timer_of *to;
255 int cpu, ret;
256
257 to = this_cpu_ptr(&tegra_to);
258 ret = timer_of_init(np, to);
259 if (ret)
260 goto out;
261
262 timer_reg_base = timer_of_base(to);
263
264 /*
265 * Configure microsecond timers to have 1MHz clock
266 * Config register is 0xqqww, where qq is "dividend", ww is "divisor"
267 * Uses n+1 scheme
268 */
269 switch (timer_of_rate(to)) {
270 case 12000000:
271 usec_config = 0x000b; /* (11+1)/(0+1) */
272 break;
273 case 12800000:
274 usec_config = 0x043f; /* (63+1)/(4+1) */
275 break;
276 case 13000000:
277 usec_config = 0x000c; /* (12+1)/(0+1) */
278 break;
279 case 16800000:
280 usec_config = 0x0453; /* (83+1)/(4+1) */
281 break;
282 case 19200000:
283 usec_config = 0x045f; /* (95+1)/(4+1) */
284 break;
285 case 26000000:
286 usec_config = 0x0019; /* (25+1)/(0+1) */
287 break;
288 case 38400000:
289 usec_config = 0x04bf; /* (191+1)/(4+1) */
290 break;
291 case 48000000:
292 usec_config = 0x002f; /* (47+1)/(0+1) */
293 break;
294 default:
295 ret = -EINVAL;
296 goto out;
297 }
298
299 writel_relaxed(usec_config, timer_reg_base + TIMERUS_USEC_CFG);
300
301 for_each_possible_cpu(cpu) {
302 struct timer_of *cpu_to = per_cpu_ptr(&tegra_to, cpu);
303 unsigned long flags = IRQF_TIMER | IRQF_NOBALANCING;
304 unsigned long rate = tegra_rate_for_timer(to, tegra20);
305 unsigned int base = tegra_base_for_cpu(cpu, tegra20);
306 unsigned int idx = tegra_irq_idx_for_cpu(cpu, tegra20);
307 unsigned int irq = irq_of_parse_and_map(np, idx);
308
309 if (!irq) {
310 pr_err("failed to map irq for cpu%d\n", cpu);
311 ret = -EINVAL;
312 goto out_irq;
313 }
314
315 cpu_to->clkevt.irq = irq;
316 cpu_to->clkevt.rating = rating;
317 cpu_to->clkevt.cpumask = cpumask_of(cpu);
318 cpu_to->of_base.base = timer_reg_base + base;
319 cpu_to->of_clk.period = rate / HZ;
320 cpu_to->of_clk.rate = rate;
321
322 irq_set_status_flags(cpu_to->clkevt.irq, IRQ_NOAUTOEN);
323
324 ret = request_irq(cpu_to->clkevt.irq, tegra_timer_isr, flags,
325 cpu_to->clkevt.name, &cpu_to->clkevt);
326 if (ret) {
327 pr_err("failed to set up irq for cpu%d: %d\n",
328 cpu, ret);
329 irq_dispose_mapping(cpu_to->clkevt.irq);
330 cpu_to->clkevt.irq = 0;
331 goto out_irq;
332 }
333 }
334
335 sched_clock_register(tegra_read_sched_clock, 32, TIMER_1MHz);
336
337 ret = clocksource_mmio_init(timer_reg_base + TIMERUS_CNTR_1US,
338 "timer_us", TIMER_1MHz, 300, 32,
339 clocksource_mmio_readl_up);
340 if (ret)
341 pr_err("failed to register clocksource: %d\n", ret);
342
343#ifdef CONFIG_ARM
344 register_current_timer_delay(&tegra_delay_timer);
345#endif
346
347 ret = cpuhp_setup_state(CPUHP_AP_TEGRA_TIMER_STARTING,
348 "AP_TEGRA_TIMER_STARTING", tegra_timer_setup,
349 tegra_timer_stop);
350 if (ret)
351 pr_err("failed to set up cpu hp state: %d\n", ret);
352
353 return ret;
354
355out_irq:
356 for_each_possible_cpu(cpu) {
357 struct timer_of *cpu_to;
358
359 cpu_to = per_cpu_ptr(&tegra_to, cpu);
360 if (cpu_to->clkevt.irq) {
361 free_irq(cpu_to->clkevt.irq, &cpu_to->clkevt);
362 irq_dispose_mapping(cpu_to->clkevt.irq);
363 }
364 }
365
366 to->of_base.base = timer_reg_base;
367out:
368 timer_of_cleanup(to);
369
370 return ret;
371}
372
373static int __init tegra210_init_timer(struct device_node *np)
374{
375 /*
376 * The arch timer can't survive a power cycle of the CPU core or
377 * the CPUPORESET signal due to a system design shortcoming,
378 * hence the Tegra timer is preferable on Tegra210.
379 */
380 return tegra_init_timer(np, false, 460);
381}
382TIMER_OF_DECLARE(tegra210_timer, "nvidia,tegra210-timer", tegra210_init_timer);
383
384static int __init tegra20_init_timer(struct device_node *np)
385{
386 int rating;
387
388 /*
389 * Tegra20 and Tegra30 have a Cortex-A9 CPU with a TWD timer;
390 * that timer runs off the CPU clock and hence is subject to
391 * jitter caused by DVFS clock rate changes. The Tegra timer is
392 * preferable on older Tegras, while later SoC generations have
393 * the arch timer as the main per-CPU timer, which is not
394 * affected by DVFS changes.
395 */
396 if (of_machine_is_compatible("nvidia,tegra20") ||
397 of_machine_is_compatible("nvidia,tegra30"))
398 rating = 460;
399 else
400 rating = 330;
401
402 return tegra_init_timer(np, true, rating);
403}
404TIMER_OF_DECLARE(tegra20_timer, "nvidia,tegra20-timer", tegra20_init_timer);
405
406static int __init tegra20_init_rtc(struct device_node *np)
407{
408 int ret;
409
410 ret = timer_of_init(np, &suspend_rtc_to);
411 if (ret)
412 return ret;
413
414 return clocksource_register_hz(&suspend_rtc_clocksource, 1000);
415}
416TIMER_OF_DECLARE(tegra20_rtc, "nvidia,tegra20-rtc", tegra20_init_rtc);
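
For reference, the usec_config values selected in tegra_init_timer() above are documented inline as (n+1)-encoded 0xqqww ratios; taking the per-case comments at face value, the low byte and high byte recover the parent clock rate in MHz. A tiny sketch that merely reproduces that arithmetic (decode_usec_config() is a hypothetical helper, not part of the driver) is:

    #include <stdio.h>

    /* Decode a 0xqqww usec_config value into the MHz ratio it encodes. */
    static double decode_usec_config(unsigned int cfg)
    {
            unsigned int hi = (cfg >> 8) & 0xff;    /* high byte, n+1 encoded */
            unsigned int lo = cfg & 0xff;           /* low byte,  n+1 encoded */

            /* Per the case comments, (lo + 1) / (hi + 1) is the rate in MHz. */
            return (double)(lo + 1) / (double)(hi + 1);
    }

    int main(void)
    {
            /* 0x045f -> (95+1)/(4+1) = 19.2, i.e. a 19.2 MHz parent clock. */
            printf("%.1f MHz\n", decode_usec_config(0x045f));
            /* 0x000b -> (11+1)/(0+1) = 12.0, i.e. a 12 MHz parent clock. */
            printf("%.1f MHz\n", decode_usec_config(0x000b));
            return 0;
    }
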
diff --git a/drivers/clocksource/timer-tegra20.c b/drivers/clocksource/timer-tegra20.c
deleted file mode 100644
index 1e7ece279730..000000000000
--- a/drivers/clocksource/timer-tegra20.c
+++ /dev/null
@@ -1,379 +0,0 @@
1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Copyright (C) 2010 Google, Inc.
4 *
5 * Author:
6 * Colin Cross <ccross@google.com>
7 */
8
9#include <linux/clk.h>
10#include <linux/clockchips.h>
11#include <linux/cpu.h>
12#include <linux/cpumask.h>
13#include <linux/delay.h>
14#include <linux/err.h>
15#include <linux/interrupt.h>
16#include <linux/of_address.h>
17#include <linux/of_irq.h>
18#include <linux/percpu.h>
19#include <linux/sched_clock.h>
20#include <linux/time.h>
21
22#include "timer-of.h"
23
24#ifdef CONFIG_ARM
25#include <asm/mach/time.h>
26#endif
27
28#define RTC_SECONDS 0x08
29#define RTC_SHADOW_SECONDS 0x0c
30#define RTC_MILLISECONDS 0x10
31
32#define TIMERUS_CNTR_1US 0x10
33#define TIMERUS_USEC_CFG 0x14
34#define TIMERUS_CNTR_FREEZE 0x4c
35
36#define TIMER_PTV 0x0
37#define TIMER_PTV_EN BIT(31)
38#define TIMER_PTV_PER BIT(30)
39#define TIMER_PCR 0x4
40#define TIMER_PCR_INTR_CLR BIT(30)
41
42#ifdef CONFIG_ARM
43#define TIMER_CPU0 0x50 /* TIMER3 */
44#else
45#define TIMER_CPU0 0x90 /* TIMER10 */
46#define TIMER10_IRQ_IDX 10
47#define IRQ_IDX_FOR_CPU(cpu) (TIMER10_IRQ_IDX + cpu)
48#endif
49#define TIMER_BASE_FOR_CPU(cpu) (TIMER_CPU0 + (cpu) * 8)
50
51static u32 usec_config;
52static void __iomem *timer_reg_base;
53#ifdef CONFIG_ARM
54static struct delay_timer tegra_delay_timer;
55#endif
56
57static int tegra_timer_set_next_event(unsigned long cycles,
58 struct clock_event_device *evt)
59{
60 void __iomem *reg_base = timer_of_base(to_timer_of(evt));
61
62 writel(TIMER_PTV_EN |
63 ((cycles > 1) ? (cycles - 1) : 0), /* n+1 scheme */
64 reg_base + TIMER_PTV);
65
66 return 0;
67}
68
69static int tegra_timer_shutdown(struct clock_event_device *evt)
70{
71 void __iomem *reg_base = timer_of_base(to_timer_of(evt));
72
73 writel(0, reg_base + TIMER_PTV);
74
75 return 0;
76}
77
78static int tegra_timer_set_periodic(struct clock_event_device *evt)
79{
80 void __iomem *reg_base = timer_of_base(to_timer_of(evt));
81
82 writel(TIMER_PTV_EN | TIMER_PTV_PER |
83 ((timer_of_rate(to_timer_of(evt)) / HZ) - 1),
84 reg_base + TIMER_PTV);
85
86 return 0;
87}
88
89static irqreturn_t tegra_timer_isr(int irq, void *dev_id)
90{
91 struct clock_event_device *evt = (struct clock_event_device *)dev_id;
92 void __iomem *reg_base = timer_of_base(to_timer_of(evt));
93
94 writel(TIMER_PCR_INTR_CLR, reg_base + TIMER_PCR);
95 evt->event_handler(evt);
96
97 return IRQ_HANDLED;
98}
99
100static void tegra_timer_suspend(struct clock_event_device *evt)
101{
102 void __iomem *reg_base = timer_of_base(to_timer_of(evt));
103
104 writel(TIMER_PCR_INTR_CLR, reg_base + TIMER_PCR);
105}
106
107static void tegra_timer_resume(struct clock_event_device *evt)
108{
109 writel(usec_config, timer_reg_base + TIMERUS_USEC_CFG);
110}
111
112#ifdef CONFIG_ARM64
113static DEFINE_PER_CPU(struct timer_of, tegra_to) = {
114 .flags = TIMER_OF_CLOCK | TIMER_OF_BASE,
115
116 .clkevt = {
117 .name = "tegra_timer",
118 .rating = 460,
119 .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC,
120 .set_next_event = tegra_timer_set_next_event,
121 .set_state_shutdown = tegra_timer_shutdown,
122 .set_state_periodic = tegra_timer_set_periodic,
123 .set_state_oneshot = tegra_timer_shutdown,
124 .tick_resume = tegra_timer_shutdown,
125 .suspend = tegra_timer_suspend,
126 .resume = tegra_timer_resume,
127 },
128};
129
130static int tegra_timer_setup(unsigned int cpu)
131{
132 struct timer_of *to = per_cpu_ptr(&tegra_to, cpu);
133
134 irq_force_affinity(to->clkevt.irq, cpumask_of(cpu));
135 enable_irq(to->clkevt.irq);
136
137 clockevents_config_and_register(&to->clkevt, timer_of_rate(to),
138 1, /* min */
139 0x1fffffff); /* 29 bits */
140
141 return 0;
142}
143
144static int tegra_timer_stop(unsigned int cpu)
145{
146 struct timer_of *to = per_cpu_ptr(&tegra_to, cpu);
147
148 to->clkevt.set_state_shutdown(&to->clkevt);
149 disable_irq_nosync(to->clkevt.irq);
150
151 return 0;
152}
153#else /* CONFIG_ARM */
154static struct timer_of tegra_to = {
155 .flags = TIMER_OF_CLOCK | TIMER_OF_BASE | TIMER_OF_IRQ,
156
157 .clkevt = {
158 .name = "tegra_timer",
159 .rating = 300,
160 .features = CLOCK_EVT_FEAT_ONESHOT |
161 CLOCK_EVT_FEAT_PERIODIC |
162 CLOCK_EVT_FEAT_DYNIRQ,
163 .set_next_event = tegra_timer_set_next_event,
164 .set_state_shutdown = tegra_timer_shutdown,
165 .set_state_periodic = tegra_timer_set_periodic,
166 .set_state_oneshot = tegra_timer_shutdown,
167 .tick_resume = tegra_timer_shutdown,
168 .suspend = tegra_timer_suspend,
169 .resume = tegra_timer_resume,
170 .cpumask = cpu_possible_mask,
171 },
172
173 .of_irq = {
174 .index = 2,
175 .flags = IRQF_TIMER | IRQF_TRIGGER_HIGH,
176 .handler = tegra_timer_isr,
177 },
178};
179
180static u64 notrace tegra_read_sched_clock(void)
181{
182 return readl(timer_reg_base + TIMERUS_CNTR_1US);
183}
184
185static unsigned long tegra_delay_timer_read_counter_long(void)
186{
187 return readl(timer_reg_base + TIMERUS_CNTR_1US);
188}
189
190static struct timer_of suspend_rtc_to = {
191 .flags = TIMER_OF_BASE | TIMER_OF_CLOCK,
192};
193
194/*
195 * tegra_rtc_read - Reads the Tegra RTC registers
196 * Care must be taken that this function is not called while the
197 * tegra_rtc driver could be executing to avoid race conditions
198 * on the RTC shadow register
199 */
200static u64 tegra_rtc_read_ms(struct clocksource *cs)
201{
202 u32 ms = readl(timer_of_base(&suspend_rtc_to) + RTC_MILLISECONDS);
203 u32 s = readl(timer_of_base(&suspend_rtc_to) + RTC_SHADOW_SECONDS);
204 return (u64)s * MSEC_PER_SEC + ms;
205}
206
207static struct clocksource suspend_rtc_clocksource = {
208 .name = "tegra_suspend_timer",
209 .rating = 200,
210 .read = tegra_rtc_read_ms,
211 .mask = CLOCKSOURCE_MASK(32),
212 .flags = CLOCK_SOURCE_IS_CONTINUOUS | CLOCK_SOURCE_SUSPEND_NONSTOP,
213};
214#endif
215
216static int tegra_timer_common_init(struct device_node *np, struct timer_of *to)
217{
218 int ret = 0;
219
220 ret = timer_of_init(np, to);
221 if (ret < 0)
222 goto out;
223
224 timer_reg_base = timer_of_base(to);
225
226 /*
227 * Configure microsecond timers to have 1MHz clock
228 * Config register is 0xqqww, where qq is "dividend", ww is "divisor"
229 * Uses n+1 scheme
230 */
231 switch (timer_of_rate(to)) {
232 case 12000000:
233 usec_config = 0x000b; /* (11+1)/(0+1) */
234 break;
235 case 12800000:
236 usec_config = 0x043f; /* (63+1)/(4+1) */
237 break;
238 case 13000000:
239 usec_config = 0x000c; /* (12+1)/(0+1) */
240 break;
241 case 16800000:
242 usec_config = 0x0453; /* (83+1)/(4+1) */
243 break;
244 case 19200000:
245 usec_config = 0x045f; /* (95+1)/(4+1) */
246 break;
247 case 26000000:
248 usec_config = 0x0019; /* (25+1)/(0+1) */
249 break;
250 case 38400000:
251 usec_config = 0x04bf; /* (191+1)/(4+1) */
252 break;
253 case 48000000:
254 usec_config = 0x002f; /* (47+1)/(0+1) */
255 break;
256 default:
257 ret = -EINVAL;
258 goto out;
259 }
260
261 writel(usec_config, timer_of_base(to) + TIMERUS_USEC_CFG);
262
263out:
264 return ret;
265}
266
267#ifdef CONFIG_ARM64
268static int __init tegra_init_timer(struct device_node *np)
269{
270 int cpu, ret = 0;
271 struct timer_of *to;
272
273 to = this_cpu_ptr(&tegra_to);
274 ret = tegra_timer_common_init(np, to);
275 if (ret < 0)
276 goto out;
277
278 for_each_possible_cpu(cpu) {
279 struct timer_of *cpu_to;
280
281 cpu_to = per_cpu_ptr(&tegra_to, cpu);
282 cpu_to->of_base.base = timer_reg_base + TIMER_BASE_FOR_CPU(cpu);
283 cpu_to->of_clk.rate = timer_of_rate(to);
284 cpu_to->clkevt.cpumask = cpumask_of(cpu);
285 cpu_to->clkevt.irq =
286 irq_of_parse_and_map(np, IRQ_IDX_FOR_CPU(cpu));
287 if (!cpu_to->clkevt.irq) {
288 pr_err("%s: can't map IRQ for CPU%d\n",
289 __func__, cpu);
290 ret = -EINVAL;
291 goto out;
292 }
293
294 irq_set_status_flags(cpu_to->clkevt.irq, IRQ_NOAUTOEN);
295 ret = request_irq(cpu_to->clkevt.irq, tegra_timer_isr,
296 IRQF_TIMER | IRQF_NOBALANCING,
297 cpu_to->clkevt.name, &cpu_to->clkevt);
298 if (ret) {
299 pr_err("%s: cannot setup irq %d for CPU%d\n",
300 __func__, cpu_to->clkevt.irq, cpu);
301 ret = -EINVAL;
302 goto out_irq;
303 }
304 }
305
306 cpuhp_setup_state(CPUHP_AP_TEGRA_TIMER_STARTING,
307 "AP_TEGRA_TIMER_STARTING", tegra_timer_setup,
308 tegra_timer_stop);
309
310 return ret;
311out_irq:
312 for_each_possible_cpu(cpu) {
313 struct timer_of *cpu_to;
314
315 cpu_to = per_cpu_ptr(&tegra_to, cpu);
316 if (cpu_to->clkevt.irq) {
317 free_irq(cpu_to->clkevt.irq, &cpu_to->clkevt);
318 irq_dispose_mapping(cpu_to->clkevt.irq);
319 }
320 }
321out:
322 timer_of_cleanup(to);
323 return ret;
324}
325#else /* CONFIG_ARM */
326static int __init tegra_init_timer(struct device_node *np)
327{
328 int ret = 0;
329
330 ret = tegra_timer_common_init(np, &tegra_to);
331 if (ret < 0)
332 goto out;
333
334 tegra_to.of_base.base = timer_reg_base + TIMER_BASE_FOR_CPU(0);
335 tegra_to.of_clk.rate = 1000000; /* microsecond timer */
336
337 sched_clock_register(tegra_read_sched_clock, 32,
338 timer_of_rate(&tegra_to));
339 ret = clocksource_mmio_init(timer_reg_base + TIMERUS_CNTR_1US,
340 "timer_us", timer_of_rate(&tegra_to),
341 300, 32, clocksource_mmio_readl_up);
342 if (ret) {
343 pr_err("Failed to register clocksource\n");
344 goto out;
345 }
346
347 tegra_delay_timer.read_current_timer =
348 tegra_delay_timer_read_counter_long;
349 tegra_delay_timer.freq = timer_of_rate(&tegra_to);
350 register_current_timer_delay(&tegra_delay_timer);
351
352 clockevents_config_and_register(&tegra_to.clkevt,
353 timer_of_rate(&tegra_to),
354 0x1,
355 0x1fffffff);
356
357 return ret;
358out:
359 timer_of_cleanup(&tegra_to);
360
361 return ret;
362}
363
364static int __init tegra20_init_rtc(struct device_node *np)
365{
366 int ret;
367
368 ret = timer_of_init(np, &suspend_rtc_to);
369 if (ret)
370 return ret;
371
372 clocksource_register_hz(&suspend_rtc_clocksource, 1000);
373
374 return 0;
375}
376TIMER_OF_DECLARE(tegra20_rtc, "nvidia,tegra20-rtc", tegra20_init_rtc);
377#endif
378TIMER_OF_DECLARE(tegra210_timer, "nvidia,tegra210-timer", tegra_init_timer);
379TIMER_OF_DECLARE(tegra20_timer, "nvidia,tegra20-timer", tegra_init_timer);
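
The TIMERUS_USEC_CFG table in the removed timer-tegra20.c above packs a dividend/divisor pair into one register using an n+1 scheme so the microsecond counter ticks at 1 MHz for any supported parent clock. A minimal sketch of that encoding (the helper name is illustrative, not part of the driver):

#include <stdint.h>

/*
 * Sketch only: rebuild a TIMERUS_USEC_CFG value as 0xqqww, where qq is the
 * stored dividend and ww the stored divisor, both kept minus one (the
 * "n+1 scheme" noted in the comment above).  The parent rate in MHz equals
 * (divisor + 1) / (dividend + 1); e.g. (95 + 1) / (4 + 1) = 19.2, which
 * gives 0x045f for the 19200000 case in the switch statement.
 */
static uint32_t usec_cfg_encode(unsigned int dividend, unsigned int divisor)
{
	return ((dividend - 1) & 0xff) << 8 | ((divisor - 1) & 0xff);
}

/* usec_cfg_encode(5, 96) == 0x045f */
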
diff --git a/drivers/crypto/nx/nx-842-pseries.c b/drivers/crypto/nx/nx-842-pseries.c
index 5c4aa606208c..2de5e3672e42 100644
--- a/drivers/crypto/nx/nx-842-pseries.c
+++ b/drivers/crypto/nx/nx-842-pseries.c
@@ -856,7 +856,7 @@ static ssize_t nx842_##_name##_show(struct device *dev, \
856 rcu_read_lock(); \ 856 rcu_read_lock(); \
857 local_devdata = rcu_dereference(devdata); \ 857 local_devdata = rcu_dereference(devdata); \
858 if (local_devdata) \ 858 if (local_devdata) \
859 p = snprintf(buf, PAGE_SIZE, "%ld\n", \ 859 p = snprintf(buf, PAGE_SIZE, "%lld\n", \
860 atomic64_read(&local_devdata->counters->_name)); \ 860 atomic64_read(&local_devdata->counters->_name)); \
861 rcu_read_unlock(); \ 861 rcu_read_unlock(); \
862 return p; \ 862 return p; \
@@ -909,7 +909,7 @@ static ssize_t nx842_timehist_show(struct device *dev,
909 } 909 }
910 910
911 for (i = 0; i < (NX842_HIST_SLOTS - 2); i++) { 911 for (i = 0; i < (NX842_HIST_SLOTS - 2); i++) {
912 bytes = snprintf(p, bytes_remain, "%u-%uus:\t%ld\n", 912 bytes = snprintf(p, bytes_remain, "%u-%uus:\t%lld\n",
913 i ? (2<<(i-1)) : 0, (2<<i)-1, 913 i ? (2<<(i-1)) : 0, (2<<i)-1,
914 atomic64_read(&times[i])); 914 atomic64_read(&times[i]));
915 bytes_remain -= bytes; 915 bytes_remain -= bytes;
@@ -917,7 +917,7 @@ static ssize_t nx842_timehist_show(struct device *dev,
917 } 917 }
918 /* The last bucket holds everything over 918 /* The last bucket holds everything over
919 * 2<<(NX842_HIST_SLOTS - 2) us */ 919 * 2<<(NX842_HIST_SLOTS - 2) us */
920 bytes = snprintf(p, bytes_remain, "%uus - :\t%ld\n", 920 bytes = snprintf(p, bytes_remain, "%uus - :\t%lld\n",
921 2<<(NX842_HIST_SLOTS - 2), 921 2<<(NX842_HIST_SLOTS - 2),
922 atomic64_read(&times[(NX842_HIST_SLOTS - 1)])); 922 atomic64_read(&times[(NX842_HIST_SLOTS - 1)]));
923 p += bytes; 923 p += bytes;
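
The nx-842 hunks above only swap the format specifier: with the generic atomic64 API returning s64, which the kernel defines as long long, the counters read via atomic64_read() need %lld rather than %ld. A minimal sketch of the matching pattern (names are illustrative):

#include <linux/atomic.h>
#include <linux/printk.h>

/* Sketch only: an s64 value from atomic64_read() is printed with %lld. */
static void print_counter(atomic64_t *counter)
{
	pr_info("count: %lld\n", atomic64_read(counter));
}
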
diff --git a/drivers/dma/dma-jz4780.c b/drivers/dma/dma-jz4780.c
index 263bee76ef0d..6b8c4c458e8a 100644
--- a/drivers/dma/dma-jz4780.c
+++ b/drivers/dma/dma-jz4780.c
@@ -718,12 +718,13 @@ static irqreturn_t jz4780_dma_irq_handler(int irq, void *data)
718{ 718{
719 struct jz4780_dma_dev *jzdma = data; 719 struct jz4780_dma_dev *jzdma = data;
720 unsigned int nb_channels = jzdma->soc_data->nb_channels; 720 unsigned int nb_channels = jzdma->soc_data->nb_channels;
721 uint32_t pending, dmac; 721 unsigned long pending;
722 uint32_t dmac;
722 int i; 723 int i;
723 724
724 pending = jz4780_dma_ctrl_readl(jzdma, JZ_DMA_REG_DIRQP); 725 pending = jz4780_dma_ctrl_readl(jzdma, JZ_DMA_REG_DIRQP);
725 726
726 for_each_set_bit(i, (unsigned long *)&pending, nb_channels) { 727 for_each_set_bit(i, &pending, nb_channels) {
727 if (jz4780_dma_chan_irq(jzdma, &jzdma->chan[i])) 728 if (jz4780_dma_chan_irq(jzdma, &jzdma->chan[i]))
728 pending &= ~BIT(i); 729 pending &= ~BIT(i);
729 } 730 }
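
The dma-jz4780.c change above widens the pending word because for_each_set_bit() walks a bitmap made of unsigned long words; casting the address of a u32 to unsigned long * can read past the variable on 64-bit targets. A minimal sketch of the safe idiom (the function name is made up for illustration):

#include <linux/bitops.h>
#include <linux/printk.h>

/*
 * Sketch only: keep the pending-IRQ value in an unsigned long so it can be
 * handed to for_each_set_bit() directly, instead of casting a narrower type.
 */
static void handle_pending_channels(unsigned long pending, unsigned int nb_channels)
{
	unsigned int i;

	for_each_set_bit(i, &pending, nb_channels)
		pr_info("channel %u raised an interrupt\n", i);
}
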
diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
index 99d9f431ae2c..4ec84a633bd3 100644
--- a/drivers/dma/imx-sdma.c
+++ b/drivers/dma/imx-sdma.c
@@ -703,7 +703,7 @@ static int sdma_load_script(struct sdma_engine *sdma, void *buf, int size,
703 spin_lock_irqsave(&sdma->channel_0_lock, flags); 703 spin_lock_irqsave(&sdma->channel_0_lock, flags);
704 704
705 bd0->mode.command = C0_SETPM; 705 bd0->mode.command = C0_SETPM;
706 bd0->mode.status = BD_DONE | BD_INTR | BD_WRAP | BD_EXTD; 706 bd0->mode.status = BD_DONE | BD_WRAP | BD_EXTD;
707 bd0->mode.count = size / 2; 707 bd0->mode.count = size / 2;
708 bd0->buffer_addr = buf_phys; 708 bd0->buffer_addr = buf_phys;
709 bd0->ext_buffer_addr = address; 709 bd0->ext_buffer_addr = address;
@@ -1025,7 +1025,7 @@ static int sdma_load_context(struct sdma_channel *sdmac)
1025 context->gReg[7] = sdmac->watermark_level; 1025 context->gReg[7] = sdmac->watermark_level;
1026 1026
1027 bd0->mode.command = C0_SETDM; 1027 bd0->mode.command = C0_SETDM;
1028 bd0->mode.status = BD_DONE | BD_INTR | BD_WRAP | BD_EXTD; 1028 bd0->mode.status = BD_DONE | BD_WRAP | BD_EXTD;
1029 bd0->mode.count = sizeof(*context) / 4; 1029 bd0->mode.count = sizeof(*context) / 4;
1030 bd0->buffer_addr = sdma->context_phys; 1030 bd0->buffer_addr = sdma->context_phys;
1031 bd0->ext_buffer_addr = 2048 + (sizeof(*context) / 4) * channel; 1031 bd0->ext_buffer_addr = 2048 + (sizeof(*context) / 4) * channel;
@@ -2096,27 +2096,6 @@ static int sdma_probe(struct platform_device *pdev)
2096 if (pdata && pdata->script_addrs) 2096 if (pdata && pdata->script_addrs)
2097 sdma_add_scripts(sdma, pdata->script_addrs); 2097 sdma_add_scripts(sdma, pdata->script_addrs);
2098 2098
2099 if (pdata) {
2100 ret = sdma_get_firmware(sdma, pdata->fw_name);
2101 if (ret)
2102 dev_warn(&pdev->dev, "failed to get firmware from platform data\n");
2103 } else {
2104 /*
2105 * Because the device tree does not encode the ROM script address,
2106 * the RAM script in the firmware is mandatory for device tree
2107 * probe; otherwise probing fails.
2108 */
2109 ret = of_property_read_string(np, "fsl,sdma-ram-script-name",
2110 &fw_name);
2111 if (ret)
2112 dev_warn(&pdev->dev, "failed to get firmware name\n");
2113 else {
2114 ret = sdma_get_firmware(sdma, fw_name);
2115 if (ret)
2116 dev_warn(&pdev->dev, "failed to get firmware from device tree\n");
2117 }
2118 }
2119
2120 sdma->dma_device.dev = &pdev->dev; 2099 sdma->dma_device.dev = &pdev->dev;
2121 2100
2122 sdma->dma_device.device_alloc_chan_resources = sdma_alloc_chan_resources; 2101 sdma->dma_device.device_alloc_chan_resources = sdma_alloc_chan_resources;
@@ -2161,6 +2140,33 @@ static int sdma_probe(struct platform_device *pdev)
2161 of_node_put(spba_bus); 2140 of_node_put(spba_bus);
2162 } 2141 }
2163 2142
2143 /*
2144 * Kick off firmware loading as the very last step:
2145 * attempt to load firmware only if we're not on the error path, because
2146 * the firmware callback requires a fully functional and allocated sdma
2147 * instance.
2148 */
2149 if (pdata) {
2150 ret = sdma_get_firmware(sdma, pdata->fw_name);
2151 if (ret)
2152 dev_warn(&pdev->dev, "failed to get firmware from platform data\n");
2153 } else {
2154 /*
2155 * Because the device tree does not encode the ROM script address,
2156 * the RAM script in the firmware is mandatory for device tree
2157 * probe; otherwise probing fails.
2158 */
2159 ret = of_property_read_string(np, "fsl,sdma-ram-script-name",
2160 &fw_name);
2161 if (ret) {
2162 dev_warn(&pdev->dev, "failed to get firmware name\n");
2163 } else {
2164 ret = sdma_get_firmware(sdma, fw_name);
2165 if (ret)
2166 dev_warn(&pdev->dev, "failed to get firmware from device tree\n");
2167 }
2168 }
2169
2164 return 0; 2170 return 0;
2165 2171
2166err_register: 2172err_register:
diff --git a/drivers/dma/qcom/bam_dma.c b/drivers/dma/qcom/bam_dma.c
index 4b43844f6af5..8e90a405939d 100644
--- a/drivers/dma/qcom/bam_dma.c
+++ b/drivers/dma/qcom/bam_dma.c
@@ -799,6 +799,9 @@ static u32 process_channel_irqs(struct bam_device *bdev)
799 /* Number of bytes available to read */ 799 /* Number of bytes available to read */
800 avail = CIRC_CNT(offset, bchan->head, MAX_DESCRIPTORS + 1); 800 avail = CIRC_CNT(offset, bchan->head, MAX_DESCRIPTORS + 1);
801 801
802 if (offset < bchan->head)
803 avail--;
804
802 list_for_each_entry_safe(async_desc, tmp, 805 list_for_each_entry_safe(async_desc, tmp,
803 &bchan->desc_list, desc_node) { 806 &bchan->desc_list, desc_node) {
804 /* Not enough data to read */ 807 /* Not enough data to read */
diff --git a/drivers/firmware/efi/efi-bgrt.c b/drivers/firmware/efi/efi-bgrt.c
index a2384184a7de..b07c17643210 100644
--- a/drivers/firmware/efi/efi-bgrt.c
+++ b/drivers/firmware/efi/efi-bgrt.c
@@ -47,11 +47,6 @@ void __init efi_bgrt_init(struct acpi_table_header *table)
47 bgrt->version); 47 bgrt->version);
48 goto out; 48 goto out;
49 } 49 }
50 if (bgrt->status & 0xfe) {
51 pr_notice("Ignoring BGRT: reserved status bits are non-zero %u\n",
52 bgrt->status);
53 goto out;
54 }
55 if (bgrt->image_type != 0) { 50 if (bgrt->image_type != 0) {
56 pr_notice("Ignoring BGRT: invalid image type %u (expected 0)\n", 51 pr_notice("Ignoring BGRT: invalid image type %u (expected 0)\n",
57 bgrt->image_type); 52 bgrt->image_type);
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index 16b2137d117c..4b7cf7bc0ded 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -1009,14 +1009,16 @@ int __ref efi_mem_reserve_persistent(phys_addr_t addr, u64 size)
1009 1009
1010 /* first try to find a slot in an existing linked list entry */ 1010 /* first try to find a slot in an existing linked list entry */
1011 for (prsv = efi_memreserve_root->next; prsv; prsv = rsv->next) { 1011 for (prsv = efi_memreserve_root->next; prsv; prsv = rsv->next) {
1012 rsv = __va(prsv); 1012 rsv = memremap(prsv, sizeof(*rsv), MEMREMAP_WB);
1013 index = atomic_fetch_add_unless(&rsv->count, 1, rsv->size); 1013 index = atomic_fetch_add_unless(&rsv->count, 1, rsv->size);
1014 if (index < rsv->size) { 1014 if (index < rsv->size) {
1015 rsv->entry[index].base = addr; 1015 rsv->entry[index].base = addr;
1016 rsv->entry[index].size = size; 1016 rsv->entry[index].size = size;
1017 1017
1018 memunmap(rsv);
1018 return 0; 1019 return 0;
1019 } 1020 }
1021 memunmap(rsv);
1020 } 1022 }
1021 1023
1022 /* no slot found - allocate a new linked list entry */ 1024 /* no slot found - allocate a new linked list entry */
@@ -1024,7 +1026,13 @@ int __ref efi_mem_reserve_persistent(phys_addr_t addr, u64 size)
1024 if (!rsv) 1026 if (!rsv)
1025 return -ENOMEM; 1027 return -ENOMEM;
1026 1028
1027 rsv->size = EFI_MEMRESERVE_COUNT(PAGE_SIZE); 1029 /*
1030 * The memremap() call above assumes that a linux_efi_memreserve entry
1031 * never crosses a page boundary, so let's ensure that this remains true
1032 * even when kexec'ing a 4k pages kernel from a >4k pages kernel, by
1033 * using SZ_4K explicitly in the size calculation below.
1034 */
1035 rsv->size = EFI_MEMRESERVE_COUNT(SZ_4K);
1028 atomic_set(&rsv->count, 1); 1036 atomic_set(&rsv->count, 1);
1029 rsv->entry[0].base = addr; 1037 rsv->entry[0].base = addr;
1030 rsv->entry[0].size = size; 1038 rsv->entry[0].size = size;
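
The efi.c hunk above stops dereferencing the reservation record through __va() and goes through memremap()/memunmap() instead, which avoids relying on the record being part of the linear mapping; the added comment explains why each mapping must stay within one 4k page. A minimal sketch of that access pattern (the struct name and field layout are made up for illustration):

#include <linux/io.h>
#include <linux/types.h>

/*
 * Sketch only: follow a physical-address link by mapping the record with
 * memremap() rather than dereferencing __va(), so the access also works
 * when the record lives outside the kernel's linear mapping.
 */
struct phys_rec {
	u64 next_phys;	/* physical address of the next record, or 0 */
	u32 value;
};

static u32 phys_rec_read(phys_addr_t phys)
{
	struct phys_rec *rec = memremap(phys, sizeof(*rec), MEMREMAP_WB);
	u32 value;

	if (!rec)
		return 0;
	value = rec->value;
	memunmap(rec);
	return value;
}
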
diff --git a/drivers/firmware/efi/efibc.c b/drivers/firmware/efi/efibc.c
index 61e099826cbb..35dccc88ac0a 100644
--- a/drivers/firmware/efi/efibc.c
+++ b/drivers/firmware/efi/efibc.c
@@ -43,11 +43,13 @@ static int efibc_set_variable(const char *name, const char *value)
43 efibc_str_to_str16(value, (efi_char16_t *)entry->var.Data); 43 efibc_str_to_str16(value, (efi_char16_t *)entry->var.Data);
44 memcpy(&entry->var.VendorGuid, &guid, sizeof(guid)); 44 memcpy(&entry->var.VendorGuid, &guid, sizeof(guid));
45 45
46 ret = efivar_entry_set(entry, 46 ret = efivar_entry_set_safe(entry->var.VariableName,
47 EFI_VARIABLE_NON_VOLATILE 47 entry->var.VendorGuid,
48 | EFI_VARIABLE_BOOTSERVICE_ACCESS 48 EFI_VARIABLE_NON_VOLATILE
49 | EFI_VARIABLE_RUNTIME_ACCESS, 49 | EFI_VARIABLE_BOOTSERVICE_ACCESS
50 size, entry->var.Data, NULL); 50 | EFI_VARIABLE_RUNTIME_ACCESS,
51 false, size, entry->var.Data);
52
51 if (ret) 53 if (ret)
52 pr_err("failed to set %s EFI variable: 0x%x\n", 54 pr_err("failed to set %s EFI variable: 0x%x\n",
53 name, ret); 55 name, ret);
diff --git a/drivers/gpio/gpio-mb86s7x.c b/drivers/gpio/gpio-mb86s7x.c
index 9bfff171f9fe..8f466993cd24 100644
--- a/drivers/gpio/gpio-mb86s7x.c
+++ b/drivers/gpio/gpio-mb86s7x.c
@@ -6,6 +6,7 @@
6 * Copyright (C) 2015 Linaro Ltd. 6 * Copyright (C) 2015 Linaro Ltd.
7 */ 7 */
8 8
9#include <linux/acpi.h>
9#include <linux/io.h> 10#include <linux/io.h>
10#include <linux/init.h> 11#include <linux/init.h>
11#include <linux/clk.h> 12#include <linux/clk.h>
@@ -19,6 +20,8 @@
19#include <linux/spinlock.h> 20#include <linux/spinlock.h>
20#include <linux/slab.h> 21#include <linux/slab.h>
21 22
23#include "gpiolib.h"
24
22/* 25/*
23 * Only first 8bits of a register correspond to each pin, 26 * Only first 8bits of a register correspond to each pin,
24 * so there are 4 registers for 32 pins. 27 * so there are 4 registers for 32 pins.
@@ -135,6 +138,20 @@ static void mb86s70_gpio_set(struct gpio_chip *gc, unsigned gpio, int value)
135 spin_unlock_irqrestore(&gchip->lock, flags); 138 spin_unlock_irqrestore(&gchip->lock, flags);
136} 139}
137 140
141static int mb86s70_gpio_to_irq(struct gpio_chip *gc, unsigned int offset)
142{
143 int irq, index;
144
145 for (index = 0;; index++) {
146 irq = platform_get_irq(to_platform_device(gc->parent), index);
147 if (irq <= 0)
148 break;
149 if (irq_get_irq_data(irq)->hwirq == offset)
150 return irq;
151 }
152 return -EINVAL;
153}
154
138static int mb86s70_gpio_probe(struct platform_device *pdev) 155static int mb86s70_gpio_probe(struct platform_device *pdev)
139{ 156{
140 struct mb86s70_gpio_chip *gchip; 157 struct mb86s70_gpio_chip *gchip;
@@ -150,13 +167,15 @@ static int mb86s70_gpio_probe(struct platform_device *pdev)
150 if (IS_ERR(gchip->base)) 167 if (IS_ERR(gchip->base))
151 return PTR_ERR(gchip->base); 168 return PTR_ERR(gchip->base);
152 169
153 gchip->clk = devm_clk_get(&pdev->dev, NULL); 170 if (!has_acpi_companion(&pdev->dev)) {
154 if (IS_ERR(gchip->clk)) 171 gchip->clk = devm_clk_get(&pdev->dev, NULL);
155 return PTR_ERR(gchip->clk); 172 if (IS_ERR(gchip->clk))
173 return PTR_ERR(gchip->clk);
156 174
157 ret = clk_prepare_enable(gchip->clk); 175 ret = clk_prepare_enable(gchip->clk);
158 if (ret) 176 if (ret)
159 return ret; 177 return ret;
178 }
160 179
161 spin_lock_init(&gchip->lock); 180 spin_lock_init(&gchip->lock);
162 181
@@ -172,19 +191,28 @@ static int mb86s70_gpio_probe(struct platform_device *pdev)
172 gchip->gc.parent = &pdev->dev; 191 gchip->gc.parent = &pdev->dev;
173 gchip->gc.base = -1; 192 gchip->gc.base = -1;
174 193
194 if (has_acpi_companion(&pdev->dev))
195 gchip->gc.to_irq = mb86s70_gpio_to_irq;
196
175 ret = gpiochip_add_data(&gchip->gc, gchip); 197 ret = gpiochip_add_data(&gchip->gc, gchip);
176 if (ret) { 198 if (ret) {
177 dev_err(&pdev->dev, "couldn't register gpio driver\n"); 199 dev_err(&pdev->dev, "couldn't register gpio driver\n");
178 clk_disable_unprepare(gchip->clk); 200 clk_disable_unprepare(gchip->clk);
201 return ret;
179 } 202 }
180 203
181 return ret; 204 if (has_acpi_companion(&pdev->dev))
205 acpi_gpiochip_request_interrupts(&gchip->gc);
206
207 return 0;
182} 208}
183 209
184static int mb86s70_gpio_remove(struct platform_device *pdev) 210static int mb86s70_gpio_remove(struct platform_device *pdev)
185{ 211{
186 struct mb86s70_gpio_chip *gchip = platform_get_drvdata(pdev); 212 struct mb86s70_gpio_chip *gchip = platform_get_drvdata(pdev);
187 213
214 if (has_acpi_companion(&pdev->dev))
215 acpi_gpiochip_free_interrupts(&gchip->gc);
188 gpiochip_remove(&gchip->gc); 216 gpiochip_remove(&gchip->gc);
189 clk_disable_unprepare(gchip->clk); 217 clk_disable_unprepare(gchip->clk);
190 218
@@ -197,10 +225,19 @@ static const struct of_device_id mb86s70_gpio_dt_ids[] = {
197}; 225};
198MODULE_DEVICE_TABLE(of, mb86s70_gpio_dt_ids); 226MODULE_DEVICE_TABLE(of, mb86s70_gpio_dt_ids);
199 227
228#ifdef CONFIG_ACPI
229static const struct acpi_device_id mb86s70_gpio_acpi_ids[] = {
230 { "SCX0007" },
231 { /* sentinel */ }
232};
233MODULE_DEVICE_TABLE(acpi, mb86s70_gpio_acpi_ids);
234#endif
235
200static struct platform_driver mb86s70_gpio_driver = { 236static struct platform_driver mb86s70_gpio_driver = {
201 .driver = { 237 .driver = {
202 .name = "mb86s70-gpio", 238 .name = "mb86s70-gpio",
203 .of_match_table = mb86s70_gpio_dt_ids, 239 .of_match_table = mb86s70_gpio_dt_ids,
240 .acpi_match_table = ACPI_PTR(mb86s70_gpio_acpi_ids),
204 }, 241 },
205 .probe = mb86s70_gpio_probe, 242 .probe = mb86s70_gpio_probe,
206 .remove = mb86s70_gpio_remove, 243 .remove = mb86s70_gpio_remove,
diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
index aec7bd86ae7e..9c9b965d7d6d 100644
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -118,8 +118,15 @@ static void of_gpio_flags_quirks(struct device_node *np,
118 * Legacy handling of SPI active high chip select. If we have a 118 * Legacy handling of SPI active high chip select. If we have a
119 * property named "cs-gpios" we need to inspect the child node 119 * property named "cs-gpios" we need to inspect the child node
120 * to determine if the flags should have inverted semantics. 120 * to determine if the flags should have inverted semantics.
121 *
122 * This does not apply to an SPI device named "spi-gpio", because
123 * these have traditionally obtained their own GPIOs by parsing
124 * the device tree directly and did not respect any "spi-cs-high"
125 * property on the SPI bus children.
121 */ 126 */
122 if (IS_ENABLED(CONFIG_SPI_MASTER) && !strcmp(propname, "cs-gpios") && 127 if (IS_ENABLED(CONFIG_SPI_MASTER) &&
128 !strcmp(propname, "cs-gpios") &&
129 !of_device_is_compatible(np, "spi-gpio") &&
123 of_property_read_bool(np, "cs-gpios")) { 130 of_property_read_bool(np, "cs-gpios")) {
124 struct device_node *child; 131 struct device_node *child;
125 u32 cs; 132 u32 cs;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index b610e3b30d95..2f18c64d531f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -1959,25 +1959,6 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
1959 mutex_unlock(&adev->srbm_mutex); 1959 mutex_unlock(&adev->srbm_mutex);
1960 1960
1961 gfx_v9_0_init_compute_vmid(adev); 1961 gfx_v9_0_init_compute_vmid(adev);
1962
1963 mutex_lock(&adev->grbm_idx_mutex);
1964 /*
1965 * making sure that the following register writes will be broadcasted
1966 * to all the shaders
1967 */
1968 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1969
1970 WREG32_SOC15(GC, 0, mmPA_SC_FIFO_SIZE,
1971 (adev->gfx.config.sc_prim_fifo_size_frontend <<
1972 PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
1973 (adev->gfx.config.sc_prim_fifo_size_backend <<
1974 PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
1975 (adev->gfx.config.sc_hiz_tile_fifo_size <<
1976 PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
1977 (adev->gfx.config.sc_earlyz_tile_fifo_size <<
1978 PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
1979 mutex_unlock(&adev->grbm_idx_mutex);
1980
1981} 1962}
1982 1963
1983static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) 1964static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 083bd8114db1..dd6b4b0b5f30 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -837,7 +837,7 @@ static int kfd_ioctl_get_clock_counters(struct file *filep,
837 837
838 /* No access to rdtsc. Using raw monotonic time */ 838 /* No access to rdtsc. Using raw monotonic time */
839 args->cpu_clock_counter = ktime_get_raw_ns(); 839 args->cpu_clock_counter = ktime_get_raw_ns();
840 args->system_clock_counter = ktime_get_boot_ns(); 840 args->system_clock_counter = ktime_get_boottime_ns();
841 841
842 /* Since the counter is in nano-seconds we use 1GHz frequency */ 842 /* Since the counter is in nano-seconds we use 1GHz frequency */
843 args->system_clock_freq = 1000000000; 843 args->system_clock_freq = 1000000000;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c
index f1d326caf69e..a7e8340baf90 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c
@@ -326,7 +326,7 @@ int hwmgr_resume(struct pp_hwmgr *hwmgr)
326 if (ret) 326 if (ret)
327 return ret; 327 return ret;
328 328
329 ret = psm_adjust_power_state_dynamic(hwmgr, true, NULL); 329 ret = psm_adjust_power_state_dynamic(hwmgr, false, NULL);
330 330
331 return ret; 331 return ret;
332} 332}
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c b/drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c
index ae64ff7153d6..1cd5a8b5cdc1 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c
@@ -916,8 +916,10 @@ static int init_thermal_controller(
916 PHM_PlatformCaps_ThermalController 916 PHM_PlatformCaps_ThermalController
917 ); 917 );
918 918
919 if (0 == powerplay_table->usFanTableOffset) 919 if (0 == powerplay_table->usFanTableOffset) {
920 hwmgr->thermal_controller.use_hw_fan_control = 1;
920 return 0; 921 return 0;
922 }
921 923
922 fan_table = (const PPTable_Generic_SubTable_Header *) 924 fan_table = (const PPTable_Generic_SubTable_Header *)
923 (((unsigned long)powerplay_table) + 925 (((unsigned long)powerplay_table) +
diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
index c92999aac07c..eccb26fddbd0 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
@@ -694,6 +694,7 @@ struct pp_thermal_controller_info {
694 uint8_t ucType; 694 uint8_t ucType;
695 uint8_t ucI2cLine; 695 uint8_t ucI2cLine;
696 uint8_t ucI2cAddress; 696 uint8_t ucI2cAddress;
697 uint8_t use_hw_fan_control;
697 struct pp_fan_info fanInfo; 698 struct pp_fan_info fanInfo;
698 struct pp_advance_fan_control_parameters advanceFanControlParameters; 699 struct pp_advance_fan_control_parameters advanceFanControlParameters;
699}; 700};
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c
index 2d4cfe14f72e..29e641c6a5db 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c
@@ -2092,6 +2092,10 @@ static int polaris10_thermal_setup_fan_table(struct pp_hwmgr *hwmgr)
2092 return 0; 2092 return 0;
2093 } 2093 }
2094 2094
2095 /* use hardware fan control */
2096 if (hwmgr->thermal_controller.use_hw_fan_control)
2097 return 0;
2098
2095 tmp64 = hwmgr->thermal_controller.advanceFanControlParameters. 2099 tmp64 = hwmgr->thermal_controller.advanceFanControlParameters.
2096 usPWMMin * duty100; 2100 usPWMMin * duty100;
2097 do_div(tmp64, 10000); 2101 do_div(tmp64, 10000);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
index 72d01e873160..5418a1a87b2c 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@@ -760,7 +760,7 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
760 if (IS_ERR(gpu->cmdbuf_suballoc)) { 760 if (IS_ERR(gpu->cmdbuf_suballoc)) {
761 dev_err(gpu->dev, "Failed to create cmdbuf suballocator\n"); 761 dev_err(gpu->dev, "Failed to create cmdbuf suballocator\n");
762 ret = PTR_ERR(gpu->cmdbuf_suballoc); 762 ret = PTR_ERR(gpu->cmdbuf_suballoc);
763 goto fail; 763 goto destroy_iommu;
764 } 764 }
765 765
766 /* Create buffer: */ 766 /* Create buffer: */
@@ -768,7 +768,7 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
768 PAGE_SIZE); 768 PAGE_SIZE);
769 if (ret) { 769 if (ret) {
770 dev_err(gpu->dev, "could not create command buffer\n"); 770 dev_err(gpu->dev, "could not create command buffer\n");
771 goto destroy_iommu; 771 goto destroy_suballoc;
772 } 772 }
773 773
774 if (gpu->mmu->version == ETNAVIV_IOMMU_V1 && 774 if (gpu->mmu->version == ETNAVIV_IOMMU_V1 &&
@@ -800,6 +800,9 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
800free_buffer: 800free_buffer:
801 etnaviv_cmdbuf_free(&gpu->buffer); 801 etnaviv_cmdbuf_free(&gpu->buffer);
802 gpu->buffer.suballoc = NULL; 802 gpu->buffer.suballoc = NULL;
803destroy_suballoc:
804 etnaviv_cmdbuf_suballoc_destroy(gpu->cmdbuf_suballoc);
805 gpu->cmdbuf_suballoc = NULL;
803destroy_iommu: 806destroy_iommu:
804 etnaviv_iommu_destroy(gpu->mmu); 807 etnaviv_iommu_destroy(gpu->mmu);
805 gpu->mmu = NULL; 808 gpu->mmu = NULL;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 029fd8ec1857..f0d45ccc1aac 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1888,12 +1888,12 @@ static int ring_request_alloc(struct i915_request *request)
1888 */ 1888 */
1889 request->reserved_space += LEGACY_REQUEST_SIZE; 1889 request->reserved_space += LEGACY_REQUEST_SIZE;
1890 1890
1891 ret = switch_context(request); 1891 /* Unconditionally invalidate GPU caches and TLBs. */
1892 ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
1892 if (ret) 1893 if (ret)
1893 return ret; 1894 return ret;
1894 1895
1895 /* Unconditionally invalidate GPU caches and TLBs. */ 1896 ret = switch_context(request);
1896 ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
1897 if (ret) 1897 if (ret)
1898 return ret; 1898 return ret;
1899 1899
diff --git a/drivers/gpu/drm/imx/ipuv3-crtc.c b/drivers/gpu/drm/imx/ipuv3-crtc.c
index 9cc1d678674f..c436a28d50e4 100644
--- a/drivers/gpu/drm/imx/ipuv3-crtc.c
+++ b/drivers/gpu/drm/imx/ipuv3-crtc.c
@@ -91,14 +91,14 @@ static void ipu_crtc_atomic_disable(struct drm_crtc *crtc,
91 ipu_dc_disable(ipu); 91 ipu_dc_disable(ipu);
92 ipu_prg_disable(ipu); 92 ipu_prg_disable(ipu);
93 93
94 drm_crtc_vblank_off(crtc);
95
94 spin_lock_irq(&crtc->dev->event_lock); 96 spin_lock_irq(&crtc->dev->event_lock);
95 if (crtc->state->event) { 97 if (crtc->state->event && !crtc->state->active) {
96 drm_crtc_send_vblank_event(crtc, crtc->state->event); 98 drm_crtc_send_vblank_event(crtc, crtc->state->event);
97 crtc->state->event = NULL; 99 crtc->state->event = NULL;
98 } 100 }
99 spin_unlock_irq(&crtc->dev->event_lock); 101 spin_unlock_irq(&crtc->dev->event_lock);
100
101 drm_crtc_vblank_off(crtc);
102} 102}
103 103
104static void imx_drm_crtc_reset(struct drm_crtc *crtc) 104static void imx_drm_crtc_reset(struct drm_crtc *crtc)
diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
index d11e2281dde6..7e43b25785f7 100644
--- a/drivers/gpu/drm/panfrost/panfrost_drv.c
+++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
@@ -63,7 +63,7 @@ static int panfrost_ioctl_create_bo(struct drm_device *dev, void *data,
63 return 0; 63 return 0;
64 64
65err_free: 65err_free:
66 drm_gem_object_put_unlocked(&shmem->base); 66 drm_gem_handle_delete(file, args->handle);
67 return ret; 67 return ret;
68} 68}
69 69
diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c
index e62fe24b1a2e..5bb0f0a084e9 100644
--- a/drivers/gpu/drm/virtio/virtgpu_vq.c
+++ b/drivers/gpu/drm/virtio/virtgpu_vq.c
@@ -619,11 +619,11 @@ static void virtio_gpu_cmd_get_edid_cb(struct virtio_gpu_device *vgdev,
619 output = vgdev->outputs + scanout; 619 output = vgdev->outputs + scanout;
620 620
621 new_edid = drm_do_get_edid(&output->conn, virtio_get_edid_block, resp); 621 new_edid = drm_do_get_edid(&output->conn, virtio_get_edid_block, resp);
622 drm_connector_update_edid_property(&output->conn, new_edid);
622 623
623 spin_lock(&vgdev->display_info_lock); 624 spin_lock(&vgdev->display_info_lock);
624 old_edid = output->edid; 625 old_edid = output->edid;
625 output->edid = new_edid; 626 output->edid = new_edid;
626 drm_connector_update_edid_property(&output->conn, output->edid);
627 spin_unlock(&vgdev->display_info_lock); 627 spin_unlock(&vgdev->display_info_lock);
628 628
629 kfree(old_edid); 629 kfree(old_edid);
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index eac0c54c5970..b032d3899fa3 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -80,6 +80,7 @@
80#define HID_DEVICE_ID_ALPS_U1_DUAL_3BTN_PTP 0x1220 80#define HID_DEVICE_ID_ALPS_U1_DUAL_3BTN_PTP 0x1220
81#define HID_DEVICE_ID_ALPS_U1 0x1215 81#define HID_DEVICE_ID_ALPS_U1 0x1215
82#define HID_DEVICE_ID_ALPS_T4_BTNLESS 0x120C 82#define HID_DEVICE_ID_ALPS_T4_BTNLESS 0x120C
83#define HID_DEVICE_ID_ALPS_1222 0x1222
83 84
84 85
85#define USB_VENDOR_ID_AMI 0x046b 86#define USB_VENDOR_ID_AMI 0x046b
@@ -269,6 +270,7 @@
269#define USB_DEVICE_ID_CHICONY_MULTI_TOUCH 0xb19d 270#define USB_DEVICE_ID_CHICONY_MULTI_TOUCH 0xb19d
270#define USB_DEVICE_ID_CHICONY_WIRELESS 0x0618 271#define USB_DEVICE_ID_CHICONY_WIRELESS 0x0618
271#define USB_DEVICE_ID_CHICONY_PIXART_USB_OPTICAL_MOUSE 0x1053 272#define USB_DEVICE_ID_CHICONY_PIXART_USB_OPTICAL_MOUSE 0x1053
273#define USB_DEVICE_ID_CHICONY_PIXART_USB_OPTICAL_MOUSE2 0x0939
272#define USB_DEVICE_ID_CHICONY_WIRELESS2 0x1123 274#define USB_DEVICE_ID_CHICONY_WIRELESS2 0x1123
273#define USB_DEVICE_ID_ASUS_AK1D 0x1125 275#define USB_DEVICE_ID_ASUS_AK1D 0x1125
274#define USB_DEVICE_ID_CHICONY_TOSHIBA_WT10A 0x1408 276#define USB_DEVICE_ID_CHICONY_TOSHIBA_WT10A 0x1408
@@ -569,6 +571,7 @@
569 571
570#define USB_VENDOR_ID_HUION 0x256c 572#define USB_VENDOR_ID_HUION 0x256c
571#define USB_DEVICE_ID_HUION_TABLET 0x006e 573#define USB_DEVICE_ID_HUION_TABLET 0x006e
574#define USB_DEVICE_ID_HUION_HS64 0x006d
572 575
573#define USB_VENDOR_ID_IBM 0x04b3 576#define USB_VENDOR_ID_IBM 0x04b3
574#define USB_DEVICE_ID_IBM_SCROLLPOINT_III 0x3100 577#define USB_DEVICE_ID_IBM_SCROLLPOINT_III 0x3100
diff --git a/drivers/hid/hid-logitech-dj.c b/drivers/hid/hid-logitech-dj.c
index e564bff86515..bfcf2ee58d14 100644
--- a/drivers/hid/hid-logitech-dj.c
+++ b/drivers/hid/hid-logitech-dj.c
@@ -30,6 +30,7 @@
30 30
31#define REPORT_ID_HIDPP_SHORT 0x10 31#define REPORT_ID_HIDPP_SHORT 0x10
32#define REPORT_ID_HIDPP_LONG 0x11 32#define REPORT_ID_HIDPP_LONG 0x11
33#define REPORT_ID_HIDPP_VERY_LONG 0x12
33 34
34#define HIDPP_REPORT_SHORT_LENGTH 7 35#define HIDPP_REPORT_SHORT_LENGTH 7
35#define HIDPP_REPORT_LONG_LENGTH 20 36#define HIDPP_REPORT_LONG_LENGTH 20
@@ -1242,7 +1243,8 @@ static int logi_dj_ll_raw_request(struct hid_device *hid,
1242 int ret; 1243 int ret;
1243 1244
1244 if ((buf[0] == REPORT_ID_HIDPP_SHORT) || 1245 if ((buf[0] == REPORT_ID_HIDPP_SHORT) ||
1245 (buf[0] == REPORT_ID_HIDPP_LONG)) { 1246 (buf[0] == REPORT_ID_HIDPP_LONG) ||
1247 (buf[0] == REPORT_ID_HIDPP_VERY_LONG)) {
1246 if (count < 2) 1248 if (count < 2)
1247 return -EINVAL; 1249 return -EINVAL;
1248 1250
diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c
index 5df5dd56ecc8..b603c14d043b 100644
--- a/drivers/hid/hid-multitouch.c
+++ b/drivers/hid/hid-multitouch.c
@@ -1776,6 +1776,10 @@ static const struct hid_device_id mt_devices[] = {
1776 HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, 1776 HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8,
1777 USB_VENDOR_ID_ALPS_JP, 1777 USB_VENDOR_ID_ALPS_JP,
1778 HID_DEVICE_ID_ALPS_U1_DUAL_3BTN_PTP) }, 1778 HID_DEVICE_ID_ALPS_U1_DUAL_3BTN_PTP) },
1779 { .driver_data = MT_CLS_WIN_8_DUAL,
1780 HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8,
1781 USB_VENDOR_ID_ALPS_JP,
1782 HID_DEVICE_ID_ALPS_1222) },
1779 1783
1780 /* Lenovo X1 TAB Gen 2 */ 1784 /* Lenovo X1 TAB Gen 2 */
1781 { .driver_data = MT_CLS_WIN_8_DUAL, 1785 { .driver_data = MT_CLS_WIN_8_DUAL,
diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c
index e5ca6fe2ca57..671a285724f9 100644
--- a/drivers/hid/hid-quirks.c
+++ b/drivers/hid/hid-quirks.c
@@ -42,6 +42,7 @@ static const struct hid_device_id hid_quirks[] = {
42 { HID_USB_DEVICE(USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_UC100KM), HID_QUIRK_NOGET }, 42 { HID_USB_DEVICE(USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_UC100KM), HID_QUIRK_NOGET },
43 { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_MULTI_TOUCH), HID_QUIRK_MULTI_INPUT }, 43 { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_MULTI_TOUCH), HID_QUIRK_MULTI_INPUT },
44 { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_PIXART_USB_OPTICAL_MOUSE), HID_QUIRK_ALWAYS_POLL }, 44 { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_PIXART_USB_OPTICAL_MOUSE), HID_QUIRK_ALWAYS_POLL },
45 { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_PIXART_USB_OPTICAL_MOUSE2), HID_QUIRK_ALWAYS_POLL },
45 { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_WIRELESS), HID_QUIRK_MULTI_INPUT }, 46 { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_WIRELESS), HID_QUIRK_MULTI_INPUT },
46 { HID_USB_DEVICE(USB_VENDOR_ID_CHIC, USB_DEVICE_ID_CHIC_GAMEPAD), HID_QUIRK_BADPAD }, 47 { HID_USB_DEVICE(USB_VENDOR_ID_CHIC, USB_DEVICE_ID_CHIC_GAMEPAD), HID_QUIRK_BADPAD },
47 { HID_USB_DEVICE(USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_3AXIS_5BUTTON_STICK), HID_QUIRK_NOGET }, 48 { HID_USB_DEVICE(USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_3AXIS_5BUTTON_STICK), HID_QUIRK_NOGET },
diff --git a/drivers/hid/hid-uclogic-core.c b/drivers/hid/hid-uclogic-core.c
index 8fe02d81265d..914fb527ae7a 100644
--- a/drivers/hid/hid-uclogic-core.c
+++ b/drivers/hid/hid-uclogic-core.c
@@ -369,6 +369,8 @@ static const struct hid_device_id uclogic_devices[] = {
369 USB_DEVICE_ID_UCLOGIC_TABLET_TWHA60) }, 369 USB_DEVICE_ID_UCLOGIC_TABLET_TWHA60) },
370 { HID_USB_DEVICE(USB_VENDOR_ID_HUION, 370 { HID_USB_DEVICE(USB_VENDOR_ID_HUION,
371 USB_DEVICE_ID_HUION_TABLET) }, 371 USB_DEVICE_ID_HUION_TABLET) },
372 { HID_USB_DEVICE(USB_VENDOR_ID_HUION,
373 USB_DEVICE_ID_HUION_HS64) },
372 { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, 374 { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC,
373 USB_DEVICE_ID_HUION_TABLET) }, 375 USB_DEVICE_ID_HUION_TABLET) },
374 { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, 376 { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC,
diff --git a/drivers/hid/hid-uclogic-params.c b/drivers/hid/hid-uclogic-params.c
index 0187c9f8fc22..273d784fff66 100644
--- a/drivers/hid/hid-uclogic-params.c
+++ b/drivers/hid/hid-uclogic-params.c
@@ -977,6 +977,8 @@ int uclogic_params_init(struct uclogic_params *params,
977 /* FALL THROUGH */ 977 /* FALL THROUGH */
978 case VID_PID(USB_VENDOR_ID_HUION, 978 case VID_PID(USB_VENDOR_ID_HUION,
979 USB_DEVICE_ID_HUION_TABLET): 979 USB_DEVICE_ID_HUION_TABLET):
980 case VID_PID(USB_VENDOR_ID_HUION,
981 USB_DEVICE_ID_HUION_HS64):
980 case VID_PID(USB_VENDOR_ID_UCLOGIC, 982 case VID_PID(USB_VENDOR_ID_UCLOGIC,
981 USB_DEVICE_ID_HUION_TABLET): 983 USB_DEVICE_ID_HUION_TABLET):
982 case VID_PID(USB_VENDOR_ID_UCLOGIC, 984 case VID_PID(USB_VENDOR_ID_UCLOGIC,
diff --git a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
index 22ba21457035..aa2dbed30fc3 100644
--- a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
+++ b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
@@ -816,9 +816,9 @@ static int load_fw_from_host(struct ishtp_cl_data *client_data)
816 goto end_err_fw_release; 816 goto end_err_fw_release;
817 817
818 release_firmware(fw); 818 release_firmware(fw);
819 kfree(filename);
820 dev_info(cl_data_to_dev(client_data), "ISH firmware %s loaded\n", 819 dev_info(cl_data_to_dev(client_data), "ISH firmware %s loaded\n",
821 filename); 820 filename);
821 kfree(filename);
822 return 0; 822 return 0;
823 823
824end_err_fw_release: 824end_err_fw_release:
diff --git a/drivers/hid/intel-ish-hid/ishtp-hid-client.c b/drivers/hid/intel-ish-hid/ishtp-hid-client.c
index c0487b34d2cf..6ba944b40fdb 100644
--- a/drivers/hid/intel-ish-hid/ishtp-hid-client.c
+++ b/drivers/hid/intel-ish-hid/ishtp-hid-client.c
@@ -891,7 +891,7 @@ static int hid_ishtp_cl_reset(struct ishtp_cl_device *cl_device)
891 */ 891 */
892static int hid_ishtp_cl_suspend(struct device *device) 892static int hid_ishtp_cl_suspend(struct device *device)
893{ 893{
894 struct ishtp_cl_device *cl_device = dev_get_drvdata(device); 894 struct ishtp_cl_device *cl_device = ishtp_dev_to_cl_device(device);
895 struct ishtp_cl *hid_ishtp_cl = ishtp_get_drvdata(cl_device); 895 struct ishtp_cl *hid_ishtp_cl = ishtp_get_drvdata(cl_device);
896 struct ishtp_cl_data *client_data = ishtp_get_client_data(hid_ishtp_cl); 896 struct ishtp_cl_data *client_data = ishtp_get_client_data(hid_ishtp_cl);
897 897
@@ -912,7 +912,7 @@ static int hid_ishtp_cl_suspend(struct device *device)
912 */ 912 */
913static int hid_ishtp_cl_resume(struct device *device) 913static int hid_ishtp_cl_resume(struct device *device)
914{ 914{
915 struct ishtp_cl_device *cl_device = dev_get_drvdata(device); 915 struct ishtp_cl_device *cl_device = ishtp_dev_to_cl_device(device);
916 struct ishtp_cl *hid_ishtp_cl = ishtp_get_drvdata(cl_device); 916 struct ishtp_cl *hid_ishtp_cl = ishtp_get_drvdata(cl_device);
917 struct ishtp_cl_data *client_data = ishtp_get_client_data(hid_ishtp_cl); 917 struct ishtp_cl_data *client_data = ishtp_get_client_data(hid_ishtp_cl);
918 918
diff --git a/drivers/hid/intel-ish-hid/ishtp/bus.c b/drivers/hid/intel-ish-hid/ishtp/bus.c
index 794e700d65f7..c47c3328a0f4 100644
--- a/drivers/hid/intel-ish-hid/ishtp/bus.c
+++ b/drivers/hid/intel-ish-hid/ishtp/bus.c
@@ -471,7 +471,6 @@ static struct ishtp_cl_device *ishtp_bus_add_device(struct ishtp_device *dev,
471 } 471 }
472 472
473 ishtp_device_ready = true; 473 ishtp_device_ready = true;
474 dev_set_drvdata(&device->dev, device);
475 474
476 return device; 475 return device;
477} 476}
@@ -640,6 +639,20 @@ void *ishtp_get_drvdata(struct ishtp_cl_device *cl_device)
640EXPORT_SYMBOL(ishtp_get_drvdata); 639EXPORT_SYMBOL(ishtp_get_drvdata);
641 640
642/** 641/**
642 * ishtp_dev_to_cl_device() - get ishtp_cl_device instance from device instance
643 * @device: device instance
644 *
645 * Get the ishtp_cl_device instance which embeds the device instance in it.
646 *
647 * Return: pointer to ishtp_cl_device instance
648 */
649struct ishtp_cl_device *ishtp_dev_to_cl_device(struct device *device)
650{
651 return to_ishtp_cl_device(device);
652}
653EXPORT_SYMBOL(ishtp_dev_to_cl_device);
654
655/**
643 * ishtp_bus_new_client() - Create a new client 656 * ishtp_bus_new_client() - Create a new client
644 * @dev: ISHTP device instance 657 * @dev: ISHTP device instance
645 * 658 *
diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig
index 1c1a2514d6f3..c423e57ae888 100644
--- a/drivers/hv/Kconfig
+++ b/drivers/hv/Kconfig
@@ -10,6 +10,9 @@ config HYPERV
10 Select this option to run Linux as a Hyper-V client operating 10 Select this option to run Linux as a Hyper-V client operating
11 system. 11 system.
12 12
13config HYPERV_TIMER
14 def_bool HYPERV
15
13config HYPERV_TSCPAGE 16config HYPERV_TSCPAGE
14 def_bool HYPERV && X86_64 17 def_bool HYPERV && X86_64
15 18
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index a1ea482183e8..6188fb7dda42 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -16,6 +16,7 @@
16#include <linux/version.h> 16#include <linux/version.h>
17#include <linux/random.h> 17#include <linux/random.h>
18#include <linux/clockchips.h> 18#include <linux/clockchips.h>
19#include <clocksource/hyperv_timer.h>
19#include <asm/mshyperv.h> 20#include <asm/mshyperv.h>
20#include "hyperv_vmbus.h" 21#include "hyperv_vmbus.h"
21 22
@@ -23,21 +24,6 @@
23struct hv_context hv_context; 24struct hv_context hv_context;
24 25
25/* 26/*
26 * If false, we're using the old mechanism for stimer0 interrupts
27 * where it sends a VMbus message when it expires. The old
28 * mechanism is used when running on older versions of Hyper-V
29 * that don't support Direct Mode. While Hyper-V provides
30 * four stimers per CPU, Linux uses only stimer0.
31 */
32static bool direct_mode_enabled;
33static int stimer0_irq;
34static int stimer0_vector;
35
36#define HV_TIMER_FREQUENCY (10 * 1000 * 1000) /* 100ns period */
37#define HV_MAX_MAX_DELTA_TICKS 0xffffffff
38#define HV_MIN_DELTA_TICKS 1
39
40/*
41 * hv_init - Main initialization routine. 27 * hv_init - Main initialization routine.
42 * 28 *
43 * This routine must be called before any other routines in here are called 29 * This routine must be called before any other routines in here are called
@@ -47,9 +33,6 @@ int hv_init(void)
47 hv_context.cpu_context = alloc_percpu(struct hv_per_cpu_context); 33 hv_context.cpu_context = alloc_percpu(struct hv_per_cpu_context);
48 if (!hv_context.cpu_context) 34 if (!hv_context.cpu_context)
49 return -ENOMEM; 35 return -ENOMEM;
50
51 direct_mode_enabled = ms_hyperv.misc_features &
52 HV_STIMER_DIRECT_MODE_AVAILABLE;
53 return 0; 36 return 0;
54} 37}
55 38
@@ -88,89 +71,6 @@ int hv_post_message(union hv_connection_id connection_id,
88 return status & 0xFFFF; 71 return status & 0xFFFF;
89} 72}
90 73
91/*
92 * ISR for when stimer0 is operating in Direct Mode. Direct Mode
93 * does not use VMbus or any VMbus messages, so process here and not
94 * in the VMbus driver code.
95 */
96
97static void hv_stimer0_isr(void)
98{
99 struct hv_per_cpu_context *hv_cpu;
100
101 hv_cpu = this_cpu_ptr(hv_context.cpu_context);
102 hv_cpu->clk_evt->event_handler(hv_cpu->clk_evt);
103 add_interrupt_randomness(stimer0_vector, 0);
104}
105
106static int hv_ce_set_next_event(unsigned long delta,
107 struct clock_event_device *evt)
108{
109 u64 current_tick;
110
111 WARN_ON(!clockevent_state_oneshot(evt));
112
113 current_tick = hyperv_cs->read(NULL);
114 current_tick += delta;
115 hv_init_timer(0, current_tick);
116 return 0;
117}
118
119static int hv_ce_shutdown(struct clock_event_device *evt)
120{
121 hv_init_timer(0, 0);
122 hv_init_timer_config(0, 0);
123 if (direct_mode_enabled)
124 hv_disable_stimer0_percpu_irq(stimer0_irq);
125
126 return 0;
127}
128
129static int hv_ce_set_oneshot(struct clock_event_device *evt)
130{
131 union hv_stimer_config timer_cfg;
132
133 timer_cfg.as_uint64 = 0;
134 timer_cfg.enable = 1;
135 timer_cfg.auto_enable = 1;
136 if (direct_mode_enabled) {
137 /*
138 * When it expires, the timer will directly interrupt
139 * on the specified hardware vector/IRQ.
140 */
141 timer_cfg.direct_mode = 1;
142 timer_cfg.apic_vector = stimer0_vector;
143 hv_enable_stimer0_percpu_irq(stimer0_irq);
144 } else {
145 /*
146 * When it expires, the timer will generate a VMbus message,
147 * to be handled by the normal VMbus interrupt handler.
148 */
149 timer_cfg.direct_mode = 0;
150 timer_cfg.sintx = VMBUS_MESSAGE_SINT;
151 }
152 hv_init_timer_config(0, timer_cfg.as_uint64);
153 return 0;
154}
155
156static void hv_init_clockevent_device(struct clock_event_device *dev, int cpu)
157{
158 dev->name = "Hyper-V clockevent";
159 dev->features = CLOCK_EVT_FEAT_ONESHOT;
160 dev->cpumask = cpumask_of(cpu);
161 dev->rating = 1000;
162 /*
163 * Deliberately avoid setting dev->owner = THIS_MODULE, as doing so will
164 * result in clockevents_config_and_register() taking additional
165 * references to the hv_vmbus module making it impossible to unload.
166 */
167
168 dev->set_state_shutdown = hv_ce_shutdown;
169 dev->set_state_oneshot = hv_ce_set_oneshot;
170 dev->set_next_event = hv_ce_set_next_event;
171}
172
173
174int hv_synic_alloc(void) 74int hv_synic_alloc(void)
175{ 75{
176 int cpu; 76 int cpu;
@@ -199,14 +99,6 @@ int hv_synic_alloc(void)
199 tasklet_init(&hv_cpu->msg_dpc, 99 tasklet_init(&hv_cpu->msg_dpc,
200 vmbus_on_msg_dpc, (unsigned long) hv_cpu); 100 vmbus_on_msg_dpc, (unsigned long) hv_cpu);
201 101
202 hv_cpu->clk_evt = kzalloc(sizeof(struct clock_event_device),
203 GFP_KERNEL);
204 if (hv_cpu->clk_evt == NULL) {
205 pr_err("Unable to allocate clock event device\n");
206 goto err;
207 }
208 hv_init_clockevent_device(hv_cpu->clk_evt, cpu);
209
210 hv_cpu->synic_message_page = 102 hv_cpu->synic_message_page =
211 (void *)get_zeroed_page(GFP_ATOMIC); 103 (void *)get_zeroed_page(GFP_ATOMIC);
212 if (hv_cpu->synic_message_page == NULL) { 104 if (hv_cpu->synic_message_page == NULL) {
@@ -229,11 +121,6 @@ int hv_synic_alloc(void)
229 INIT_LIST_HEAD(&hv_cpu->chan_list); 121 INIT_LIST_HEAD(&hv_cpu->chan_list);
230 } 122 }
231 123
232 if (direct_mode_enabled &&
233 hv_setup_stimer0_irq(&stimer0_irq, &stimer0_vector,
234 hv_stimer0_isr))
235 goto err;
236
237 return 0; 124 return 0;
238err: 125err:
239 /* 126 /*
@@ -252,7 +139,6 @@ void hv_synic_free(void)
252 struct hv_per_cpu_context *hv_cpu 139 struct hv_per_cpu_context *hv_cpu
253 = per_cpu_ptr(hv_context.cpu_context, cpu); 140 = per_cpu_ptr(hv_context.cpu_context, cpu);
254 141
255 kfree(hv_cpu->clk_evt);
256 free_page((unsigned long)hv_cpu->synic_event_page); 142 free_page((unsigned long)hv_cpu->synic_event_page);
257 free_page((unsigned long)hv_cpu->synic_message_page); 143 free_page((unsigned long)hv_cpu->synic_message_page);
258 free_page((unsigned long)hv_cpu->post_msg_page); 144 free_page((unsigned long)hv_cpu->post_msg_page);
@@ -311,36 +197,9 @@ int hv_synic_init(unsigned int cpu)
311 197
312 hv_set_synic_state(sctrl.as_uint64); 198 hv_set_synic_state(sctrl.as_uint64);
313 199
314 /* 200 hv_stimer_init(cpu);
315 * Register the per-cpu clockevent source.
316 */
317 if (ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE)
318 clockevents_config_and_register(hv_cpu->clk_evt,
319 HV_TIMER_FREQUENCY,
320 HV_MIN_DELTA_TICKS,
321 HV_MAX_MAX_DELTA_TICKS);
322 return 0;
323}
324
325/*
326 * hv_synic_clockevents_cleanup - Cleanup clockevent devices
327 */
328void hv_synic_clockevents_cleanup(void)
329{
330 int cpu;
331 201
332 if (!(ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE)) 202 return 0;
333 return;
334
335 if (direct_mode_enabled)
336 hv_remove_stimer0_irq(stimer0_irq);
337
338 for_each_present_cpu(cpu) {
339 struct hv_per_cpu_context *hv_cpu
340 = per_cpu_ptr(hv_context.cpu_context, cpu);
341
342 clockevents_unbind_device(hv_cpu->clk_evt, cpu);
343 }
344} 203}
345 204
346/* 205/*
@@ -388,14 +247,7 @@ int hv_synic_cleanup(unsigned int cpu)
388 if (channel_found && vmbus_connection.conn_state == CONNECTED) 247 if (channel_found && vmbus_connection.conn_state == CONNECTED)
389 return -EBUSY; 248 return -EBUSY;
390 249
391 /* Turn off clockevent device */ 250 hv_stimer_cleanup(cpu);
392 if (ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE) {
393 struct hv_per_cpu_context *hv_cpu
394 = this_cpu_ptr(hv_context.cpu_context);
395
396 clockevents_unbind_device(hv_cpu->clk_evt, cpu);
397 hv_ce_shutdown(hv_cpu->clk_evt);
398 }
399 251
400 hv_get_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64); 252 hv_get_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
401 253
diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c
index 7d3d31f099ea..e32681ee7b9f 100644
--- a/drivers/hv/hv_util.c
+++ b/drivers/hv/hv_util.c
@@ -17,6 +17,7 @@
17#include <linux/hyperv.h> 17#include <linux/hyperv.h>
18#include <linux/clockchips.h> 18#include <linux/clockchips.h>
19#include <linux/ptp_clock_kernel.h> 19#include <linux/ptp_clock_kernel.h>
20#include <clocksource/hyperv_timer.h>
20#include <asm/mshyperv.h> 21#include <asm/mshyperv.h>
21 22
22#include "hyperv_vmbus.h" 23#include "hyperv_vmbus.h"
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index b8e1ff05f110..362e70e9d145 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -138,7 +138,6 @@ struct hv_per_cpu_context {
138 * per-cpu list of the channels based on their CPU affinity. 138 * per-cpu list of the channels based on their CPU affinity.
139 */ 139 */
140 struct list_head chan_list; 140 struct list_head chan_list;
141 struct clock_event_device *clk_evt;
142}; 141};
143 142
144struct hv_context { 143struct hv_context {
@@ -176,8 +175,6 @@ extern int hv_synic_init(unsigned int cpu);
176 175
177extern int hv_synic_cleanup(unsigned int cpu); 176extern int hv_synic_cleanup(unsigned int cpu);
178 177
179extern void hv_synic_clockevents_cleanup(void);
180
181/* Interface */ 178/* Interface */
182 179
183void hv_ringbuffer_pre_init(struct vmbus_channel *channel); 180void hv_ringbuffer_pre_init(struct vmbus_channel *channel);
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 92b1874b3eb3..72d5a7cde7ea 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -30,6 +30,7 @@
30#include <linux/kdebug.h> 30#include <linux/kdebug.h>
31#include <linux/efi.h> 31#include <linux/efi.h>
32#include <linux/random.h> 32#include <linux/random.h>
33#include <clocksource/hyperv_timer.h>
33#include "hyperv_vmbus.h" 34#include "hyperv_vmbus.h"
34 35
35struct vmbus_dynid { 36struct vmbus_dynid {
@@ -955,17 +956,6 @@ static void vmbus_onmessage_work(struct work_struct *work)
955 kfree(ctx); 956 kfree(ctx);
956} 957}
957 958
958static void hv_process_timer_expiration(struct hv_message *msg,
959 struct hv_per_cpu_context *hv_cpu)
960{
961 struct clock_event_device *dev = hv_cpu->clk_evt;
962
963 if (dev->event_handler)
964 dev->event_handler(dev);
965
966 vmbus_signal_eom(msg, HVMSG_TIMER_EXPIRED);
967}
968
969void vmbus_on_msg_dpc(unsigned long data) 959void vmbus_on_msg_dpc(unsigned long data)
970{ 960{
971 struct hv_per_cpu_context *hv_cpu = (void *)data; 961 struct hv_per_cpu_context *hv_cpu = (void *)data;
@@ -1159,9 +1149,10 @@ static void vmbus_isr(void)
1159 1149
1160 /* Check if there are actual msgs to be processed */ 1150 /* Check if there are actual msgs to be processed */
1161 if (msg->header.message_type != HVMSG_NONE) { 1151 if (msg->header.message_type != HVMSG_NONE) {
1162 if (msg->header.message_type == HVMSG_TIMER_EXPIRED) 1152 if (msg->header.message_type == HVMSG_TIMER_EXPIRED) {
1163 hv_process_timer_expiration(msg, hv_cpu); 1153 hv_stimer0_isr();
1164 else 1154 vmbus_signal_eom(msg, HVMSG_TIMER_EXPIRED);
1155 } else
1165 tasklet_schedule(&hv_cpu->msg_dpc); 1156 tasklet_schedule(&hv_cpu->msg_dpc);
1166 } 1157 }
1167 1158
@@ -1263,14 +1254,19 @@ static int vmbus_bus_init(void)
1263 ret = hv_synic_alloc(); 1254 ret = hv_synic_alloc();
1264 if (ret) 1255 if (ret)
1265 goto err_alloc; 1256 goto err_alloc;
1257
1258 ret = hv_stimer_alloc(VMBUS_MESSAGE_SINT);
1259 if (ret < 0)
1260 goto err_alloc;
1261
1266 /* 1262 /*
1267 * Initialize the per-cpu interrupt state and 1263 * Initialize the per-cpu interrupt state and stimer state.
1268 * connect to the host. 1264 * Then connect to the host.
1269 */ 1265 */
1270 ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "hyperv/vmbus:online", 1266 ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "hyperv/vmbus:online",
1271 hv_synic_init, hv_synic_cleanup); 1267 hv_synic_init, hv_synic_cleanup);
1272 if (ret < 0) 1268 if (ret < 0)
1273 goto err_alloc; 1269 goto err_cpuhp;
1274 hyperv_cpuhp_online = ret; 1270 hyperv_cpuhp_online = ret;
1275 1271
1276 ret = vmbus_connect(); 1272 ret = vmbus_connect();
@@ -1318,6 +1314,8 @@ static int vmbus_bus_init(void)
1318 1314
1319err_connect: 1315err_connect:
1320 cpuhp_remove_state(hyperv_cpuhp_online); 1316 cpuhp_remove_state(hyperv_cpuhp_online);
1317err_cpuhp:
1318 hv_stimer_free();
1321err_alloc: 1319err_alloc:
1322 hv_synic_free(); 1320 hv_synic_free();
1323 hv_remove_vmbus_irq(); 1321 hv_remove_vmbus_irq();
@@ -2064,7 +2062,7 @@ static struct acpi_driver vmbus_acpi_driver = {
2064 2062
2065static void hv_kexec_handler(void) 2063static void hv_kexec_handler(void)
2066{ 2064{
2067 hv_synic_clockevents_cleanup(); 2065 hv_stimer_global_cleanup();
2068 vmbus_initiate_unload(false); 2066 vmbus_initiate_unload(false);
2069 vmbus_connection.conn_state = DISCONNECTED; 2067 vmbus_connection.conn_state = DISCONNECTED;
2070 /* Make sure conn_state is set as hv_synic_cleanup checks for it */ 2068 /* Make sure conn_state is set as hv_synic_cleanup checks for it */
@@ -2075,6 +2073,8 @@ static void hv_kexec_handler(void)
2075 2073
2076static void hv_crash_handler(struct pt_regs *regs) 2074static void hv_crash_handler(struct pt_regs *regs)
2077{ 2075{
2076 int cpu;
2077
2078 vmbus_initiate_unload(true); 2078 vmbus_initiate_unload(true);
2079 /* 2079 /*
2080 * In crash handler we can't schedule synic cleanup for all CPUs, 2080 * In crash handler we can't schedule synic cleanup for all CPUs,
@@ -2082,7 +2082,9 @@ static void hv_crash_handler(struct pt_regs *regs)
2082 * for kdump. 2082 * for kdump.
2083 */ 2083 */
2084 vmbus_connection.conn_state = DISCONNECTED; 2084 vmbus_connection.conn_state = DISCONNECTED;
2085 hv_synic_cleanup(smp_processor_id()); 2085 cpu = smp_processor_id();
2086 hv_stimer_cleanup(cpu);
2087 hv_synic_cleanup(cpu);
2086 hyperv_cleanup(); 2088 hyperv_cleanup();
2087}; 2089};
2088 2090
@@ -2131,7 +2133,7 @@ static void __exit vmbus_exit(void)
2131 hv_remove_kexec_handler(); 2133 hv_remove_kexec_handler();
2132 hv_remove_crash_handler(); 2134 hv_remove_crash_handler();
2133 vmbus_connection.conn_state = DISCONNECTED; 2135 vmbus_connection.conn_state = DISCONNECTED;
2134 hv_synic_clockevents_cleanup(); 2136 hv_stimer_global_cleanup();
2135 vmbus_disconnect(); 2137 vmbus_disconnect();
2136 hv_remove_vmbus_irq(); 2138 hv_remove_vmbus_irq();
2137 for_each_online_cpu(cpu) { 2139 for_each_online_cpu(cpu) {
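Note: taken together, the vmbus_bus_init() hunks above add a synthetic-timer allocation step between synic allocation and CPU hotplug setup, plus a matching err_cpuhp unwind label. A condensed, non-literal C sketch of that ordering (only the names appearing in the hunks are real; the function name is illustrative):

/* Condensed sketch of the init/unwind ordering from the hunks above. */
static int vmbus_bus_init_ordering_sketch(void)
{
	int ret;

	ret = hv_synic_alloc();
	if (ret)
		goto err_alloc;

	/* New step: allocate Hyper-V synthetic timer (stimer) state. */
	ret = hv_stimer_alloc(VMBUS_MESSAGE_SINT);
	if (ret < 0)
		goto err_alloc;

	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "hyperv/vmbus:online",
				hv_synic_init, hv_synic_cleanup);
	if (ret < 0)
		goto err_cpuhp;
	return 0;

err_cpuhp:
	hv_stimer_free();	/* undo the stimer allocation */
err_alloc:
	hv_synic_free();
	return ret;
}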
diff --git a/drivers/iio/humidity/dht11.c b/drivers/iio/humidity/dht11.c
index c8159205c77d..4e22b3c3e488 100644
--- a/drivers/iio/humidity/dht11.c
+++ b/drivers/iio/humidity/dht11.c
@@ -149,7 +149,7 @@ static int dht11_decode(struct dht11 *dht11, int offset)
149 return -EIO; 149 return -EIO;
150 } 150 }
151 151
152 dht11->timestamp = ktime_get_boot_ns(); 152 dht11->timestamp = ktime_get_boottime_ns();
153 if (hum_int < 4) { /* DHT22: 100000 = (3*256+232)*100 */ 153 if (hum_int < 4) { /* DHT22: 100000 = (3*256+232)*100 */
154 dht11->temperature = (((temp_int & 0x7f) << 8) + temp_dec) * 154 dht11->temperature = (((temp_int & 0x7f) << 8) + temp_dec) *
155 ((temp_int & 0x80) ? -100 : 100); 155 ((temp_int & 0x80) ? -100 : 100);
@@ -177,7 +177,7 @@ static irqreturn_t dht11_handle_irq(int irq, void *data)
177 177
178 /* TODO: Consider making the handler safe for IRQ sharing */ 178 /* TODO: Consider making the handler safe for IRQ sharing */
179 if (dht11->num_edges < DHT11_EDGES_PER_READ && dht11->num_edges >= 0) { 179 if (dht11->num_edges < DHT11_EDGES_PER_READ && dht11->num_edges >= 0) {
180 dht11->edges[dht11->num_edges].ts = ktime_get_boot_ns(); 180 dht11->edges[dht11->num_edges].ts = ktime_get_boottime_ns();
181 dht11->edges[dht11->num_edges++].value = 181 dht11->edges[dht11->num_edges++].value =
182 gpio_get_value(dht11->gpio); 182 gpio_get_value(dht11->gpio);
183 183
@@ -196,7 +196,7 @@ static int dht11_read_raw(struct iio_dev *iio_dev,
196 int ret, timeres, offset; 196 int ret, timeres, offset;
197 197
198 mutex_lock(&dht11->lock); 198 mutex_lock(&dht11->lock);
199 if (dht11->timestamp + DHT11_DATA_VALID_TIME < ktime_get_boot_ns()) { 199 if (dht11->timestamp + DHT11_DATA_VALID_TIME < ktime_get_boottime_ns()) {
200 timeres = ktime_get_resolution_ns(); 200 timeres = ktime_get_resolution_ns();
201 dev_dbg(dht11->dev, "current timeresolution: %dns\n", timeres); 201 dev_dbg(dht11->dev, "current timeresolution: %dns\n", timeres);
202 if (timeres > DHT11_MIN_TIMERES) { 202 if (timeres > DHT11_MIN_TIMERES) {
@@ -322,7 +322,7 @@ static int dht11_probe(struct platform_device *pdev)
322 return -EINVAL; 322 return -EINVAL;
323 } 323 }
324 324
325 dht11->timestamp = ktime_get_boot_ns() - DHT11_DATA_VALID_TIME - 1; 325 dht11->timestamp = ktime_get_boottime_ns() - DHT11_DATA_VALID_TIME - 1;
326 dht11->num_edges = -1; 326 dht11->num_edges = -1;
327 327
328 platform_set_drvdata(pdev, iio); 328 platform_set_drvdata(pdev, iio);
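Note: these dht11.c hunks are part of a tree-wide rename of the time accessors: ktime_get_boot_ns() becomes ktime_get_boottime_ns(), with identical behaviour. A minimal kernel-context sketch of the validity check the driver performs, assuming the driver's 2-second DHT11_DATA_VALID_TIME (the helper name is illustrative):

#include <linux/timekeeping.h>
#include <linux/time64.h>

/* Illustrative helper: is the last boottime-stamped reading still fresh? */
static bool dht11_reading_still_valid(u64 last_read_boottime_ns)
{
	/* DHT11_DATA_VALID_TIME is 2 s in the driver; assumed here. */
	return ktime_get_boottime_ns() - last_read_boottime_ns <
	       2ULL * NSEC_PER_SEC;
}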
diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c
index 245b5844028d..401d7ff99853 100644
--- a/drivers/iio/industrialio-core.c
+++ b/drivers/iio/industrialio-core.c
@@ -228,9 +228,9 @@ s64 iio_get_time_ns(const struct iio_dev *indio_dev)
228 ktime_get_coarse_ts64(&tp); 228 ktime_get_coarse_ts64(&tp);
229 return timespec64_to_ns(&tp); 229 return timespec64_to_ns(&tp);
230 case CLOCK_BOOTTIME: 230 case CLOCK_BOOTTIME:
231 return ktime_get_boot_ns(); 231 return ktime_get_boottime_ns();
232 case CLOCK_TAI: 232 case CLOCK_TAI:
233 return ktime_get_tai_ns(); 233 return ktime_get_clocktai_ns();
234 default: 234 default:
235 BUG(); 235 BUG();
236 } 236 }
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 29f7b15c81d9..d020bb4d03d5 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -457,7 +457,7 @@ static int alloc_name(struct ib_device *ibdev, const char *name)
457 int rc; 457 int rc;
458 int i; 458 int i;
459 459
460 lockdep_assert_held_exclusive(&devices_rwsem); 460 lockdep_assert_held_write(&devices_rwsem);
461 ida_init(&inuse); 461 ida_init(&inuse);
462 xa_for_each (&devices, index, device) { 462 xa_for_each (&devices, index, device) {
463 char buf[IB_DEVICE_NAME_MAX]; 463 char buf[IB_DEVICE_NAME_MAX];
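Note: lockdep_assert_held_exclusive() is renamed to lockdep_assert_held_write() here; the semantics are unchanged, it still asserts the rwsem is held for writing. A small kernel-context sketch of the intended pairing (the lock and function names below are placeholders, not the driver's):

#include <linux/rwsem.h>
#include <linux/lockdep.h>

static DECLARE_RWSEM(example_rwsem);

static void example_needs_write_lock(void)
{
	/* Splats under lockdep if the caller only holds the read side. */
	lockdep_assert_held_write(&example_rwsem);
}

static void example_caller(void)
{
	down_write(&example_rwsem);
	example_needs_write_lock();
	up_write(&example_rwsem);
}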
diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c
index 2a0b59a4b6eb..cca414ecfcd5 100644
--- a/drivers/infiniband/hw/mlx4/alias_GUID.c
+++ b/drivers/infiniband/hw/mlx4/alias_GUID.c
@@ -310,7 +310,7 @@ static void aliasguid_query_handler(int status,
310 if (status) { 310 if (status) {
311 pr_debug("(port: %d) failed: status = %d\n", 311 pr_debug("(port: %d) failed: status = %d\n",
312 cb_ctx->port, status); 312 cb_ctx->port, status);
313 rec->time_to_run = ktime_get_boot_ns() + 1 * NSEC_PER_SEC; 313 rec->time_to_run = ktime_get_boottime_ns() + 1 * NSEC_PER_SEC;
314 goto out; 314 goto out;
315 } 315 }
316 316
@@ -416,7 +416,7 @@ next_entry:
416 be64_to_cpu((__force __be64)rec->guid_indexes), 416 be64_to_cpu((__force __be64)rec->guid_indexes),
417 be64_to_cpu((__force __be64)applied_guid_indexes), 417 be64_to_cpu((__force __be64)applied_guid_indexes),
418 be64_to_cpu((__force __be64)declined_guid_indexes)); 418 be64_to_cpu((__force __be64)declined_guid_indexes));
419 rec->time_to_run = ktime_get_boot_ns() + 419 rec->time_to_run = ktime_get_boottime_ns() +
420 resched_delay_sec * NSEC_PER_SEC; 420 resched_delay_sec * NSEC_PER_SEC;
421 } else { 421 } else {
422 rec->status = MLX4_GUID_INFO_STATUS_SET; 422 rec->status = MLX4_GUID_INFO_STATUS_SET;
@@ -709,7 +709,7 @@ static int get_low_record_time_index(struct mlx4_ib_dev *dev, u8 port,
709 } 709 }
710 } 710 }
711 if (resched_delay_sec) { 711 if (resched_delay_sec) {
712 u64 curr_time = ktime_get_boot_ns(); 712 u64 curr_time = ktime_get_boottime_ns();
713 713
714 *resched_delay_sec = (low_record_time < curr_time) ? 0 : 714 *resched_delay_sec = (low_record_time < curr_time) ? 0 :
715 div_u64((low_record_time - curr_time), NSEC_PER_SEC); 715 div_u64((low_record_time - curr_time), NSEC_PER_SEC);
diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index 659c5e0fb835..80e10f4e213a 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -15,10 +15,10 @@ config ARM_GIC_PM
15 bool 15 bool
16 depends on PM 16 depends on PM
17 select ARM_GIC 17 select ARM_GIC
18 select PM_CLK
19 18
20config ARM_GIC_MAX_NR 19config ARM_GIC_MAX_NR
21 int 20 int
21 depends on ARM_GIC
22 default 2 if ARCH_REALVIEW 22 default 2 if ARCH_REALVIEW
23 default 1 23 default 1
24 24
@@ -87,6 +87,14 @@ config ALPINE_MSI
87 select PCI_MSI 87 select PCI_MSI
88 select GENERIC_IRQ_CHIP 88 select GENERIC_IRQ_CHIP
89 89
90config AL_FIC
91 bool "Amazon's Annapurna Labs Fabric Interrupt Controller"
92 depends on OF || COMPILE_TEST
93 select GENERIC_IRQ_CHIP
94 select IRQ_DOMAIN
95 help
96 Support Amazon's Annapurna Labs Fabric Interrupt Controller.
97
90config ATMEL_AIC_IRQ 98config ATMEL_AIC_IRQ
91 bool 99 bool
92 select GENERIC_IRQ_CHIP 100 select GENERIC_IRQ_CHIP
@@ -217,13 +225,26 @@ config RDA_INTC
217 select IRQ_DOMAIN 225 select IRQ_DOMAIN
218 226
219config RENESAS_INTC_IRQPIN 227config RENESAS_INTC_IRQPIN
220 bool 228 bool "Renesas INTC External IRQ Pin Support" if COMPILE_TEST
221 select IRQ_DOMAIN 229 select IRQ_DOMAIN
230 help
231 Enable support for the Renesas Interrupt Controller for external
232 interrupt pins, as found on SH/R-Mobile and R-Car Gen1 SoCs.
222 233
223config RENESAS_IRQC 234config RENESAS_IRQC
224 bool 235 bool "Renesas R-Mobile APE6 and R-Car IRQC support" if COMPILE_TEST
225 select GENERIC_IRQ_CHIP 236 select GENERIC_IRQ_CHIP
226 select IRQ_DOMAIN 237 select IRQ_DOMAIN
238 help
239 Enable support for the Renesas Interrupt Controller for external
240 devices, as found on R-Mobile APE6, R-Car Gen2, and R-Car Gen3 SoCs.
241
242config RENESAS_RZA1_IRQC
243 bool "Renesas RZ/A1 IRQC support" if COMPILE_TEST
244 select IRQ_DOMAIN_HIERARCHY
245 help
246 Enable support for the Renesas RZ/A1 Interrupt Controller, to use up
247 to 8 external interrupts with configurable sense select.
227 248
228config ST_IRQCHIP 249config ST_IRQCHIP
229 bool 250 bool
@@ -299,8 +320,11 @@ config RENESAS_H8300H_INTC
299 select IRQ_DOMAIN 320 select IRQ_DOMAIN
300 321
301config RENESAS_H8S_INTC 322config RENESAS_H8S_INTC
302 bool 323 bool "Renesas H8S Interrupt Controller Support" if COMPILE_TEST
303 select IRQ_DOMAIN 324 select IRQ_DOMAIN
325 help
326 Enable support for the Renesas H8/300 Interrupt Controller, as found
327 on Renesas H8S SoCs.
304 328
305config IMX_GPCV2 329config IMX_GPCV2
306 bool 330 bool
diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index 606a003a0000..8d0fcec6ab23 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -1,6 +1,7 @@
1# SPDX-License-Identifier: GPL-2.0 1# SPDX-License-Identifier: GPL-2.0
2obj-$(CONFIG_IRQCHIP) += irqchip.o 2obj-$(CONFIG_IRQCHIP) += irqchip.o
3 3
4obj-$(CONFIG_AL_FIC) += irq-al-fic.o
4obj-$(CONFIG_ALPINE_MSI) += irq-alpine-msi.o 5obj-$(CONFIG_ALPINE_MSI) += irq-alpine-msi.o
5obj-$(CONFIG_ATH79) += irq-ath79-cpu.o 6obj-$(CONFIG_ATH79) += irq-ath79-cpu.o
6obj-$(CONFIG_ATH79) += irq-ath79-misc.o 7obj-$(CONFIG_ATH79) += irq-ath79-misc.o
@@ -49,6 +50,7 @@ obj-$(CONFIG_JCORE_AIC) += irq-jcore-aic.o
49obj-$(CONFIG_RDA_INTC) += irq-rda-intc.o 50obj-$(CONFIG_RDA_INTC) += irq-rda-intc.o
50obj-$(CONFIG_RENESAS_INTC_IRQPIN) += irq-renesas-intc-irqpin.o 51obj-$(CONFIG_RENESAS_INTC_IRQPIN) += irq-renesas-intc-irqpin.o
51obj-$(CONFIG_RENESAS_IRQC) += irq-renesas-irqc.o 52obj-$(CONFIG_RENESAS_IRQC) += irq-renesas-irqc.o
53obj-$(CONFIG_RENESAS_RZA1_IRQC) += irq-renesas-rza1.o
52obj-$(CONFIG_VERSATILE_FPGA_IRQ) += irq-versatile-fpga.o 54obj-$(CONFIG_VERSATILE_FPGA_IRQ) += irq-versatile-fpga.o
53obj-$(CONFIG_ARCH_NSPIRE) += irq-zevio.o 55obj-$(CONFIG_ARCH_NSPIRE) += irq-zevio.o
54obj-$(CONFIG_ARCH_VT8500) += irq-vt8500.o 56obj-$(CONFIG_ARCH_VT8500) += irq-vt8500.o
diff --git a/drivers/irqchip/irq-al-fic.c b/drivers/irqchip/irq-al-fic.c
new file mode 100644
index 000000000000..1a57cee3efab
--- /dev/null
+++ b/drivers/irqchip/irq-al-fic.c
@@ -0,0 +1,278 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
4 */
5
6#include <linux/bitfield.h>
7#include <linux/irq.h>
8#include <linux/irqchip.h>
9#include <linux/irqchip/chained_irq.h>
10#include <linux/irqdomain.h>
11#include <linux/module.h>
12#include <linux/of.h>
13#include <linux/of_address.h>
14#include <linux/of_irq.h>
15
16/* FIC Registers */
17#define AL_FIC_CAUSE 0x00
18#define AL_FIC_MASK 0x10
19#define AL_FIC_CONTROL 0x28
20
21#define CONTROL_TRIGGER_RISING BIT(3)
22#define CONTROL_MASK_MSI_X BIT(5)
23
24#define NR_FIC_IRQS 32
25
26MODULE_AUTHOR("Talel Shenhar");
27MODULE_DESCRIPTION("Amazon's Annapurna Labs Interrupt Controller Driver");
28MODULE_LICENSE("GPL v2");
29
30enum al_fic_state {
31 AL_FIC_UNCONFIGURED = 0,
32 AL_FIC_CONFIGURED_LEVEL,
33 AL_FIC_CONFIGURED_RISING_EDGE,
34};
35
36struct al_fic {
37 void __iomem *base;
38 struct irq_domain *domain;
39 const char *name;
40 unsigned int parent_irq;
41 enum al_fic_state state;
42};
43
44static void al_fic_set_trigger(struct al_fic *fic,
45 struct irq_chip_generic *gc,
46 enum al_fic_state new_state)
47{
48 irq_flow_handler_t handler;
49 u32 control = readl_relaxed(fic->base + AL_FIC_CONTROL);
50
51 if (new_state == AL_FIC_CONFIGURED_LEVEL) {
52 handler = handle_level_irq;
53 control &= ~CONTROL_TRIGGER_RISING;
54 } else {
55 handler = handle_edge_irq;
56 control |= CONTROL_TRIGGER_RISING;
57 }
58 gc->chip_types->handler = handler;
59 fic->state = new_state;
60 writel_relaxed(control, fic->base + AL_FIC_CONTROL);
61}
62
63static int al_fic_irq_set_type(struct irq_data *data, unsigned int flow_type)
64{
65 struct irq_chip_generic *gc = irq_data_get_irq_chip_data(data);
66 struct al_fic *fic = gc->private;
67 enum al_fic_state new_state;
68 int ret = 0;
69
70 irq_gc_lock(gc);
71
72 if (((flow_type & IRQ_TYPE_SENSE_MASK) != IRQ_TYPE_LEVEL_HIGH) &&
73 ((flow_type & IRQ_TYPE_SENSE_MASK) != IRQ_TYPE_EDGE_RISING)) {
74 pr_debug("fic doesn't support flow type %d\n", flow_type);
75 ret = -EINVAL;
76 goto err;
77 }
78
79 new_state = (flow_type & IRQ_TYPE_LEVEL_HIGH) ?
80 AL_FIC_CONFIGURED_LEVEL : AL_FIC_CONFIGURED_RISING_EDGE;
81
82 /*
83 * A given FIC instance can be either all level or all edge triggered.
84 * This is generally fixed depending on what pieces of HW it's wired up
85 * to.
86 *
87 * We configure it based on the sensitivity of the first source
 88	 * being set up, and reject any subsequent attempt at configuring it in a
89 * different way.
90 */
91 if (fic->state == AL_FIC_UNCONFIGURED) {
92 al_fic_set_trigger(fic, gc, new_state);
93 } else if (fic->state != new_state) {
94 pr_debug("fic %s state already configured to %d\n",
95 fic->name, fic->state);
96 ret = -EINVAL;
97 goto err;
98 }
99
100err:
101 irq_gc_unlock(gc);
102
103 return ret;
104}
105
106static void al_fic_irq_handler(struct irq_desc *desc)
107{
108 struct al_fic *fic = irq_desc_get_handler_data(desc);
109 struct irq_domain *domain = fic->domain;
110 struct irq_chip *irqchip = irq_desc_get_chip(desc);
111 struct irq_chip_generic *gc = irq_get_domain_generic_chip(domain, 0);
112 unsigned long pending;
113 unsigned int irq;
114 u32 hwirq;
115
116 chained_irq_enter(irqchip, desc);
117
118 pending = readl_relaxed(fic->base + AL_FIC_CAUSE);
119 pending &= ~gc->mask_cache;
120
121 for_each_set_bit(hwirq, &pending, NR_FIC_IRQS) {
122 irq = irq_find_mapping(domain, hwirq);
123 generic_handle_irq(irq);
124 }
125
126 chained_irq_exit(irqchip, desc);
127}
128
129static int al_fic_register(struct device_node *node,
130 struct al_fic *fic)
131{
132 struct irq_chip_generic *gc;
133 int ret;
134
135 fic->domain = irq_domain_add_linear(node,
136 NR_FIC_IRQS,
137 &irq_generic_chip_ops,
138 fic);
139 if (!fic->domain) {
140 pr_err("fail to add irq domain\n");
141 return -ENOMEM;
142 }
143
144 ret = irq_alloc_domain_generic_chips(fic->domain,
145 NR_FIC_IRQS,
146 1, fic->name,
147 handle_level_irq,
148 0, 0, IRQ_GC_INIT_MASK_CACHE);
149 if (ret) {
150 pr_err("fail to allocate generic chip (%d)\n", ret);
151 goto err_domain_remove;
152 }
153
154 gc = irq_get_domain_generic_chip(fic->domain, 0);
155 gc->reg_base = fic->base;
156 gc->chip_types->regs.mask = AL_FIC_MASK;
157 gc->chip_types->regs.ack = AL_FIC_CAUSE;
158 gc->chip_types->chip.irq_mask = irq_gc_mask_set_bit;
159 gc->chip_types->chip.irq_unmask = irq_gc_mask_clr_bit;
160 gc->chip_types->chip.irq_ack = irq_gc_ack_clr_bit;
161 gc->chip_types->chip.irq_set_type = al_fic_irq_set_type;
162 gc->chip_types->chip.flags = IRQCHIP_SKIP_SET_WAKE;
163 gc->private = fic;
164
165 irq_set_chained_handler_and_data(fic->parent_irq,
166 al_fic_irq_handler,
167 fic);
168 return 0;
169
170err_domain_remove:
171 irq_domain_remove(fic->domain);
172
173 return ret;
174}
175
176/*
177 * al_fic_wire_init() - initialize and configure fic in wire mode
178 * @of_node: optional pointer to interrupt controller's device tree node.
179 * @base: mmio to fic register
180 * @name: name of the fic
181 * @parent_irq: interrupt of parent
182 *
 183 * This API configures the FIC hardware to work in wire mode.
 184 * In wire mode, the FIC hardware generates a wire ("wired") interrupt.
 185 * The interrupt can be generated on a positive edge or on a level - which
 186 * one is used depends on the hardware connected to this FIC.
187 */
188static struct al_fic *al_fic_wire_init(struct device_node *node,
189 void __iomem *base,
190 const char *name,
191 unsigned int parent_irq)
192{
193 struct al_fic *fic;
194 int ret;
195 u32 control = CONTROL_MASK_MSI_X;
196
197 fic = kzalloc(sizeof(*fic), GFP_KERNEL);
198 if (!fic)
199 return ERR_PTR(-ENOMEM);
200
201 fic->base = base;
202 fic->parent_irq = parent_irq;
203 fic->name = name;
204
205 /* mask out all interrupts */
206 writel_relaxed(0xFFFFFFFF, fic->base + AL_FIC_MASK);
207
208 /* clear any pending interrupt */
209 writel_relaxed(0, fic->base + AL_FIC_CAUSE);
210
211 writel_relaxed(control, fic->base + AL_FIC_CONTROL);
212
213 ret = al_fic_register(node, fic);
214 if (ret) {
215 pr_err("fail to register irqchip\n");
216 goto err_free;
217 }
218
219 pr_debug("%s initialized successfully in Legacy mode (parent-irq=%u)\n",
220 fic->name, parent_irq);
221
222 return fic;
223
224err_free:
225 kfree(fic);
226 return ERR_PTR(ret);
227}
228
229static int __init al_fic_init_dt(struct device_node *node,
230 struct device_node *parent)
231{
232 int ret;
233 void __iomem *base;
234 unsigned int parent_irq;
235 struct al_fic *fic;
236
237 if (!parent) {
238 pr_err("%s: unsupported - device require a parent\n",
239 node->name);
240 return -EINVAL;
241 }
242
243 base = of_iomap(node, 0);
244 if (!base) {
245 pr_err("%s: fail to map memory\n", node->name);
246 return -ENOMEM;
247 }
248
249 parent_irq = irq_of_parse_and_map(node, 0);
250 if (!parent_irq) {
251 pr_err("%s: fail to map irq\n", node->name);
252 ret = -EINVAL;
253 goto err_unmap;
254 }
255
256 fic = al_fic_wire_init(node,
257 base,
258 node->name,
259 parent_irq);
260 if (IS_ERR(fic)) {
261 pr_err("%s: fail to initialize irqchip (%lu)\n",
262 node->name,
263 PTR_ERR(fic));
264 ret = PTR_ERR(fic);
265 goto err_irq_dispose;
266 }
267
268 return 0;
269
270err_irq_dispose:
271 irq_dispose_mapping(parent_irq);
272err_unmap:
273 iounmap(base);
274
275 return ret;
276}
277
278IRQCHIP_DECLARE(al_fic, "amazon,al-fic", al_fic_init_dt);
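Note: the new FIC driver latches a whole controller instance into either level or rising-edge triggering based on the first source configured, and al_fic_irq_set_type() rejects later requests with a different sense. A hedged consumer-side sketch of what that implies (the device, irq numbers and handler below are hypothetical):

#include <linux/interrupt.h>

/*
 * Both lines below belong to the same FIC instance, so the second request
 * must use the same trigger type as the first; asking for
 * IRQF_TRIGGER_RISING here would make al_fic_irq_set_type() return -EINVAL.
 */
static int example_request_fic_lines(struct device *dev, int irq_a, int irq_b,
				     irq_handler_t handler)
{
	int ret;

	ret = devm_request_irq(dev, irq_a, handler, IRQF_TRIGGER_HIGH,
			       "fic-line-a", dev);
	if (ret)
		return ret;

	return devm_request_irq(dev, irq_b, handler, IRQF_TRIGGER_HIGH,
				"fic-line-b", dev);
}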
diff --git a/drivers/irqchip/irq-csky-mpintc.c b/drivers/irqchip/irq-csky-mpintc.c
index c67c961ab6cc..a1534edef7fa 100644
--- a/drivers/irqchip/irq-csky-mpintc.c
+++ b/drivers/irqchip/irq-csky-mpintc.c
@@ -32,8 +32,8 @@ static void __iomem *INTCL_base;
32#define INTCG_CIDSTR 0x1000 32#define INTCG_CIDSTR 0x1000
33 33
34#define INTCL_PICTLR 0x0 34#define INTCL_PICTLR 0x0
35#define INTCL_CFGR 0x14
35#define INTCL_SIGR 0x60 36#define INTCL_SIGR 0x60
36#define INTCL_HPPIR 0x68
37#define INTCL_RDYIR 0x6c 37#define INTCL_RDYIR 0x6c
38#define INTCL_SENR 0xa0 38#define INTCL_SENR 0xa0
39#define INTCL_CENR 0xa4 39#define INTCL_CENR 0xa4
@@ -41,21 +41,49 @@ static void __iomem *INTCL_base;
41 41
42static DEFINE_PER_CPU(void __iomem *, intcl_reg); 42static DEFINE_PER_CPU(void __iomem *, intcl_reg);
43 43
44static unsigned long *__trigger;
45
46#define IRQ_OFFSET(irq) ((irq < COMM_IRQ_BASE) ? irq : (irq - COMM_IRQ_BASE))
47
48#define TRIG_BYTE_OFFSET(i) ((((i) * 2) / 32) * 4)
49#define TRIG_BIT_OFFSET(i) (((i) * 2) % 32)
50
51#define TRIG_VAL(trigger, irq) (trigger << TRIG_BIT_OFFSET(IRQ_OFFSET(irq)))
52#define TRIG_VAL_MSK(irq) (~(3 << TRIG_BIT_OFFSET(IRQ_OFFSET(irq))))
53
54#define TRIG_BASE(irq) \
55 (TRIG_BYTE_OFFSET(IRQ_OFFSET(irq)) + ((irq < COMM_IRQ_BASE) ? \
56 (this_cpu_read(intcl_reg) + INTCL_CFGR) : (INTCG_base + INTCG_CICFGR)))
57
58static DEFINE_SPINLOCK(setup_lock);
59static void setup_trigger(unsigned long irq, unsigned long trigger)
60{
61 unsigned int tmp;
62
63 spin_lock(&setup_lock);
64
65 /* setup trigger */
66 tmp = readl_relaxed(TRIG_BASE(irq)) & TRIG_VAL_MSK(irq);
67
68 writel_relaxed(tmp | TRIG_VAL(trigger, irq), TRIG_BASE(irq));
69
70 spin_unlock(&setup_lock);
71}
72
44static void csky_mpintc_handler(struct pt_regs *regs) 73static void csky_mpintc_handler(struct pt_regs *regs)
45{ 74{
46 void __iomem *reg_base = this_cpu_read(intcl_reg); 75 void __iomem *reg_base = this_cpu_read(intcl_reg);
47 76
48 do { 77 handle_domain_irq(root_domain,
49 handle_domain_irq(root_domain, 78 readl_relaxed(reg_base + INTCL_RDYIR), regs);
50 readl_relaxed(reg_base + INTCL_RDYIR),
51 regs);
52 } while (readl_relaxed(reg_base + INTCL_HPPIR) & BIT(31));
53} 79}
54 80
55static void csky_mpintc_enable(struct irq_data *d) 81static void csky_mpintc_enable(struct irq_data *d)
56{ 82{
57 void __iomem *reg_base = this_cpu_read(intcl_reg); 83 void __iomem *reg_base = this_cpu_read(intcl_reg);
58 84
85 setup_trigger(d->hwirq, __trigger[d->hwirq]);
86
59 writel_relaxed(d->hwirq, reg_base + INTCL_SENR); 87 writel_relaxed(d->hwirq, reg_base + INTCL_SENR);
60} 88}
61 89
@@ -73,6 +101,28 @@ static void csky_mpintc_eoi(struct irq_data *d)
73 writel_relaxed(d->hwirq, reg_base + INTCL_CACR); 101 writel_relaxed(d->hwirq, reg_base + INTCL_CACR);
74} 102}
75 103
104static int csky_mpintc_set_type(struct irq_data *d, unsigned int type)
105{
106 switch (type & IRQ_TYPE_SENSE_MASK) {
107 case IRQ_TYPE_LEVEL_HIGH:
108 __trigger[d->hwirq] = 0;
109 break;
110 case IRQ_TYPE_LEVEL_LOW:
111 __trigger[d->hwirq] = 1;
112 break;
113 case IRQ_TYPE_EDGE_RISING:
114 __trigger[d->hwirq] = 2;
115 break;
116 case IRQ_TYPE_EDGE_FALLING:
117 __trigger[d->hwirq] = 3;
118 break;
119 default:
120 return -EINVAL;
121 }
122
123 return 0;
124}
125
76#ifdef CONFIG_SMP 126#ifdef CONFIG_SMP
77static int csky_irq_set_affinity(struct irq_data *d, 127static int csky_irq_set_affinity(struct irq_data *d,
78 const struct cpumask *mask_val, 128 const struct cpumask *mask_val,
@@ -89,8 +139,19 @@ static int csky_irq_set_affinity(struct irq_data *d,
89 if (cpu >= nr_cpu_ids) 139 if (cpu >= nr_cpu_ids)
90 return -EINVAL; 140 return -EINVAL;
91 141
92 /* Enable interrupt destination */ 142 /*
 93	cpu |= BIT(31);	143	 * The csky,mpintc supports automatic irq delivery, but it can only
 		144	 * deliver an external irq to one cpu or to all cpus. It doesn't
 		145	 * support delivering an external irq to an arbitrary group of cpus
 		146	 * selected by cpu_mask.
 		147	 * So we only use auto deliver mode when the affinity mask_val is
148 * equal to cpu_present_mask.
149 *
150 */
151 if (cpumask_equal(mask_val, cpu_present_mask))
152 cpu = 0;
153 else
154 cpu |= BIT(31);
94 155
95 writel_relaxed(cpu, INTCG_base + INTCG_CIDSTR + offset); 156 writel_relaxed(cpu, INTCG_base + INTCG_CIDSTR + offset);
96 157
@@ -105,6 +166,7 @@ static struct irq_chip csky_irq_chip = {
105 .irq_eoi = csky_mpintc_eoi, 166 .irq_eoi = csky_mpintc_eoi,
106 .irq_enable = csky_mpintc_enable, 167 .irq_enable = csky_mpintc_enable,
107 .irq_disable = csky_mpintc_disable, 168 .irq_disable = csky_mpintc_disable,
169 .irq_set_type = csky_mpintc_set_type,
108#ifdef CONFIG_SMP 170#ifdef CONFIG_SMP
109 .irq_set_affinity = csky_irq_set_affinity, 171 .irq_set_affinity = csky_irq_set_affinity,
110#endif 172#endif
@@ -125,9 +187,26 @@ static int csky_irqdomain_map(struct irq_domain *d, unsigned int irq,
125 return 0; 187 return 0;
126} 188}
127 189
190static int csky_irq_domain_xlate_cells(struct irq_domain *d,
191 struct device_node *ctrlr, const u32 *intspec,
192 unsigned int intsize, unsigned long *out_hwirq,
193 unsigned int *out_type)
194{
195 if (WARN_ON(intsize < 1))
196 return -EINVAL;
197
198 *out_hwirq = intspec[0];
199 if (intsize > 1)
200 *out_type = intspec[1] & IRQ_TYPE_SENSE_MASK;
201 else
202 *out_type = IRQ_TYPE_LEVEL_HIGH;
203
204 return 0;
205}
206
128static const struct irq_domain_ops csky_irqdomain_ops = { 207static const struct irq_domain_ops csky_irqdomain_ops = {
129 .map = csky_irqdomain_map, 208 .map = csky_irqdomain_map,
130 .xlate = irq_domain_xlate_onecell, 209 .xlate = csky_irq_domain_xlate_cells,
131}; 210};
132 211
133#ifdef CONFIG_SMP 212#ifdef CONFIG_SMP
@@ -161,6 +240,10 @@ csky_mpintc_init(struct device_node *node, struct device_node *parent)
161 if (ret < 0) 240 if (ret < 0)
162 nr_irq = INTC_IRQS; 241 nr_irq = INTC_IRQS;
163 242
243 __trigger = kcalloc(nr_irq, sizeof(unsigned long), GFP_KERNEL);
244 if (__trigger == NULL)
245 return -ENXIO;
246
164 if (INTCG_base == NULL) { 247 if (INTCG_base == NULL) {
165 INTCG_base = ioremap(mfcr("cr<31, 14>"), 248 INTCG_base = ioremap(mfcr("cr<31, 14>"),
166 INTCL_SIZE*nr_cpu_ids + INTCG_SIZE); 249 INTCL_SIZE*nr_cpu_ids + INTCG_SIZE);
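Note: the new trigger setup stores two configuration bits per interrupt in 32-bit CFGR words, after first folding the hwirq through IRQ_OFFSET(); TRIG_BYTE_OFFSET() picks the word and TRIG_BIT_OFFSET() the bit position inside it. A standalone, purely illustrative C program working through that arithmetic:

#include <stdio.h>

#define TRIG_BYTE_OFFSET(i)	((((i) * 2) / 32) * 4)	/* word (byte) offset */
#define TRIG_BIT_OFFSET(i)	(((i) * 2) % 32)	/* bit position in word */

int main(void)
{
	/* e.g. offset 21 lands in the second word (byte 4), bits 10..11 */
	unsigned int offsets[] = { 0, 15, 16, 21, 42 };
	unsigned int i;

	for (i = 0; i < sizeof(offsets) / sizeof(offsets[0]); i++)
		printf("offset %2u -> byte %2u, bit %2u\n", offsets[i],
		       TRIG_BYTE_OFFSET(offsets[i]), TRIG_BIT_OFFSET(offsets[i]));
	return 0;
}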
diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c
index 875ac80f690b..7338f90b2f9e 100644
--- a/drivers/irqchip/irq-gic-v2m.c
+++ b/drivers/irqchip/irq-gic-v2m.c
@@ -53,6 +53,7 @@
53 53
54/* List of flags for specific v2m implementation */ 54/* List of flags for specific v2m implementation */
55#define GICV2M_NEEDS_SPI_OFFSET 0x00000001 55#define GICV2M_NEEDS_SPI_OFFSET 0x00000001
56#define GICV2M_GRAVITON_ADDRESS_ONLY 0x00000002
56 57
57static LIST_HEAD(v2m_nodes); 58static LIST_HEAD(v2m_nodes);
58static DEFINE_SPINLOCK(v2m_lock); 59static DEFINE_SPINLOCK(v2m_lock);
@@ -95,15 +96,26 @@ static struct msi_domain_info gicv2m_msi_domain_info = {
95 .chip = &gicv2m_msi_irq_chip, 96 .chip = &gicv2m_msi_irq_chip,
96}; 97};
97 98
99static phys_addr_t gicv2m_get_msi_addr(struct v2m_data *v2m, int hwirq)
100{
101 if (v2m->flags & GICV2M_GRAVITON_ADDRESS_ONLY)
102 return v2m->res.start | ((hwirq - 32) << 3);
103 else
104 return v2m->res.start + V2M_MSI_SETSPI_NS;
105}
106
98static void gicv2m_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) 107static void gicv2m_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
99{ 108{
100 struct v2m_data *v2m = irq_data_get_irq_chip_data(data); 109 struct v2m_data *v2m = irq_data_get_irq_chip_data(data);
101 phys_addr_t addr = v2m->res.start + V2M_MSI_SETSPI_NS; 110 phys_addr_t addr = gicv2m_get_msi_addr(v2m, data->hwirq);
102 111
103 msg->address_hi = upper_32_bits(addr); 112 msg->address_hi = upper_32_bits(addr);
104 msg->address_lo = lower_32_bits(addr); 113 msg->address_lo = lower_32_bits(addr);
105 msg->data = data->hwirq;
106 114
115 if (v2m->flags & GICV2M_GRAVITON_ADDRESS_ONLY)
116 msg->data = 0;
117 else
118 msg->data = data->hwirq;
107 if (v2m->flags & GICV2M_NEEDS_SPI_OFFSET) 119 if (v2m->flags & GICV2M_NEEDS_SPI_OFFSET)
108 msg->data -= v2m->spi_offset; 120 msg->data -= v2m->spi_offset;
109 121
@@ -185,7 +197,7 @@ static int gicv2m_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
185 hwirq = v2m->spi_start + offset; 197 hwirq = v2m->spi_start + offset;
186 198
187 err = iommu_dma_prepare_msi(info->desc, 199 err = iommu_dma_prepare_msi(info->desc,
188 v2m->res.start + V2M_MSI_SETSPI_NS); 200 gicv2m_get_msi_addr(v2m, hwirq));
189 if (err) 201 if (err)
190 return err; 202 return err;
191 203
@@ -304,7 +316,7 @@ static int gicv2m_allocate_domains(struct irq_domain *parent)
304 316
305static int __init gicv2m_init_one(struct fwnode_handle *fwnode, 317static int __init gicv2m_init_one(struct fwnode_handle *fwnode,
306 u32 spi_start, u32 nr_spis, 318 u32 spi_start, u32 nr_spis,
307 struct resource *res) 319 struct resource *res, u32 flags)
308{ 320{
309 int ret; 321 int ret;
310 struct v2m_data *v2m; 322 struct v2m_data *v2m;
@@ -317,6 +329,7 @@ static int __init gicv2m_init_one(struct fwnode_handle *fwnode,
317 329
318 INIT_LIST_HEAD(&v2m->entry); 330 INIT_LIST_HEAD(&v2m->entry);
319 v2m->fwnode = fwnode; 331 v2m->fwnode = fwnode;
332 v2m->flags = flags;
320 333
321 memcpy(&v2m->res, res, sizeof(struct resource)); 334 memcpy(&v2m->res, res, sizeof(struct resource));
322 335
@@ -331,7 +344,14 @@ static int __init gicv2m_init_one(struct fwnode_handle *fwnode,
331 v2m->spi_start = spi_start; 344 v2m->spi_start = spi_start;
332 v2m->nr_spis = nr_spis; 345 v2m->nr_spis = nr_spis;
333 } else { 346 } else {
334 u32 typer = readl_relaxed(v2m->base + V2M_MSI_TYPER); 347 u32 typer;
348
349 /* Graviton should always have explicit spi_start/nr_spis */
350 if (v2m->flags & GICV2M_GRAVITON_ADDRESS_ONLY) {
351 ret = -EINVAL;
352 goto err_iounmap;
353 }
354 typer = readl_relaxed(v2m->base + V2M_MSI_TYPER);
335 355
336 v2m->spi_start = V2M_MSI_TYPER_BASE_SPI(typer); 356 v2m->spi_start = V2M_MSI_TYPER_BASE_SPI(typer);
337 v2m->nr_spis = V2M_MSI_TYPER_NUM_SPI(typer); 357 v2m->nr_spis = V2M_MSI_TYPER_NUM_SPI(typer);
@@ -352,18 +372,21 @@ static int __init gicv2m_init_one(struct fwnode_handle *fwnode,
352 * 372 *
353 * Broadom NS2 GICv2m implementation has an erratum where the MSI data 373 * Broadom NS2 GICv2m implementation has an erratum where the MSI data
354 * is 'spi_number - 32' 374 * is 'spi_number - 32'
375 *
376 * Reading that register fails on the Graviton implementation
355 */ 377 */
356 switch (readl_relaxed(v2m->base + V2M_MSI_IIDR)) { 378 if (!(v2m->flags & GICV2M_GRAVITON_ADDRESS_ONLY)) {
357 case XGENE_GICV2M_MSI_IIDR: 379 switch (readl_relaxed(v2m->base + V2M_MSI_IIDR)) {
358 v2m->flags |= GICV2M_NEEDS_SPI_OFFSET; 380 case XGENE_GICV2M_MSI_IIDR:
359 v2m->spi_offset = v2m->spi_start; 381 v2m->flags |= GICV2M_NEEDS_SPI_OFFSET;
360 break; 382 v2m->spi_offset = v2m->spi_start;
361 case BCM_NS2_GICV2M_MSI_IIDR: 383 break;
362 v2m->flags |= GICV2M_NEEDS_SPI_OFFSET; 384 case BCM_NS2_GICV2M_MSI_IIDR:
363 v2m->spi_offset = 32; 385 v2m->flags |= GICV2M_NEEDS_SPI_OFFSET;
364 break; 386 v2m->spi_offset = 32;
387 break;
388 }
365 } 389 }
366
367 v2m->bm = kcalloc(BITS_TO_LONGS(v2m->nr_spis), sizeof(long), 390 v2m->bm = kcalloc(BITS_TO_LONGS(v2m->nr_spis), sizeof(long),
368 GFP_KERNEL); 391 GFP_KERNEL);
369 if (!v2m->bm) { 392 if (!v2m->bm) {
@@ -416,7 +439,8 @@ static int __init gicv2m_of_init(struct fwnode_handle *parent_handle,
416 pr_info("DT overriding V2M MSI_TYPER (base:%u, num:%u)\n", 439 pr_info("DT overriding V2M MSI_TYPER (base:%u, num:%u)\n",
417 spi_start, nr_spis); 440 spi_start, nr_spis);
418 441
419 ret = gicv2m_init_one(&child->fwnode, spi_start, nr_spis, &res); 442 ret = gicv2m_init_one(&child->fwnode, spi_start, nr_spis,
443 &res, 0);
420 if (ret) { 444 if (ret) {
421 of_node_put(child); 445 of_node_put(child);
422 break; 446 break;
@@ -448,6 +472,25 @@ static struct fwnode_handle *gicv2m_get_fwnode(struct device *dev)
448 return data->fwnode; 472 return data->fwnode;
449} 473}
450 474
475static bool acpi_check_amazon_graviton_quirks(void)
476{
477 static struct acpi_table_madt *madt;
478 acpi_status status;
479 bool rc = false;
480
481#define ACPI_AMZN_OEM_ID "AMAZON"
482
483 status = acpi_get_table(ACPI_SIG_MADT, 0,
484 (struct acpi_table_header **)&madt);
485
486 if (ACPI_FAILURE(status) || !madt)
487 return rc;
488 rc = !memcmp(madt->header.oem_id, ACPI_AMZN_OEM_ID, ACPI_OEM_ID_SIZE);
489 acpi_put_table((struct acpi_table_header *)madt);
490
491 return rc;
492}
493
451static int __init 494static int __init
452acpi_parse_madt_msi(union acpi_subtable_headers *header, 495acpi_parse_madt_msi(union acpi_subtable_headers *header,
453 const unsigned long end) 496 const unsigned long end)
@@ -457,6 +500,7 @@ acpi_parse_madt_msi(union acpi_subtable_headers *header,
457 u32 spi_start = 0, nr_spis = 0; 500 u32 spi_start = 0, nr_spis = 0;
458 struct acpi_madt_generic_msi_frame *m; 501 struct acpi_madt_generic_msi_frame *m;
459 struct fwnode_handle *fwnode; 502 struct fwnode_handle *fwnode;
503 u32 flags = 0;
460 504
461 m = (struct acpi_madt_generic_msi_frame *)header; 505 m = (struct acpi_madt_generic_msi_frame *)header;
462 if (BAD_MADT_ENTRY(m, end)) 506 if (BAD_MADT_ENTRY(m, end))
@@ -466,6 +510,13 @@ acpi_parse_madt_msi(union acpi_subtable_headers *header,
466 res.end = m->base_address + SZ_4K - 1; 510 res.end = m->base_address + SZ_4K - 1;
467 res.flags = IORESOURCE_MEM; 511 res.flags = IORESOURCE_MEM;
468 512
513 if (acpi_check_amazon_graviton_quirks()) {
514 pr_info("applying Amazon Graviton quirk\n");
515 res.end = res.start + SZ_8K - 1;
516 flags |= GICV2M_GRAVITON_ADDRESS_ONLY;
517 gicv2m_msi_domain_info.flags &= ~MSI_FLAG_MULTI_PCI_MSI;
518 }
519
469 if (m->flags & ACPI_MADT_OVERRIDE_SPI_VALUES) { 520 if (m->flags & ACPI_MADT_OVERRIDE_SPI_VALUES) {
470 spi_start = m->spi_base; 521 spi_start = m->spi_base;
471 nr_spis = m->spi_count; 522 nr_spis = m->spi_count;
@@ -480,7 +531,7 @@ acpi_parse_madt_msi(union acpi_subtable_headers *header,
480 return -EINVAL; 531 return -EINVAL;
481 } 532 }
482 533
483 ret = gicv2m_init_one(fwnode, spi_start, nr_spis, &res); 534 ret = gicv2m_init_one(fwnode, spi_start, nr_spis, &res, flags);
484 if (ret) 535 if (ret)
485 irq_domain_free_fwnode(fwnode); 536 irq_domain_free_fwnode(fwnode);
486 537
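Note: with the Graviton quirk, the SPI number is encoded in the doorbell address rather than in the MSI data: gicv2m_get_msi_addr() returns res.start | ((hwirq - 32) << 3) and msg->data is forced to 0. A standalone worked example of that encoding (the frame base address below is made up for illustration):

#include <stdint.h>
#include <stdio.h>

#define FRAME_BASE	0xe0080000ULL	/* hypothetical MSI frame base */

/* SPIs start at hwirq 32; each gets an 8-byte doorbell slot. */
static uint64_t graviton_msi_addr(uint32_t hwirq)
{
	return FRAME_BASE | ((uint64_t)(hwirq - 32) << 3);
}

int main(void)
{
	uint32_t hwirq;

	for (hwirq = 32; hwirq < 36; hwirq++)
		printf("SPI %u -> doorbell 0x%llx, data 0\n", hwirq,
		       (unsigned long long)graviton_msi_addr(hwirq));
	return 0;
}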
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index d29b44b677e4..35500801dc2b 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -733,32 +733,43 @@ static void its_flush_cmd(struct its_node *its, struct its_cmd_block *cmd)
733} 733}
734 734
735static int its_wait_for_range_completion(struct its_node *its, 735static int its_wait_for_range_completion(struct its_node *its,
736 struct its_cmd_block *from, 736 u64 prev_idx,
737 struct its_cmd_block *to) 737 struct its_cmd_block *to)
738{ 738{
739 u64 rd_idx, from_idx, to_idx; 739 u64 rd_idx, to_idx, linear_idx;
740 u32 count = 1000000; /* 1s! */ 740 u32 count = 1000000; /* 1s! */
741 741
742 from_idx = its_cmd_ptr_to_offset(its, from); 742 /* Linearize to_idx if the command set has wrapped around */
743 to_idx = its_cmd_ptr_to_offset(its, to); 743 to_idx = its_cmd_ptr_to_offset(its, to);
744 if (to_idx < prev_idx)
745 to_idx += ITS_CMD_QUEUE_SZ;
746
747 linear_idx = prev_idx;
744 748
745 while (1) { 749 while (1) {
750 s64 delta;
751
746 rd_idx = readl_relaxed(its->base + GITS_CREADR); 752 rd_idx = readl_relaxed(its->base + GITS_CREADR);
747 753
748 /* Direct case */ 754 /*
749 if (from_idx < to_idx && rd_idx >= to_idx) 755 * Compute the read pointer progress, taking the
750 break; 756 * potential wrap-around into account.
757 */
758 delta = rd_idx - prev_idx;
759 if (rd_idx < prev_idx)
760 delta += ITS_CMD_QUEUE_SZ;
751 761
752 /* Wrapped case */ 762 linear_idx += delta;
753 if (from_idx >= to_idx && rd_idx >= to_idx && rd_idx < from_idx) 763 if (linear_idx >= to_idx)
754 break; 764 break;
755 765
756 count--; 766 count--;
757 if (!count) { 767 if (!count) {
758 pr_err_ratelimited("ITS queue timeout (%llu %llu %llu)\n", 768 pr_err_ratelimited("ITS queue timeout (%llu %llu)\n",
759 from_idx, to_idx, rd_idx); 769 to_idx, linear_idx);
760 return -1; 770 return -1;
761 } 771 }
772 prev_idx = rd_idx;
762 cpu_relax(); 773 cpu_relax();
763 udelay(1); 774 udelay(1);
764 } 775 }
@@ -775,6 +786,7 @@ void name(struct its_node *its, \
775 struct its_cmd_block *cmd, *sync_cmd, *next_cmd; \ 786 struct its_cmd_block *cmd, *sync_cmd, *next_cmd; \
776 synctype *sync_obj; \ 787 synctype *sync_obj; \
777 unsigned long flags; \ 788 unsigned long flags; \
789 u64 rd_idx; \
778 \ 790 \
779 raw_spin_lock_irqsave(&its->lock, flags); \ 791 raw_spin_lock_irqsave(&its->lock, flags); \
780 \ 792 \
@@ -796,10 +808,11 @@ void name(struct its_node *its, \
796 } \ 808 } \
797 \ 809 \
798post: \ 810post: \
811 rd_idx = readl_relaxed(its->base + GITS_CREADR); \
799 next_cmd = its_post_commands(its); \ 812 next_cmd = its_post_commands(its); \
800 raw_spin_unlock_irqrestore(&its->lock, flags); \ 813 raw_spin_unlock_irqrestore(&its->lock, flags); \
801 \ 814 \
802 if (its_wait_for_range_completion(its, cmd, next_cmd)) \ 815 if (its_wait_for_range_completion(its, rd_idx, next_cmd)) \
803 pr_err_ratelimited("ITS cmd %ps failed\n", builder); \ 816 pr_err_ratelimited("ITS cmd %ps failed\n", builder); \
804} 817}
805 818
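Note: its_wait_for_range_completion() now snapshots GITS_CREADR before the commands are posted and linearizes every index, so a single comparison works even when the command queue wraps around. A standalone sketch of the progress calculation (the queue size matches the driver's 64 KiB ITS_CMD_QUEUE_SZ, but the values are illustrative):

#include <stdint.h>
#include <stdio.h>

#define CMD_QUEUE_SZ	0x10000		/* 64 KiB, as in the ITS driver */

/* How far has the read pointer advanced since prev_idx? */
static uint64_t rd_progress(uint64_t prev_idx, uint64_t rd_idx)
{
	int64_t delta = (int64_t)rd_idx - (int64_t)prev_idx;

	if (rd_idx < prev_idx)		/* the read pointer wrapped */
		delta += CMD_QUEUE_SZ;
	return (uint64_t)delta;
}

int main(void)
{
	/* Wrapped case: 0xffc0 -> 0x0040 is 0x80 bytes of progress. */
	printf("progress = 0x%llx\n",
	       (unsigned long long)rd_progress(0xffc0, 0x0040));
	return 0;
}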
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index 6377cb864f4c..9bca4896fa6f 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -461,8 +461,12 @@ static void gic_deactivate_unhandled(u32 irqnr)
461 461
462static inline void gic_handle_nmi(u32 irqnr, struct pt_regs *regs) 462static inline void gic_handle_nmi(u32 irqnr, struct pt_regs *regs)
463{ 463{
464 bool irqs_enabled = interrupts_enabled(regs);
464 int err; 465 int err;
465 466
467 if (irqs_enabled)
468 nmi_enter();
469
466 if (static_branch_likely(&supports_deactivate_key)) 470 if (static_branch_likely(&supports_deactivate_key))
467 gic_write_eoir(irqnr); 471 gic_write_eoir(irqnr);
468 /* 472 /*
@@ -474,6 +478,9 @@ static inline void gic_handle_nmi(u32 irqnr, struct pt_regs *regs)
474 err = handle_domain_nmi(gic_data.domain, irqnr, regs); 478 err = handle_domain_nmi(gic_data.domain, irqnr, regs);
475 if (err) 479 if (err)
476 gic_deactivate_unhandled(irqnr); 480 gic_deactivate_unhandled(irqnr);
481
482 if (irqs_enabled)
483 nmi_exit();
477} 484}
478 485
479static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs) 486static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs)
@@ -1332,6 +1339,9 @@ static int __init gic_init_bases(void __iomem *dist_base,
1332 if (gic_dist_supports_lpis()) { 1339 if (gic_dist_supports_lpis()) {
1333 its_init(handle, &gic_data.rdists, gic_data.domain); 1340 its_init(handle, &gic_data.rdists, gic_data.domain);
1334 its_cpu_init(); 1341 its_cpu_init();
1342 } else {
1343 if (IS_ENABLED(CONFIG_ARM_GIC_V2M))
1344 gicv2m_init(handle, gic_data.domain);
1335 } 1345 }
1336 1346
1337 if (gic_prio_masking_enabled()) { 1347 if (gic_prio_masking_enabled()) {
diff --git a/drivers/irqchip/irq-mbigen.c b/drivers/irqchip/irq-mbigen.c
index a89c693d5b90..3dd28382d5f5 100644
--- a/drivers/irqchip/irq-mbigen.c
+++ b/drivers/irqchip/irq-mbigen.c
@@ -344,8 +344,7 @@ static int mbigen_device_probe(struct platform_device *pdev)
344 err = -EINVAL; 344 err = -EINVAL;
345 345
346 if (err) { 346 if (err) {
347 dev_err(&pdev->dev, "Failed to create mbi-gen@%p irqdomain", 347 dev_err(&pdev->dev, "Failed to create mbi-gen irqdomain\n");
348 mgn_chip->base);
349 return err; 348 return err;
350 } 349 }
351 350
diff --git a/drivers/irqchip/irq-meson-gpio.c b/drivers/irqchip/irq-meson-gpio.c
index 8eb92eb98f54..dcdc23b9dce6 100644
--- a/drivers/irqchip/irq-meson-gpio.c
+++ b/drivers/irqchip/irq-meson-gpio.c
@@ -60,6 +60,7 @@ static const struct of_device_id meson_irq_gpio_matches[] = {
60 { .compatible = "amlogic,meson-gxbb-gpio-intc", .data = &gxbb_params }, 60 { .compatible = "amlogic,meson-gxbb-gpio-intc", .data = &gxbb_params },
61 { .compatible = "amlogic,meson-gxl-gpio-intc", .data = &gxl_params }, 61 { .compatible = "amlogic,meson-gxl-gpio-intc", .data = &gxl_params },
62 { .compatible = "amlogic,meson-axg-gpio-intc", .data = &axg_params }, 62 { .compatible = "amlogic,meson-axg-gpio-intc", .data = &axg_params },
63 { .compatible = "amlogic,meson-g12a-gpio-intc", .data = &axg_params },
63 { } 64 { }
64}; 65};
65 66
diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c
index d32268cc1174..f3985469c221 100644
--- a/drivers/irqchip/irq-mips-gic.c
+++ b/drivers/irqchip/irq-mips-gic.c
@@ -388,7 +388,7 @@ static void gic_all_vpes_irq_cpu_online(struct irq_data *d)
388 intr = GIC_HWIRQ_TO_LOCAL(d->hwirq); 388 intr = GIC_HWIRQ_TO_LOCAL(d->hwirq);
389 cd = irq_data_get_irq_chip_data(d); 389 cd = irq_data_get_irq_chip_data(d);
390 390
391 write_gic_vl_map(intr, cd->map); 391 write_gic_vl_map(mips_gic_vx_map_reg(intr), cd->map);
392 if (cd->mask) 392 if (cd->mask)
393 write_gic_vl_smask(BIT(intr)); 393 write_gic_vl_smask(BIT(intr));
394} 394}
@@ -517,7 +517,7 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int virq,
517 spin_lock_irqsave(&gic_lock, flags); 517 spin_lock_irqsave(&gic_lock, flags);
518 for_each_online_cpu(cpu) { 518 for_each_online_cpu(cpu) {
519 write_gic_vl_other(mips_cm_vp_id(cpu)); 519 write_gic_vl_other(mips_cm_vp_id(cpu));
520 write_gic_vo_map(intr, map); 520 write_gic_vo_map(mips_gic_vx_map_reg(intr), map);
521 } 521 }
522 spin_unlock_irqrestore(&gic_lock, flags); 522 spin_unlock_irqrestore(&gic_lock, flags);
523 523
diff --git a/drivers/irqchip/irq-renesas-intc-irqpin.c b/drivers/irqchip/irq-renesas-intc-irqpin.c
index 04c05a18600c..f82bc60a6793 100644
--- a/drivers/irqchip/irq-renesas-intc-irqpin.c
+++ b/drivers/irqchip/irq-renesas-intc-irqpin.c
@@ -508,7 +508,8 @@ static int intc_irqpin_probe(struct platform_device *pdev)
508 } 508 }
509 509
510 irq_chip = &p->irq_chip; 510 irq_chip = &p->irq_chip;
511 irq_chip->name = name; 511 irq_chip->name = "intc-irqpin";
512 irq_chip->parent_device = dev;
512 irq_chip->irq_mask = disable_fn; 513 irq_chip->irq_mask = disable_fn;
513 irq_chip->irq_unmask = enable_fn; 514 irq_chip->irq_unmask = enable_fn;
514 irq_chip->irq_set_type = intc_irqpin_irq_set_type; 515 irq_chip->irq_set_type = intc_irqpin_irq_set_type;
diff --git a/drivers/irqchip/irq-renesas-irqc.c b/drivers/irqchip/irq-renesas-irqc.c
index a449a7c839b3..11abc09ef76c 100644
--- a/drivers/irqchip/irq-renesas-irqc.c
+++ b/drivers/irqchip/irq-renesas-irqc.c
@@ -7,7 +7,6 @@
7 7
8#include <linux/init.h> 8#include <linux/init.h>
9#include <linux/platform_device.h> 9#include <linux/platform_device.h>
10#include <linux/spinlock.h>
11#include <linux/interrupt.h> 10#include <linux/interrupt.h>
12#include <linux/ioport.h> 11#include <linux/ioport.h>
13#include <linux/io.h> 12#include <linux/io.h>
@@ -48,7 +47,7 @@ struct irqc_priv {
48 void __iomem *cpu_int_base; 47 void __iomem *cpu_int_base;
49 struct irqc_irq irq[IRQC_IRQ_MAX]; 48 struct irqc_irq irq[IRQC_IRQ_MAX];
50 unsigned int number_of_irqs; 49 unsigned int number_of_irqs;
51 struct platform_device *pdev; 50 struct device *dev;
52 struct irq_chip_generic *gc; 51 struct irq_chip_generic *gc;
53 struct irq_domain *irq_domain; 52 struct irq_domain *irq_domain;
54 atomic_t wakeup_path; 53 atomic_t wakeup_path;
@@ -61,8 +60,7 @@ static struct irqc_priv *irq_data_to_priv(struct irq_data *data)
61 60
62static void irqc_dbg(struct irqc_irq *i, char *str) 61static void irqc_dbg(struct irqc_irq *i, char *str)
63{ 62{
64 dev_dbg(&i->p->pdev->dev, "%s (%d:%d)\n", 63 dev_dbg(i->p->dev, "%s (%d:%d)\n", str, i->requested_irq, i->hw_irq);
65 str, i->requested_irq, i->hw_irq);
66} 64}
67 65
68static unsigned char irqc_sense[IRQ_TYPE_SENSE_MASK + 1] = { 66static unsigned char irqc_sense[IRQ_TYPE_SENSE_MASK + 1] = {
@@ -125,33 +123,22 @@ static irqreturn_t irqc_irq_handler(int irq, void *dev_id)
125 123
126static int irqc_probe(struct platform_device *pdev) 124static int irqc_probe(struct platform_device *pdev)
127{ 125{
126 struct device *dev = &pdev->dev;
127 const char *name = dev_name(dev);
128 struct irqc_priv *p; 128 struct irqc_priv *p;
129 struct resource *io;
130 struct resource *irq; 129 struct resource *irq;
131 const char *name = dev_name(&pdev->dev);
132 int ret; 130 int ret;
133 int k; 131 int k;
134 132
135 p = kzalloc(sizeof(*p), GFP_KERNEL); 133 p = devm_kzalloc(dev, sizeof(*p), GFP_KERNEL);
136 if (!p) { 134 if (!p)
137 dev_err(&pdev->dev, "failed to allocate driver data\n"); 135 return -ENOMEM;
138 ret = -ENOMEM;
139 goto err0;
140 }
141 136
142 p->pdev = pdev; 137 p->dev = dev;
143 platform_set_drvdata(pdev, p); 138 platform_set_drvdata(pdev, p);
144 139
145 pm_runtime_enable(&pdev->dev); 140 pm_runtime_enable(dev);
146 pm_runtime_get_sync(&pdev->dev); 141 pm_runtime_get_sync(dev);
147
148 /* get hold of manadatory IOMEM */
149 io = platform_get_resource(pdev, IORESOURCE_MEM, 0);
150 if (!io) {
151 dev_err(&pdev->dev, "not enough IOMEM resources\n");
152 ret = -EINVAL;
153 goto err1;
154 }
155 142
156 /* allow any number of IRQs between 1 and IRQC_IRQ_MAX */ 143 /* allow any number of IRQs between 1 and IRQC_IRQ_MAX */
157 for (k = 0; k < IRQC_IRQ_MAX; k++) { 144 for (k = 0; k < IRQC_IRQ_MAX; k++) {
@@ -166,42 +153,41 @@ static int irqc_probe(struct platform_device *pdev)
166 153
167 p->number_of_irqs = k; 154 p->number_of_irqs = k;
168 if (p->number_of_irqs < 1) { 155 if (p->number_of_irqs < 1) {
169 dev_err(&pdev->dev, "not enough IRQ resources\n"); 156 dev_err(dev, "not enough IRQ resources\n");
170 ret = -EINVAL; 157 ret = -EINVAL;
171 goto err1; 158 goto err_runtime_pm_disable;
172 } 159 }
173 160
174 /* ioremap IOMEM and setup read/write callbacks */ 161 /* ioremap IOMEM and setup read/write callbacks */
175 p->iomem = ioremap_nocache(io->start, resource_size(io)); 162 p->iomem = devm_platform_ioremap_resource(pdev, 0);
176 if (!p->iomem) { 163 if (IS_ERR(p->iomem)) {
177 dev_err(&pdev->dev, "failed to remap IOMEM\n"); 164 ret = PTR_ERR(p->iomem);
178 ret = -ENXIO; 165 goto err_runtime_pm_disable;
179 goto err2;
180 } 166 }
181 167
182 p->cpu_int_base = p->iomem + IRQC_INT_CPU_BASE(0); /* SYS-SPI */ 168 p->cpu_int_base = p->iomem + IRQC_INT_CPU_BASE(0); /* SYS-SPI */
183 169
184 p->irq_domain = irq_domain_add_linear(pdev->dev.of_node, 170 p->irq_domain = irq_domain_add_linear(dev->of_node, p->number_of_irqs,
185 p->number_of_irqs,
186 &irq_generic_chip_ops, p); 171 &irq_generic_chip_ops, p);
187 if (!p->irq_domain) { 172 if (!p->irq_domain) {
188 ret = -ENXIO; 173 ret = -ENXIO;
189 dev_err(&pdev->dev, "cannot initialize irq domain\n"); 174 dev_err(dev, "cannot initialize irq domain\n");
190 goto err2; 175 goto err_runtime_pm_disable;
191 } 176 }
192 177
193 ret = irq_alloc_domain_generic_chips(p->irq_domain, p->number_of_irqs, 178 ret = irq_alloc_domain_generic_chips(p->irq_domain, p->number_of_irqs,
194 1, name, handle_level_irq, 179 1, "irqc", handle_level_irq,
195 0, 0, IRQ_GC_INIT_NESTED_LOCK); 180 0, 0, IRQ_GC_INIT_NESTED_LOCK);
196 if (ret) { 181 if (ret) {
197 dev_err(&pdev->dev, "cannot allocate generic chip\n"); 182 dev_err(dev, "cannot allocate generic chip\n");
198 goto err3; 183 goto err_remove_domain;
199 } 184 }
200 185
201 p->gc = irq_get_domain_generic_chip(p->irq_domain, 0); 186 p->gc = irq_get_domain_generic_chip(p->irq_domain, 0);
202 p->gc->reg_base = p->cpu_int_base; 187 p->gc->reg_base = p->cpu_int_base;
203 p->gc->chip_types[0].regs.enable = IRQC_EN_SET; 188 p->gc->chip_types[0].regs.enable = IRQC_EN_SET;
204 p->gc->chip_types[0].regs.disable = IRQC_EN_STS; 189 p->gc->chip_types[0].regs.disable = IRQC_EN_STS;
190 p->gc->chip_types[0].chip.parent_device = dev;
205 p->gc->chip_types[0].chip.irq_mask = irq_gc_mask_disable_reg; 191 p->gc->chip_types[0].chip.irq_mask = irq_gc_mask_disable_reg;
206 p->gc->chip_types[0].chip.irq_unmask = irq_gc_unmask_enable_reg; 192 p->gc->chip_types[0].chip.irq_unmask = irq_gc_unmask_enable_reg;
207 p->gc->chip_types[0].chip.irq_set_type = irqc_irq_set_type; 193 p->gc->chip_types[0].chip.irq_set_type = irqc_irq_set_type;
@@ -210,46 +196,33 @@ static int irqc_probe(struct platform_device *pdev)
210 196
211 /* request interrupts one by one */ 197 /* request interrupts one by one */
212 for (k = 0; k < p->number_of_irqs; k++) { 198 for (k = 0; k < p->number_of_irqs; k++) {
213 if (request_irq(p->irq[k].requested_irq, irqc_irq_handler, 199 if (devm_request_irq(dev, p->irq[k].requested_irq,
214 0, name, &p->irq[k])) { 200 irqc_irq_handler, 0, name, &p->irq[k])) {
215 dev_err(&pdev->dev, "failed to request IRQ\n"); 201 dev_err(dev, "failed to request IRQ\n");
216 ret = -ENOENT; 202 ret = -ENOENT;
217 goto err4; 203 goto err_remove_domain;
218 } 204 }
219 } 205 }
220 206
221 dev_info(&pdev->dev, "driving %d irqs\n", p->number_of_irqs); 207 dev_info(dev, "driving %d irqs\n", p->number_of_irqs);
222 208
223 return 0; 209 return 0;
224err4:
225 while (--k >= 0)
226 free_irq(p->irq[k].requested_irq, &p->irq[k]);
227 210
228err3: 211err_remove_domain:
229 irq_domain_remove(p->irq_domain); 212 irq_domain_remove(p->irq_domain);
230err2: 213err_runtime_pm_disable:
231 iounmap(p->iomem); 214 pm_runtime_put(dev);
232err1: 215 pm_runtime_disable(dev);
233 pm_runtime_put(&pdev->dev);
234 pm_runtime_disable(&pdev->dev);
235 kfree(p);
236err0:
237 return ret; 216 return ret;
238} 217}
239 218
240static int irqc_remove(struct platform_device *pdev) 219static int irqc_remove(struct platform_device *pdev)
241{ 220{
242 struct irqc_priv *p = platform_get_drvdata(pdev); 221 struct irqc_priv *p = platform_get_drvdata(pdev);
243 int k;
244
245 for (k = 0; k < p->number_of_irqs; k++)
246 free_irq(p->irq[k].requested_irq, &p->irq[k]);
247 222
248 irq_domain_remove(p->irq_domain); 223 irq_domain_remove(p->irq_domain);
249 iounmap(p->iomem);
250 pm_runtime_put(&pdev->dev); 224 pm_runtime_put(&pdev->dev);
251 pm_runtime_disable(&pdev->dev); 225 pm_runtime_disable(&pdev->dev);
252 kfree(p);
253 return 0; 226 return 0;
254} 227}
255 228
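Note: the irqc probe path above moves to managed (devm_*) resources, which is why the manual free_irq()/iounmap()/kfree() unwinding disappears; only the irq domain and runtime PM still need explicit teardown. A minimal kernel-context sketch of the devm idiom (the foo_* names are placeholders):

#include <linux/platform_device.h>
#include <linux/err.h>
#include <linux/io.h>

struct foo_priv {
	void __iomem *base;
};

static int foo_probe(struct platform_device *pdev)
{
	struct device *dev = &pdev->dev;
	struct foo_priv *p;

	/* Freed automatically when the device is unbound. */
	p = devm_kzalloc(dev, sizeof(*p), GFP_KERNEL);
	if (!p)
		return -ENOMEM;

	/* Maps resource 0; the unmap is likewise device-managed. */
	p->base = devm_platform_ioremap_resource(pdev, 0);
	if (IS_ERR(p->base))
		return PTR_ERR(p->base);

	platform_set_drvdata(pdev, p);
	return 0;
}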
diff --git a/drivers/irqchip/irq-renesas-rza1.c b/drivers/irqchip/irq-renesas-rza1.c
new file mode 100644
index 000000000000..b1f19b210190
--- /dev/null
+++ b/drivers/irqchip/irq-renesas-rza1.c
@@ -0,0 +1,283 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Renesas RZ/A1 IRQC Driver
4 *
5 * Copyright (C) 2019 Glider bvba
6 */
7
8#include <linux/err.h>
9#include <linux/init.h>
10#include <linux/interrupt.h>
11#include <linux/io.h>
12#include <linux/irqdomain.h>
13#include <linux/irq.h>
14#include <linux/module.h>
15#include <linux/of_irq.h>
16#include <linux/platform_device.h>
17#include <linux/slab.h>
18
19#include <dt-bindings/interrupt-controller/arm-gic.h>
20
21#define IRQC_NUM_IRQ 8
22
23#define ICR0 0 /* Interrupt Control Register 0 */
24
25#define ICR0_NMIL BIT(15) /* NMI Input Level (0=low, 1=high) */
26#define ICR0_NMIE BIT(8) /* Edge Select (0=falling, 1=rising) */
27#define ICR0_NMIF BIT(1) /* NMI Interrupt Request */
28
29#define ICR1 2 /* Interrupt Control Register 1 */
30
31#define ICR1_IRQS(n, sense) ((sense) << ((n) * 2)) /* IRQ Sense Select */
32#define ICR1_IRQS_LEVEL_LOW 0
33#define ICR1_IRQS_EDGE_FALLING 1
34#define ICR1_IRQS_EDGE_RISING 2
35#define ICR1_IRQS_EDGE_BOTH 3
36#define ICR1_IRQS_MASK(n) ICR1_IRQS((n), 3)
37
38#define IRQRR 4 /* IRQ Interrupt Request Register */
39
40
41struct rza1_irqc_priv {
42 struct device *dev;
43 void __iomem *base;
44 struct irq_chip chip;
45 struct irq_domain *irq_domain;
46 struct of_phandle_args map[IRQC_NUM_IRQ];
47};
48
49static struct rza1_irqc_priv *irq_data_to_priv(struct irq_data *data)
50{
51 return data->domain->host_data;
52}
53
54static void rza1_irqc_eoi(struct irq_data *d)
55{
56 struct rza1_irqc_priv *priv = irq_data_to_priv(d);
57 u16 bit = BIT(irqd_to_hwirq(d));
58 u16 tmp;
59
60 tmp = readw_relaxed(priv->base + IRQRR);
61 if (tmp & bit)
62 writew_relaxed(GENMASK(IRQC_NUM_IRQ - 1, 0) & ~bit,
63 priv->base + IRQRR);
64
65 irq_chip_eoi_parent(d);
66}
67
68static int rza1_irqc_set_type(struct irq_data *d, unsigned int type)
69{
70 struct rza1_irqc_priv *priv = irq_data_to_priv(d);
71 unsigned int hw_irq = irqd_to_hwirq(d);
72 u16 sense, tmp;
73
74 switch (type & IRQ_TYPE_SENSE_MASK) {
75 case IRQ_TYPE_LEVEL_LOW:
76 sense = ICR1_IRQS_LEVEL_LOW;
77 break;
78
79 case IRQ_TYPE_EDGE_FALLING:
80 sense = ICR1_IRQS_EDGE_FALLING;
81 break;
82
83 case IRQ_TYPE_EDGE_RISING:
84 sense = ICR1_IRQS_EDGE_RISING;
85 break;
86
87 case IRQ_TYPE_EDGE_BOTH:
88 sense = ICR1_IRQS_EDGE_BOTH;
89 break;
90
91 default:
92 return -EINVAL;
93 }
94
95 tmp = readw_relaxed(priv->base + ICR1);
96 tmp &= ~ICR1_IRQS_MASK(hw_irq);
97 tmp |= ICR1_IRQS(hw_irq, sense);
98 writew_relaxed(tmp, priv->base + ICR1);
99 return 0;
100}
101
102static int rza1_irqc_alloc(struct irq_domain *domain, unsigned int virq,
103 unsigned int nr_irqs, void *arg)
104{
105 struct rza1_irqc_priv *priv = domain->host_data;
106 struct irq_fwspec *fwspec = arg;
107 unsigned int hwirq = fwspec->param[0];
108 struct irq_fwspec spec;
109 unsigned int i;
110 int ret;
111
112 ret = irq_domain_set_hwirq_and_chip(domain, virq, hwirq, &priv->chip,
113 priv);
114 if (ret)
115 return ret;
116
117 spec.fwnode = &priv->dev->of_node->fwnode;
118 spec.param_count = priv->map[hwirq].args_count;
119 for (i = 0; i < spec.param_count; i++)
120 spec.param[i] = priv->map[hwirq].args[i];
121
122 return irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, &spec);
123}
124
125static int rza1_irqc_translate(struct irq_domain *domain,
126 struct irq_fwspec *fwspec, unsigned long *hwirq,
127 unsigned int *type)
128{
129 if (fwspec->param_count != 2 || fwspec->param[0] >= IRQC_NUM_IRQ)
130 return -EINVAL;
131
132 *hwirq = fwspec->param[0];
133 *type = fwspec->param[1];
134 return 0;
135}
136
137static const struct irq_domain_ops rza1_irqc_domain_ops = {
138 .alloc = rza1_irqc_alloc,
139 .translate = rza1_irqc_translate,
140};
141
142static int rza1_irqc_parse_map(struct rza1_irqc_priv *priv,
143 struct device_node *gic_node)
144{
145 unsigned int imaplen, i, j, ret;
146 struct device *dev = priv->dev;
147 struct device_node *ipar;
148 const __be32 *imap;
149 u32 intsize;
150
151 imap = of_get_property(dev->of_node, "interrupt-map", &imaplen);
152 if (!imap)
153 return -EINVAL;
154
155 for (i = 0; i < IRQC_NUM_IRQ; i++) {
156 if (imaplen < 3)
157 return -EINVAL;
158
159 /* Check interrupt number, ignore sense */
160 if (be32_to_cpup(imap) != i)
161 return -EINVAL;
162
163 ipar = of_find_node_by_phandle(be32_to_cpup(imap + 2));
164 if (ipar != gic_node) {
165 of_node_put(ipar);
166 return -EINVAL;
167 }
168
169 imap += 3;
170 imaplen -= 3;
171
172 ret = of_property_read_u32(ipar, "#interrupt-cells", &intsize);
173 of_node_put(ipar);
174 if (ret)
175 return ret;
176
177 if (imaplen < intsize)
178 return -EINVAL;
179
180 priv->map[i].args_count = intsize;
181 for (j = 0; j < intsize; j++)
182 priv->map[i].args[j] = be32_to_cpup(imap++);
183
184 imaplen -= intsize;
185 }
186
187 return 0;
188}
189
190static int rza1_irqc_probe(struct platform_device *pdev)
191{
192 struct device *dev = &pdev->dev;
193 struct device_node *np = dev->of_node;
194 struct irq_domain *parent = NULL;
195 struct device_node *gic_node;
196 struct rza1_irqc_priv *priv;
197 int ret;
198
199 priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
200 if (!priv)
201 return -ENOMEM;
202
203 platform_set_drvdata(pdev, priv);
204 priv->dev = dev;
205
206 priv->base = devm_platform_ioremap_resource(pdev, 0);
207 if (IS_ERR(priv->base))
208 return PTR_ERR(priv->base);
209
210 gic_node = of_irq_find_parent(np);
211 if (gic_node) {
212 parent = irq_find_host(gic_node);
213 of_node_put(gic_node);
214 }
215
216 if (!parent) {
217 dev_err(dev, "cannot find parent domain\n");
218 return -ENODEV;
219 }
220
221 ret = rza1_irqc_parse_map(priv, gic_node);
222 if (ret) {
223 dev_err(dev, "cannot parse %s: %d\n", "interrupt-map", ret);
224 return ret;
225 }
226
227 priv->chip.name = "rza1-irqc",
228 priv->chip.irq_mask = irq_chip_mask_parent,
229 priv->chip.irq_unmask = irq_chip_unmask_parent,
230 priv->chip.irq_eoi = rza1_irqc_eoi,
231 priv->chip.irq_retrigger = irq_chip_retrigger_hierarchy,
232 priv->chip.irq_set_type = rza1_irqc_set_type,
233 priv->chip.flags = IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_SKIP_SET_WAKE;
234
235 priv->irq_domain = irq_domain_add_hierarchy(parent, 0, IRQC_NUM_IRQ,
236 np, &rza1_irqc_domain_ops,
237 priv);
238 if (!priv->irq_domain) {
239 dev_err(dev, "cannot initialize irq domain\n");
240 return -ENOMEM;
241 }
242
243 return 0;
244}
245
246static int rza1_irqc_remove(struct platform_device *pdev)
247{
248 struct rza1_irqc_priv *priv = platform_get_drvdata(pdev);
249
250 irq_domain_remove(priv->irq_domain);
251 return 0;
252}
253
254static const struct of_device_id rza1_irqc_dt_ids[] = {
255 { .compatible = "renesas,rza1-irqc" },
256 {},
257};
258MODULE_DEVICE_TABLE(of, rza1_irqc_dt_ids);
259
260static struct platform_driver rza1_irqc_device_driver = {
261 .probe = rza1_irqc_probe,
262 .remove = rza1_irqc_remove,
263 .driver = {
264 .name = "renesas_rza1_irqc",
265 .of_match_table = rza1_irqc_dt_ids,
266 }
267};
268
269static int __init rza1_irqc_init(void)
270{
271 return platform_driver_register(&rza1_irqc_device_driver);
272}
273postcore_initcall(rza1_irqc_init);
274
275static void __exit rza1_irqc_exit(void)
276{
277 platform_driver_unregister(&rza1_irqc_device_driver);
278}
279module_exit(rza1_irqc_exit);
280
281MODULE_AUTHOR("Geert Uytterhoeven <geert+renesas@glider.be>");
282MODULE_DESCRIPTION("Renesas RZ/A1 IRQC Driver");
283MODULE_LICENSE("GPL v2");
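Note: in rza1_irqc_eoi() above, a latched edge is acknowledged by writing 0 to the corresponding IRQRR bit while writing 1 to all the others, so pending state for the other seven lines is left untouched (write-zero-to-clear semantics, which the driver relies on). A standalone illustration of that ack mask:

#include <stdint.h>
#include <stdio.h>

#define NUM_IRQ	8

/* Value written to IRQRR to ack hwirq: 0 clears it, 1 leaves the rest alone. */
static uint16_t irqrr_ack_value(unsigned int hwirq)
{
	return (uint16_t)(((1u << NUM_IRQ) - 1) & ~(1u << hwirq));
}

int main(void)
{
	printf("ack IRQ3 -> write 0x%02x\n", irqrr_ack_value(3)); /* 0xf7 */
	return 0;
}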
diff --git a/drivers/irqchip/irq-sni-exiu.c b/drivers/irqchip/irq-sni-exiu.c
index 4e983bc6cf93..1d027623c776 100644
--- a/drivers/irqchip/irq-sni-exiu.c
+++ b/drivers/irqchip/irq-sni-exiu.c
@@ -2,7 +2,7 @@
2/* 2/*
3 * Driver for Socionext External Interrupt Unit (EXIU) 3 * Driver for Socionext External Interrupt Unit (EXIU)
4 * 4 *
5 * Copyright (c) 2017 Linaro, Ltd. <ard.biesheuvel@linaro.org> 5 * Copyright (c) 2017-2019 Linaro, Ltd. <ard.biesheuvel@linaro.org>
6 * 6 *
7 * Based on irq-tegra.c: 7 * Based on irq-tegra.c:
8 * Copyright (C) 2011 Google, Inc. 8 * Copyright (C) 2011 Google, Inc.
@@ -17,6 +17,7 @@
17#include <linux/of.h> 17#include <linux/of.h>
18#include <linux/of_address.h> 18#include <linux/of_address.h>
19#include <linux/of_irq.h> 19#include <linux/of_irq.h>
20#include <linux/platform_device.h>
20 21
21#include <dt-bindings/interrupt-controller/arm-gic.h> 22#include <dt-bindings/interrupt-controller/arm-gic.h>
22 23
@@ -131,9 +132,13 @@ static int exiu_domain_translate(struct irq_domain *domain,
131 132
132 *hwirq = fwspec->param[1] - info->spi_base; 133 *hwirq = fwspec->param[1] - info->spi_base;
133 *type = fwspec->param[2] & IRQ_TYPE_SENSE_MASK; 134 *type = fwspec->param[2] & IRQ_TYPE_SENSE_MASK;
134 return 0; 135 } else {
136 if (fwspec->param_count != 2)
137 return -EINVAL;
138 *hwirq = fwspec->param[0];
139 *type = fwspec->param[2] & IRQ_TYPE_SENSE_MASK;
135 } 140 }
136 return -EINVAL; 141 return 0;
137} 142}
138 143
139static int exiu_domain_alloc(struct irq_domain *dom, unsigned int virq, 144static int exiu_domain_alloc(struct irq_domain *dom, unsigned int virq,
@@ -144,16 +149,21 @@ static int exiu_domain_alloc(struct irq_domain *dom, unsigned int virq,
144 struct exiu_irq_data *info = dom->host_data; 149 struct exiu_irq_data *info = dom->host_data;
145 irq_hw_number_t hwirq; 150 irq_hw_number_t hwirq;
146 151
147 if (fwspec->param_count != 3) 152 parent_fwspec = *fwspec;
148 return -EINVAL; /* Not GIC compliant */ 153 if (is_of_node(dom->parent->fwnode)) {
149 if (fwspec->param[0] != GIC_SPI) 154 if (fwspec->param_count != 3)
150 return -EINVAL; /* No PPI should point to this domain */ 155 return -EINVAL; /* Not GIC compliant */
156 if (fwspec->param[0] != GIC_SPI)
157 return -EINVAL; /* No PPI should point to this domain */
151 158
159 hwirq = fwspec->param[1] - info->spi_base;
160 } else {
161 hwirq = fwspec->param[0];
162 parent_fwspec.param[0] = hwirq + info->spi_base + 32;
163 }
152 WARN_ON(nr_irqs != 1); 164 WARN_ON(nr_irqs != 1);
153 hwirq = fwspec->param[1] - info->spi_base;
154 irq_domain_set_hwirq_and_chip(dom, virq, hwirq, &exiu_irq_chip, info); 165 irq_domain_set_hwirq_and_chip(dom, virq, hwirq, &exiu_irq_chip, info);
155 166
156 parent_fwspec = *fwspec;
157 parent_fwspec.fwnode = dom->parent->fwnode; 167 parent_fwspec.fwnode = dom->parent->fwnode;
158 return irq_domain_alloc_irqs_parent(dom, virq, nr_irqs, &parent_fwspec); 168 return irq_domain_alloc_irqs_parent(dom, virq, nr_irqs, &parent_fwspec);
159} 169}
@@ -164,35 +174,23 @@ static const struct irq_domain_ops exiu_domain_ops = {
164 .free = irq_domain_free_irqs_common, 174 .free = irq_domain_free_irqs_common,
165}; 175};
166 176
167static int __init exiu_init(struct device_node *node, 177static struct exiu_irq_data *exiu_init(const struct fwnode_handle *fwnode,
168 struct device_node *parent) 178 struct resource *res)
169{ 179{
170 struct irq_domain *parent_domain, *domain;
171 struct exiu_irq_data *data; 180 struct exiu_irq_data *data;
172 int err; 181 int err;
173 182
174 if (!parent) {
175 pr_err("%pOF: no parent, giving up\n", node);
176 return -ENODEV;
177 }
178
179 parent_domain = irq_find_host(parent);
180 if (!parent_domain) {
181 pr_err("%pOF: unable to obtain parent domain\n", node);
182 return -ENXIO;
183 }
184
185 data = kzalloc(sizeof(*data), GFP_KERNEL); 183 data = kzalloc(sizeof(*data), GFP_KERNEL);
186 if (!data) 184 if (!data)
187 return -ENOMEM; 185 return ERR_PTR(-ENOMEM);
188 186
189 if (of_property_read_u32(node, "socionext,spi-base", &data->spi_base)) { 187 if (fwnode_property_read_u32_array(fwnode, "socionext,spi-base",
190 pr_err("%pOF: failed to parse 'spi-base' property\n", node); 188 &data->spi_base, 1)) {
191 err = -ENODEV; 189 err = -ENODEV;
192 goto out_free; 190 goto out_free;
193 } 191 }
194 192
195 data->base = of_iomap(node, 0); 193 data->base = ioremap(res->start, resource_size(res));
196 if (!data->base) { 194 if (!data->base) {
197 err = -ENODEV; 195 err = -ENODEV;
198 goto out_free; 196 goto out_free;
@@ -202,11 +200,44 @@ static int __init exiu_init(struct device_node *node,
202 writel_relaxed(0xFFFFFFFF, data->base + EIREQCLR); 200 writel_relaxed(0xFFFFFFFF, data->base + EIREQCLR);
203 writel_relaxed(0xFFFFFFFF, data->base + EIMASK); 201 writel_relaxed(0xFFFFFFFF, data->base + EIMASK);
204 202
203 return data;
204
205out_free:
206 kfree(data);
207 return ERR_PTR(err);
208}
209
210static int __init exiu_dt_init(struct device_node *node,
211 struct device_node *parent)
212{
213 struct irq_domain *parent_domain, *domain;
214 struct exiu_irq_data *data;
215 struct resource res;
216
217 if (!parent) {
218 pr_err("%pOF: no parent, giving up\n", node);
219 return -ENODEV;
220 }
221
222 parent_domain = irq_find_host(parent);
223 if (!parent_domain) {
224 pr_err("%pOF: unable to obtain parent domain\n", node);
225 return -ENXIO;
226 }
227
228 if (of_address_to_resource(node, 0, &res)) {
229 pr_err("%pOF: failed to parse memory resource\n", node);
230 return -ENXIO;
231 }
232
233 data = exiu_init(of_node_to_fwnode(node), &res);
234 if (IS_ERR(data))
235 return PTR_ERR(data);
236
205 domain = irq_domain_add_hierarchy(parent_domain, 0, NUM_IRQS, node, 237 domain = irq_domain_add_hierarchy(parent_domain, 0, NUM_IRQS, node,
206 &exiu_domain_ops, data); 238 &exiu_domain_ops, data);
207 if (!domain) { 239 if (!domain) {
208 pr_err("%pOF: failed to allocate domain\n", node); 240 pr_err("%pOF: failed to allocate domain\n", node);
209 err = -ENOMEM;
210 goto out_unmap; 241 goto out_unmap;
211 } 242 }
212 243
@@ -217,8 +248,57 @@ static int __init exiu_init(struct device_node *node,
217 248
218out_unmap: 249out_unmap:
219 iounmap(data->base); 250 iounmap(data->base);
220out_free:
221 kfree(data); 251 kfree(data);
222 return err; 252 return -ENOMEM;
223} 253}
224IRQCHIP_DECLARE(exiu, "socionext,synquacer-exiu", exiu_init); 254IRQCHIP_DECLARE(exiu, "socionext,synquacer-exiu", exiu_dt_init);
255
256#ifdef CONFIG_ACPI
257static int exiu_acpi_probe(struct platform_device *pdev)
258{
259 struct irq_domain *domain;
260 struct exiu_irq_data *data;
261 struct resource *res;
262
263 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
264 if (!res) {
265 dev_err(&pdev->dev, "failed to parse memory resource\n");
266 return -ENXIO;
267 }
268
269 data = exiu_init(dev_fwnode(&pdev->dev), res);
270 if (IS_ERR(data))
271 return PTR_ERR(data);
272
273 domain = acpi_irq_create_hierarchy(0, NUM_IRQS, dev_fwnode(&pdev->dev),
274 &exiu_domain_ops, data);
275 if (!domain) {
276 dev_err(&pdev->dev, "failed to create IRQ domain\n");
277 goto out_unmap;
278 }
279
280 dev_info(&pdev->dev, "%d interrupts forwarded\n", NUM_IRQS);
281
282 return 0;
283
284out_unmap:
285 iounmap(data->base);
286 kfree(data);
287 return -ENOMEM;
288}
289
290static const struct acpi_device_id exiu_acpi_ids[] = {
291 { "SCX0008" },
292 { /* sentinel */ }
293};
294MODULE_DEVICE_TABLE(acpi, exiu_acpi_ids);
295
296static struct platform_driver exiu_driver = {
297 .driver = {
298 .name = "exiu",
299 .acpi_match_table = exiu_acpi_ids,
300 },
301 .probe = exiu_acpi_probe,
302};
303builtin_platform_driver(exiu_driver);
304#endif
diff --git a/drivers/irqchip/irq-ti-sci-inta.c b/drivers/irqchip/irq-ti-sci-inta.c
index 011b60a49e3f..ef4d625d2d80 100644
--- a/drivers/irqchip/irq-ti-sci-inta.c
+++ b/drivers/irqchip/irq-ti-sci-inta.c
@@ -159,9 +159,9 @@ static struct ti_sci_inta_vint_desc *ti_sci_inta_alloc_parent_irq(struct irq_dom
159 parent_fwspec.param[1] = vint_desc->vint_id; 159 parent_fwspec.param[1] = vint_desc->vint_id;
160 160
161 parent_virq = irq_create_fwspec_mapping(&parent_fwspec); 161 parent_virq = irq_create_fwspec_mapping(&parent_fwspec);
162 if (parent_virq <= 0) { 162 if (parent_virq == 0) {
163 kfree(vint_desc); 163 kfree(vint_desc);
164 return ERR_PTR(parent_virq); 164 return ERR_PTR(-EINVAL);
165 } 165 }
166 vint_desc->parent_virq = parent_virq; 166 vint_desc->parent_virq = parent_virq;
167 167
diff --git a/drivers/irqchip/qcom-irq-combiner.c b/drivers/irqchip/qcom-irq-combiner.c
index 067337ab3f20..d88e993aa66d 100644
--- a/drivers/irqchip/qcom-irq-combiner.c
+++ b/drivers/irqchip/qcom-irq-combiner.c
@@ -229,7 +229,6 @@ static int get_registers(struct platform_device *pdev, struct combiner *comb)
229static int __init combiner_probe(struct platform_device *pdev) 229static int __init combiner_probe(struct platform_device *pdev)
230{ 230{
231 struct combiner *combiner; 231 struct combiner *combiner;
232 size_t alloc_sz;
233 int nregs; 232 int nregs;
234 int err; 233 int err;
235 234
@@ -239,8 +238,8 @@ static int __init combiner_probe(struct platform_device *pdev)
239 return -EINVAL; 238 return -EINVAL;
240 } 239 }
241 240
242 alloc_sz = sizeof(*combiner) + sizeof(struct combiner_reg) * nregs; 241 combiner = devm_kzalloc(&pdev->dev, struct_size(combiner, regs, nregs),
243 combiner = devm_kzalloc(&pdev->dev, alloc_sz, GFP_KERNEL); 242 GFP_KERNEL);
244 if (!combiner) 243 if (!combiner)
245 return -ENOMEM; 244 return -ENOMEM;
246 245
diff --git a/drivers/leds/trigger/ledtrig-activity.c b/drivers/leds/trigger/ledtrig-activity.c
index 4c8b0c3cf284..6a72b7e13719 100644
--- a/drivers/leds/trigger/ledtrig-activity.c
+++ b/drivers/leds/trigger/ledtrig-activity.c
@@ -70,7 +70,7 @@ static void led_activity_function(struct timer_list *t)
70 * down to 16us, ensuring we won't overflow 32-bit computations below 70 * down to 16us, ensuring we won't overflow 32-bit computations below
71 * even up to 3k CPUs, while keeping divides cheap on smaller systems. 71 * even up to 3k CPUs, while keeping divides cheap on smaller systems.
72 */ 72 */
73 curr_boot = ktime_get_boot_ns() * cpus; 73 curr_boot = ktime_get_boottime_ns() * cpus;
74 diff_boot = (curr_boot - activity_data->last_boot) >> 16; 74 diff_boot = (curr_boot - activity_data->last_boot) >> 16;
75 diff_used = (curr_used - activity_data->last_used) >> 16; 75 diff_used = (curr_used - activity_data->last_used) >> 16;
76 activity_data->last_boot = curr_boot; 76 activity_data->last_boot = curr_boot;
diff --git a/drivers/md/dm-init.c b/drivers/md/dm-init.c
index 352e803f566e..728733a514c7 100644
--- a/drivers/md/dm-init.c
+++ b/drivers/md/dm-init.c
@@ -140,8 +140,8 @@ static char __init *dm_parse_table_entry(struct dm_device *dev, char *str)
140 return ERR_PTR(-EINVAL); 140 return ERR_PTR(-EINVAL);
141 } 141 }
142 /* target_args */ 142 /* target_args */
143 dev->target_args_array[n] = kstrndup(field[3], GFP_KERNEL, 143 dev->target_args_array[n] = kstrndup(field[3], DM_MAX_STR_SIZE,
144 DM_MAX_STR_SIZE); 144 GFP_KERNEL);
145 if (!dev->target_args_array[n]) 145 if (!dev->target_args_array[n])
146 return ERR_PTR(-ENOMEM); 146 return ERR_PTR(-ENOMEM);
147 147
@@ -272,10 +272,10 @@ static int __init dm_init_init(void)
272 return 0; 272 return 0;
273 273
274 if (strlen(create) >= DM_MAX_STR_SIZE) { 274 if (strlen(create) >= DM_MAX_STR_SIZE) {
275 DMERR("Argument is too big. Limit is %d\n", DM_MAX_STR_SIZE); 275 DMERR("Argument is too big. Limit is %d", DM_MAX_STR_SIZE);
276 return -EINVAL; 276 return -EINVAL;
277 } 277 }
278 str = kstrndup(create, GFP_KERNEL, DM_MAX_STR_SIZE); 278 str = kstrndup(create, DM_MAX_STR_SIZE, GFP_KERNEL);
279 if (!str) 279 if (!str)
280 return -ENOMEM; 280 return -ENOMEM;
281 281
@@ -283,7 +283,7 @@ static int __init dm_init_init(void)
283 if (r) 283 if (r)
284 goto out; 284 goto out;
285 285
286 DMINFO("waiting for all devices to be available before creating mapped devices\n"); 286 DMINFO("waiting for all devices to be available before creating mapped devices");
287 wait_for_device_probe(); 287 wait_for_device_probe();
288 288
289 list_for_each_entry(dev, &devices, list) { 289 list_for_each_entry(dev, &devices, list) {
diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c
index 9ea2b0291f20..e549392e0ea5 100644
--- a/drivers/md/dm-log-writes.c
+++ b/drivers/md/dm-log-writes.c
@@ -60,6 +60,7 @@
60 60
61#define WRITE_LOG_VERSION 1ULL 61#define WRITE_LOG_VERSION 1ULL
62#define WRITE_LOG_MAGIC 0x6a736677736872ULL 62#define WRITE_LOG_MAGIC 0x6a736677736872ULL
63#define WRITE_LOG_SUPER_SECTOR 0
63 64
64/* 65/*
65 * The disk format for this is braindead simple. 66 * The disk format for this is braindead simple.
@@ -115,6 +116,7 @@ struct log_writes_c {
115 struct list_head logging_blocks; 116 struct list_head logging_blocks;
116 wait_queue_head_t wait; 117 wait_queue_head_t wait;
117 struct task_struct *log_kthread; 118 struct task_struct *log_kthread;
119 struct completion super_done;
118}; 120};
119 121
120struct pending_block { 122struct pending_block {
@@ -180,6 +182,14 @@ static void log_end_io(struct bio *bio)
180 bio_put(bio); 182 bio_put(bio);
181} 183}
182 184
185static void log_end_super(struct bio *bio)
186{
187 struct log_writes_c *lc = bio->bi_private;
188
189 complete(&lc->super_done);
190 log_end_io(bio);
191}
192
183/* 193/*
184 * Meant to be called if there is an error, it will free all the pages 194 * Meant to be called if there is an error, it will free all the pages
185 * associated with the block. 195 * associated with the block.
@@ -215,7 +225,8 @@ static int write_metadata(struct log_writes_c *lc, void *entry,
215 bio->bi_iter.bi_size = 0; 225 bio->bi_iter.bi_size = 0;
216 bio->bi_iter.bi_sector = sector; 226 bio->bi_iter.bi_sector = sector;
217 bio_set_dev(bio, lc->logdev->bdev); 227 bio_set_dev(bio, lc->logdev->bdev);
218 bio->bi_end_io = log_end_io; 228 bio->bi_end_io = (sector == WRITE_LOG_SUPER_SECTOR) ?
229 log_end_super : log_end_io;
219 bio->bi_private = lc; 230 bio->bi_private = lc;
220 bio_set_op_attrs(bio, REQ_OP_WRITE, 0); 231 bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
221 232
@@ -418,11 +429,18 @@ static int log_super(struct log_writes_c *lc)
418 super.nr_entries = cpu_to_le64(lc->logged_entries); 429 super.nr_entries = cpu_to_le64(lc->logged_entries);
419 super.sectorsize = cpu_to_le32(lc->sectorsize); 430 super.sectorsize = cpu_to_le32(lc->sectorsize);
420 431
421 if (write_metadata(lc, &super, sizeof(super), NULL, 0, 0)) { 432 if (write_metadata(lc, &super, sizeof(super), NULL, 0,
433 WRITE_LOG_SUPER_SECTOR)) {
422 DMERR("Couldn't write super"); 434 DMERR("Couldn't write super");
423 return -1; 435 return -1;
424 } 436 }
425 437
438 /*
439	 * Super sector should be written in-order, otherwise the
440 * nr_entries could be rewritten incorrectly by an old bio.
441 */
442 wait_for_completion_io(&lc->super_done);
443
426 return 0; 444 return 0;
427} 445}
428 446
@@ -531,6 +549,7 @@ static int log_writes_ctr(struct dm_target *ti, unsigned int argc, char **argv)
531 INIT_LIST_HEAD(&lc->unflushed_blocks); 549 INIT_LIST_HEAD(&lc->unflushed_blocks);
532 INIT_LIST_HEAD(&lc->logging_blocks); 550 INIT_LIST_HEAD(&lc->logging_blocks);
533 init_waitqueue_head(&lc->wait); 551 init_waitqueue_head(&lc->wait);
552 init_completion(&lc->super_done);
534 atomic_set(&lc->io_blocks, 0); 553 atomic_set(&lc->io_blocks, 0);
535 atomic_set(&lc->pending_blocks, 0); 554 atomic_set(&lc->pending_blocks, 0);
536 555
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 350cf0451456..ec8b27e20de3 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -561,7 +561,7 @@ static char **realloc_argv(unsigned *size, char **old_argv)
561 gfp = GFP_NOIO; 561 gfp = GFP_NOIO;
562 } 562 }
563 argv = kmalloc_array(new_size, sizeof(*argv), gfp); 563 argv = kmalloc_array(new_size, sizeof(*argv), gfp);
564 if (argv) { 564 if (argv && old_argv) {
565 memcpy(argv, old_argv, *size * sizeof(*argv)); 565 memcpy(argv, old_argv, *size * sizeof(*argv));
566 *size = new_size; 566 *size = new_size;
567 } 567 }
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index 720d06531aa3..ea24ff0612e3 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -235,8 +235,8 @@ static int verity_handle_err(struct dm_verity *v, enum verity_block_type type,
235 BUG(); 235 BUG();
236 } 236 }
237 237
238 DMERR("%s: %s block %llu is corrupted", v->data_dev->name, type_str, 238 DMERR_LIMIT("%s: %s block %llu is corrupted", v->data_dev->name,
239 block); 239 type_str, block);
240 240
241 if (v->corrupted_errs == DM_VERITY_MAX_CORRUPTED_ERRS) 241 if (v->corrupted_errs == DM_VERITY_MAX_CORRUPTED_ERRS)
242 DMERR("%s: reached maximum errors", v->data_dev->name); 242 DMERR("%s: reached maximum errors", v->data_dev->name);
diff --git a/drivers/mfd/stmfx.c b/drivers/mfd/stmfx.c
index fe8efba2d45f..857991cb3cbb 100644
--- a/drivers/mfd/stmfx.c
+++ b/drivers/mfd/stmfx.c
@@ -204,12 +204,11 @@ static struct irq_chip stmfx_irq_chip = {
204static irqreturn_t stmfx_irq_handler(int irq, void *data) 204static irqreturn_t stmfx_irq_handler(int irq, void *data)
205{ 205{
206 struct stmfx *stmfx = data; 206 struct stmfx *stmfx = data;
207 unsigned long n, pending; 207 unsigned long bits;
208 u32 ack; 208 u32 pending, ack;
209 int ret; 209 int n, ret;
210 210
211 ret = regmap_read(stmfx->map, STMFX_REG_IRQ_PENDING, 211 ret = regmap_read(stmfx->map, STMFX_REG_IRQ_PENDING, &pending);
212 (u32 *)&pending);
213 if (ret) 212 if (ret)
214 return IRQ_NONE; 213 return IRQ_NONE;
215 214
@@ -224,7 +223,8 @@ static irqreturn_t stmfx_irq_handler(int irq, void *data)
224 return IRQ_NONE; 223 return IRQ_NONE;
225 } 224 }
226 225
227 for_each_set_bit(n, &pending, STMFX_REG_IRQ_SRC_MAX) 226 bits = pending;
227 for_each_set_bit(n, &bits, STMFX_REG_IRQ_SRC_MAX)
228 handle_nested_irq(irq_find_mapping(stmfx->irq_domain, n)); 228 handle_nested_irq(irq_find_mapping(stmfx->irq_domain, n));
229 229
230 return IRQ_HANDLED; 230 return IRQ_HANDLED;
diff --git a/drivers/mtd/nand/raw/ingenic/Kconfig b/drivers/mtd/nand/raw/ingenic/Kconfig
index 19a96ce515c1..66b7cffdb0c2 100644
--- a/drivers/mtd/nand/raw/ingenic/Kconfig
+++ b/drivers/mtd/nand/raw/ingenic/Kconfig
@@ -16,7 +16,7 @@ config MTD_NAND_JZ4780
16if MTD_NAND_JZ4780 16if MTD_NAND_JZ4780
17 17
18config MTD_NAND_INGENIC_ECC 18config MTD_NAND_INGENIC_ECC
19 tristate 19 bool
20 20
21config MTD_NAND_JZ4740_ECC 21config MTD_NAND_JZ4740_ECC
22 tristate "Hardware BCH support for JZ4740 SoC" 22 tristate "Hardware BCH support for JZ4740 SoC"
diff --git a/drivers/mtd/nand/raw/ingenic/Makefile b/drivers/mtd/nand/raw/ingenic/Makefile
index 1ac4f455baea..b63d36889263 100644
--- a/drivers/mtd/nand/raw/ingenic/Makefile
+++ b/drivers/mtd/nand/raw/ingenic/Makefile
@@ -2,7 +2,9 @@
2obj-$(CONFIG_MTD_NAND_JZ4740) += jz4740_nand.o 2obj-$(CONFIG_MTD_NAND_JZ4740) += jz4740_nand.o
3obj-$(CONFIG_MTD_NAND_JZ4780) += ingenic_nand.o 3obj-$(CONFIG_MTD_NAND_JZ4780) += ingenic_nand.o
4 4
5obj-$(CONFIG_MTD_NAND_INGENIC_ECC) += ingenic_ecc.o 5ingenic_nand-y += ingenic_nand_drv.o
6ingenic_nand-$(CONFIG_MTD_NAND_INGENIC_ECC) += ingenic_ecc.o
7
6obj-$(CONFIG_MTD_NAND_JZ4740_ECC) += jz4740_ecc.o 8obj-$(CONFIG_MTD_NAND_JZ4740_ECC) += jz4740_ecc.o
7obj-$(CONFIG_MTD_NAND_JZ4725B_BCH) += jz4725b_bch.o 9obj-$(CONFIG_MTD_NAND_JZ4725B_BCH) += jz4725b_bch.o
8obj-$(CONFIG_MTD_NAND_JZ4780_BCH) += jz4780_bch.o 10obj-$(CONFIG_MTD_NAND_JZ4780_BCH) += jz4780_bch.o
diff --git a/drivers/mtd/nand/raw/ingenic/ingenic_ecc.c b/drivers/mtd/nand/raw/ingenic/ingenic_ecc.c
index d3e085c5685a..c954189606f6 100644
--- a/drivers/mtd/nand/raw/ingenic/ingenic_ecc.c
+++ b/drivers/mtd/nand/raw/ingenic/ingenic_ecc.c
@@ -30,7 +30,6 @@ int ingenic_ecc_calculate(struct ingenic_ecc *ecc,
30{ 30{
31 return ecc->ops->calculate(ecc, params, buf, ecc_code); 31 return ecc->ops->calculate(ecc, params, buf, ecc_code);
32} 32}
33EXPORT_SYMBOL(ingenic_ecc_calculate);
34 33
35/** 34/**
36 * ingenic_ecc_correct() - detect and correct bit errors 35 * ingenic_ecc_correct() - detect and correct bit errors
@@ -51,7 +50,6 @@ int ingenic_ecc_correct(struct ingenic_ecc *ecc,
51{ 50{
52 return ecc->ops->correct(ecc, params, buf, ecc_code); 51 return ecc->ops->correct(ecc, params, buf, ecc_code);
53} 52}
54EXPORT_SYMBOL(ingenic_ecc_correct);
55 53
56/** 54/**
57 * ingenic_ecc_get() - get the ECC controller device 55 * ingenic_ecc_get() - get the ECC controller device
@@ -111,7 +109,6 @@ struct ingenic_ecc *of_ingenic_ecc_get(struct device_node *of_node)
111 } 109 }
112 return ecc; 110 return ecc;
113} 111}
114EXPORT_SYMBOL(of_ingenic_ecc_get);
115 112
116/** 113/**
117 * ingenic_ecc_release() - release the ECC controller device 114 * ingenic_ecc_release() - release the ECC controller device
@@ -122,7 +119,6 @@ void ingenic_ecc_release(struct ingenic_ecc *ecc)
122 clk_disable_unprepare(ecc->clk); 119 clk_disable_unprepare(ecc->clk);
123 put_device(ecc->dev); 120 put_device(ecc->dev);
124} 121}
125EXPORT_SYMBOL(ingenic_ecc_release);
126 122
127int ingenic_ecc_probe(struct platform_device *pdev) 123int ingenic_ecc_probe(struct platform_device *pdev)
128{ 124{
@@ -159,8 +155,3 @@ int ingenic_ecc_probe(struct platform_device *pdev)
159 return 0; 155 return 0;
160} 156}
161EXPORT_SYMBOL(ingenic_ecc_probe); 157EXPORT_SYMBOL(ingenic_ecc_probe);
162
163MODULE_AUTHOR("Alex Smith <alex@alex-smith.me.uk>");
164MODULE_AUTHOR("Harvey Hunt <harveyhuntnexus@gmail.com>");
165MODULE_DESCRIPTION("Ingenic ECC common driver");
166MODULE_LICENSE("GPL v2");
diff --git a/drivers/mtd/nand/raw/ingenic/ingenic_nand.c b/drivers/mtd/nand/raw/ingenic/ingenic_nand_drv.c
index d7b7c0f13909..d7b7c0f13909 100644
--- a/drivers/mtd/nand/raw/ingenic/ingenic_nand.c
+++ b/drivers/mtd/nand/raw/ingenic/ingenic_nand_drv.c
diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index b5b68aa16eb3..6eb131292eb2 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -4662,7 +4662,6 @@ static int nand_detect(struct nand_chip *chip, struct nand_flash_dev *type)
4662 memorg = nanddev_get_memorg(&chip->base); 4662 memorg = nanddev_get_memorg(&chip->base);
4663 memorg->planes_per_lun = 1; 4663 memorg->planes_per_lun = 1;
4664 memorg->luns_per_target = 1; 4664 memorg->luns_per_target = 1;
4665 memorg->ntargets = 1;
4666 4665
4667 /* 4666 /*
4668 * Reset the chip, required by some chips (e.g. Micron MT29FxGxxxxx) 4667 * Reset the chip, required by some chips (e.g. Micron MT29FxGxxxxx)
@@ -5027,6 +5026,8 @@ static int nand_scan_ident(struct nand_chip *chip, unsigned int maxchips,
5027 if (ret) 5026 if (ret)
5028 return ret; 5027 return ret;
5029 5028
5029 memorg->ntargets = maxchips;
5030
5030 /* Read the flash type */ 5031 /* Read the flash type */
5031 ret = nand_detect(chip, table); 5032 ret = nand_detect(chip, table);
5032 if (ret) { 5033 if (ret) {
diff --git a/drivers/mtd/nand/raw/sunxi_nand.c b/drivers/mtd/nand/raw/sunxi_nand.c
index b021a5720b42..89773293c64d 100644
--- a/drivers/mtd/nand/raw/sunxi_nand.c
+++ b/drivers/mtd/nand/raw/sunxi_nand.c
@@ -51,6 +51,7 @@
51#define NFC_REG_USER_DATA(x) (0x0050 + ((x) * 4)) 51#define NFC_REG_USER_DATA(x) (0x0050 + ((x) * 4))
52#define NFC_REG_SPARE_AREA 0x00A0 52#define NFC_REG_SPARE_AREA 0x00A0
53#define NFC_REG_PAT_ID 0x00A4 53#define NFC_REG_PAT_ID 0x00A4
54#define NFC_REG_MDMA_CNT 0x00C4
54#define NFC_RAM0_BASE 0x0400 55#define NFC_RAM0_BASE 0x0400
55#define NFC_RAM1_BASE 0x0800 56#define NFC_RAM1_BASE 0x0800
56 57
@@ -69,6 +70,7 @@
69#define NFC_PAGE_SHIFT(x) (((x) < 10 ? 0 : (x) - 10) << 8) 70#define NFC_PAGE_SHIFT(x) (((x) < 10 ? 0 : (x) - 10) << 8)
70#define NFC_SAM BIT(12) 71#define NFC_SAM BIT(12)
71#define NFC_RAM_METHOD BIT(14) 72#define NFC_RAM_METHOD BIT(14)
73#define NFC_DMA_TYPE_NORMAL BIT(15)
72#define NFC_DEBUG_CTL BIT(31) 74#define NFC_DEBUG_CTL BIT(31)
73 75
74/* define bit use in NFC_ST */ 76/* define bit use in NFC_ST */
@@ -205,14 +207,13 @@ static inline struct sunxi_nand_chip *to_sunxi_nand(struct nand_chip *nand)
205 * NAND Controller capabilities structure: stores NAND controller capabilities 207 * NAND Controller capabilities structure: stores NAND controller capabilities
206 * for distinction between compatible strings. 208 * for distinction between compatible strings.
207 * 209 *
208 * @sram_through_ahb: On A23, we choose to access the internal RAM through AHB 210 * @extra_mbus_conf: Contrary to A10, A10s and A13, accessing internal RAM
209 * instead of MBUS (less configuration). A10, A10s, A13 and 211 * through MBUS on A23/A33 needs extra configuration.
210 * A20 use the MBUS but no extra configuration is needed.
211 * @reg_io_data: I/O data register 212 * @reg_io_data: I/O data register
212 * @dma_maxburst: DMA maxburst 213 * @dma_maxburst: DMA maxburst
213 */ 214 */
214struct sunxi_nfc_caps { 215struct sunxi_nfc_caps {
215 bool sram_through_ahb; 216 bool extra_mbus_conf;
216 unsigned int reg_io_data; 217 unsigned int reg_io_data;
217 unsigned int dma_maxburst; 218 unsigned int dma_maxburst;
218}; 219};
@@ -368,28 +369,12 @@ static int sunxi_nfc_dma_op_prepare(struct sunxi_nfc *nfc, const void *buf,
368 goto err_unmap_buf; 369 goto err_unmap_buf;
369 } 370 }
370 371
371 /* 372 writel(readl(nfc->regs + NFC_REG_CTL) | NFC_RAM_METHOD,
372 * On A23, we suppose the "internal RAM" (p.12 of the NFC user manual) 373 nfc->regs + NFC_REG_CTL);
373 * refers to the NAND controller's internal SRAM. This memory is mapped
374 * and so is accessible from the AHB. It seems that it can also be
375 * accessed by the MBUS. MBUS accesses are mandatory when using the
376 * internal DMA instead of the external DMA engine.
377 *
378 * During DMA I/O operation, either we access this memory from the AHB
379 * by clearing the NFC_RAM_METHOD bit, or we set the bit and use the
380 * MBUS. In this case, we should also configure the MBUS DMA length
381 * NFC_REG_MDMA_CNT(0xC4) to be chunksize * nchunks. NAND I/O over MBUS
382 * are also limited to 32kiB pages.
383 */
384 if (nfc->caps->sram_through_ahb)
385 writel(readl(nfc->regs + NFC_REG_CTL) & ~NFC_RAM_METHOD,
386 nfc->regs + NFC_REG_CTL);
387 else
388 writel(readl(nfc->regs + NFC_REG_CTL) | NFC_RAM_METHOD,
389 nfc->regs + NFC_REG_CTL);
390
391 writel(nchunks, nfc->regs + NFC_REG_SECTOR_NUM); 374 writel(nchunks, nfc->regs + NFC_REG_SECTOR_NUM);
392 writel(chunksize, nfc->regs + NFC_REG_CNT); 375 writel(chunksize, nfc->regs + NFC_REG_CNT);
376 if (nfc->caps->extra_mbus_conf)
377 writel(chunksize * nchunks, nfc->regs + NFC_REG_MDMA_CNT);
393 378
394 dmat = dmaengine_submit(dmad); 379 dmat = dmaengine_submit(dmad);
395 380
@@ -2151,6 +2136,11 @@ static int sunxi_nfc_probe(struct platform_device *pdev)
2151 dmac_cfg.src_maxburst = nfc->caps->dma_maxburst; 2136 dmac_cfg.src_maxburst = nfc->caps->dma_maxburst;
2152 dmac_cfg.dst_maxburst = nfc->caps->dma_maxburst; 2137 dmac_cfg.dst_maxburst = nfc->caps->dma_maxburst;
2153 dmaengine_slave_config(nfc->dmac, &dmac_cfg); 2138 dmaengine_slave_config(nfc->dmac, &dmac_cfg);
2139
2140 if (nfc->caps->extra_mbus_conf)
2141 writel(readl(nfc->regs + NFC_REG_CTL) |
2142 NFC_DMA_TYPE_NORMAL, nfc->regs + NFC_REG_CTL);
2143
2154 } else { 2144 } else {
2155 dev_warn(dev, "failed to request rxtx DMA channel\n"); 2145 dev_warn(dev, "failed to request rxtx DMA channel\n");
2156 } 2146 }
@@ -2200,7 +2190,7 @@ static const struct sunxi_nfc_caps sunxi_nfc_a10_caps = {
2200}; 2190};
2201 2191
2202static const struct sunxi_nfc_caps sunxi_nfc_a23_caps = { 2192static const struct sunxi_nfc_caps sunxi_nfc_a23_caps = {
2203 .sram_through_ahb = true, 2193 .extra_mbus_conf = true,
2204 .reg_io_data = NFC_REG_A23_IO_DATA, 2194 .reg_io_data = NFC_REG_A23_IO_DATA,
2205 .dma_maxburst = 8, 2195 .dma_maxburst = 8,
2206}; 2196};
diff --git a/drivers/mtd/nand/spi/gigadevice.c b/drivers/mtd/nand/spi/gigadevice.c
index e5586390026a..e6c646007cda 100644
--- a/drivers/mtd/nand/spi/gigadevice.c
+++ b/drivers/mtd/nand/spi/gigadevice.c
@@ -180,7 +180,7 @@ static const struct spinand_info gigadevice_spinand_table[] = {
180 SPINAND_ECCINFO(&gd5fxgq4xa_ooblayout, 180 SPINAND_ECCINFO(&gd5fxgq4xa_ooblayout,
181 gd5fxgq4xa_ecc_get_status)), 181 gd5fxgq4xa_ecc_get_status)),
182 SPINAND_INFO("GD5F4GQ4xA", 0xF4, 182 SPINAND_INFO("GD5F4GQ4xA", 0xF4,
183 NAND_MEMORG(1, 2048, 64, 64, 4096, 40, 1, 1, 1), 183 NAND_MEMORG(1, 2048, 64, 64, 4096, 80, 1, 1, 1),
184 NAND_ECCREQ(8, 512), 184 NAND_ECCREQ(8, 512),
185 SPINAND_INFO_OP_VARIANTS(&read_cache_variants, 185 SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
186 &write_cache_variants, 186 &write_cache_variants,
diff --git a/drivers/mtd/nand/spi/macronix.c b/drivers/mtd/nand/spi/macronix.c
index 6502727049a8..21def3f8fb36 100644
--- a/drivers/mtd/nand/spi/macronix.c
+++ b/drivers/mtd/nand/spi/macronix.c
@@ -100,7 +100,7 @@ static int mx35lf1ge4ab_ecc_get_status(struct spinand_device *spinand,
100 100
101static const struct spinand_info macronix_spinand_table[] = { 101static const struct spinand_info macronix_spinand_table[] = {
102 SPINAND_INFO("MX35LF1GE4AB", 0x12, 102 SPINAND_INFO("MX35LF1GE4AB", 0x12,
103 NAND_MEMORG(1, 2048, 64, 64, 1024, 40, 1, 1, 1), 103 NAND_MEMORG(1, 2048, 64, 64, 1024, 20, 1, 1, 1),
104 NAND_ECCREQ(4, 512), 104 NAND_ECCREQ(4, 512),
105 SPINAND_INFO_OP_VARIANTS(&read_cache_variants, 105 SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
106 &write_cache_variants, 106 &write_cache_variants,
@@ -109,7 +109,7 @@ static const struct spinand_info macronix_spinand_table[] = {
109 SPINAND_ECCINFO(&mx35lfxge4ab_ooblayout, 109 SPINAND_ECCINFO(&mx35lfxge4ab_ooblayout,
110 mx35lf1ge4ab_ecc_get_status)), 110 mx35lf1ge4ab_ecc_get_status)),
111 SPINAND_INFO("MX35LF2GE4AB", 0x22, 111 SPINAND_INFO("MX35LF2GE4AB", 0x22,
112 NAND_MEMORG(1, 2048, 64, 64, 2048, 20, 2, 1, 1), 112 NAND_MEMORG(1, 2048, 64, 64, 2048, 40, 2, 1, 1),
113 NAND_ECCREQ(4, 512), 113 NAND_ECCREQ(4, 512),
114 SPINAND_INFO_OP_VARIANTS(&read_cache_variants, 114 SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
115 &write_cache_variants, 115 &write_cache_variants,
diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index 73172d7f512b..0c2ec1c21434 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -1636,6 +1636,95 @@ static int sr2_bit7_quad_enable(struct spi_nor *nor)
1636 return 0; 1636 return 0;
1637} 1637}
1638 1638
1639/**
1640 * spi_nor_clear_sr_bp() - clear the Status Register Block Protection bits.
1641 * @nor: pointer to a 'struct spi_nor'
1642 *
1643 * Read-modify-write function that clears the Block Protection bits from the
1644 * Status Register without affecting other bits.
1645 *
1646 * Return: 0 on success, -errno otherwise.
1647 */
1648static int spi_nor_clear_sr_bp(struct spi_nor *nor)
1649{
1650 int ret;
1651 u8 mask = SR_BP2 | SR_BP1 | SR_BP0;
1652
1653 ret = read_sr(nor);
1654 if (ret < 0) {
1655 dev_err(nor->dev, "error while reading status register\n");
1656 return ret;
1657 }
1658
1659 write_enable(nor);
1660
1661 ret = write_sr(nor, ret & ~mask);
1662 if (ret) {
1663 dev_err(nor->dev, "write to status register failed\n");
1664 return ret;
1665 }
1666
1667 ret = spi_nor_wait_till_ready(nor);
1668 if (ret)
1669 dev_err(nor->dev, "timeout while writing status register\n");
1670 return ret;
1671}
1672
1673/**
1674 * spi_nor_spansion_clear_sr_bp() - clear the Status Register Block Protection
1675 * bits on spansion flashes.
1676 * @nor: pointer to a 'struct spi_nor'
1677 *
1678 * Read-modify-write function that clears the Block Protection bits from the
1679 * Status Register without affecting other bits. The function is tightly
1680 * coupled with the spansion_quad_enable() function. Both assume that the
1681 * 16-bit Write Status Register (01h) and the Read Configuration Register (35h)
1682 * instructions are supported.
1683 *
1684 * Return: 0 on success, -errno otherwise.
1685 */
1686static int spi_nor_spansion_clear_sr_bp(struct spi_nor *nor)
1687{
1688 int ret;
1689 u8 mask = SR_BP2 | SR_BP1 | SR_BP0;
1690 u8 sr_cr[2] = {0};
1691
1692 /* Check current Quad Enable bit value. */
1693 ret = read_cr(nor);
1694 if (ret < 0) {
1695 dev_err(nor->dev,
1696 "error while reading configuration register\n");
1697 return ret;
1698 }
1699
1700 /*
1701 * When the configuration register Quad Enable bit is one, only the
1702 * Write Status (01h) command with two data bytes may be used.
1703 */
1704 if (ret & CR_QUAD_EN_SPAN) {
1705 sr_cr[1] = ret;
1706
1707 ret = read_sr(nor);
1708 if (ret < 0) {
1709 dev_err(nor->dev,
1710 "error while reading status register\n");
1711 return ret;
1712 }
1713 sr_cr[0] = ret & ~mask;
1714
1715 ret = write_sr_cr(nor, sr_cr);
1716 if (ret)
1717 dev_err(nor->dev, "16-bit write register failed\n");
1718 return ret;
1719 }
1720
1721 /*
1722 * If the Quad Enable bit is zero, use the Write Status (01h) command
1723 * with one data byte.
1724 */
1725 return spi_nor_clear_sr_bp(nor);
1726}
1727
1639/* Used when the "_ext_id" is two bytes at most */ 1728/* Used when the "_ext_id" is two bytes at most */
1640#define INFO(_jedec_id, _ext_id, _sector_size, _n_sectors, _flags) \ 1729#define INFO(_jedec_id, _ext_id, _sector_size, _n_sectors, _flags) \
1641 .id = { \ 1730 .id = { \
@@ -3660,6 +3749,8 @@ static int spi_nor_init_params(struct spi_nor *nor,
3660 default: 3749 default:
3661 /* Kept only for backward compatibility purpose. */ 3750 /* Kept only for backward compatibility purpose. */
3662 params->quad_enable = spansion_quad_enable; 3751 params->quad_enable = spansion_quad_enable;
3752 if (nor->clear_sr_bp)
3753 nor->clear_sr_bp = spi_nor_spansion_clear_sr_bp;
3663 break; 3754 break;
3664 } 3755 }
3665 3756
@@ -3912,17 +4003,13 @@ static int spi_nor_init(struct spi_nor *nor)
3912{ 4003{
3913 int err; 4004 int err;
3914 4005
3915 /* 4006 if (nor->clear_sr_bp) {
3916 * Atmel, SST, Intel/Numonyx, and others serial NOR tend to power up 4007 err = nor->clear_sr_bp(nor);
3917 * with the software protection bits set 4008 if (err) {
3918 */ 4009 dev_err(nor->dev,
3919 if (JEDEC_MFR(nor->info) == SNOR_MFR_ATMEL || 4010 "fail to clear block protection bits\n");
3920 JEDEC_MFR(nor->info) == SNOR_MFR_INTEL || 4011 return err;
3921 JEDEC_MFR(nor->info) == SNOR_MFR_SST || 4012 }
3922 nor->info->flags & SPI_NOR_HAS_LOCK) {
3923 write_enable(nor);
3924 write_sr(nor, 0);
3925 spi_nor_wait_till_ready(nor);
3926 } 4013 }
3927 4014
3928 if (nor->quad_enable) { 4015 if (nor->quad_enable) {
@@ -4047,6 +4134,16 @@ int spi_nor_scan(struct spi_nor *nor, const char *name,
4047 if (info->flags & SPI_S3AN) 4134 if (info->flags & SPI_S3AN)
4048 nor->flags |= SNOR_F_READY_XSR_RDY; 4135 nor->flags |= SNOR_F_READY_XSR_RDY;
4049 4136
4137 /*
4138	 * Atmel, SST, Intel/Numonyx, and other serial NOR flashes tend to power up
4139 * with the software protection bits set.
4140 */
4141 if (JEDEC_MFR(nor->info) == SNOR_MFR_ATMEL ||
4142 JEDEC_MFR(nor->info) == SNOR_MFR_INTEL ||
4143 JEDEC_MFR(nor->info) == SNOR_MFR_SST ||
4144 nor->info->flags & SPI_NOR_HAS_LOCK)
4145 nor->clear_sr_bp = spi_nor_clear_sr_bp;
4146
4050 /* Parse the Serial Flash Discoverable Parameters table. */ 4147 /* Parse the Serial Flash Discoverable Parameters table. */
4051 ret = spi_nor_init_params(nor, &params); 4148 ret = spi_nor_init_params(nor, &params);
4052 if (ret) 4149 if (ret)
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 407f4095a37a..799fc38c5c34 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -4320,12 +4320,12 @@ void bond_setup(struct net_device *bond_dev)
4320 bond_dev->features |= NETIF_F_NETNS_LOCAL; 4320 bond_dev->features |= NETIF_F_NETNS_LOCAL;
4321 4321
4322 bond_dev->hw_features = BOND_VLAN_FEATURES | 4322 bond_dev->hw_features = BOND_VLAN_FEATURES |
4323 NETIF_F_HW_VLAN_CTAG_TX |
4324 NETIF_F_HW_VLAN_CTAG_RX | 4323 NETIF_F_HW_VLAN_CTAG_RX |
4325 NETIF_F_HW_VLAN_CTAG_FILTER; 4324 NETIF_F_HW_VLAN_CTAG_FILTER;
4326 4325
4327 bond_dev->hw_features |= NETIF_F_GSO_ENCAP_ALL | NETIF_F_GSO_UDP_L4; 4326 bond_dev->hw_features |= NETIF_F_GSO_ENCAP_ALL | NETIF_F_GSO_UDP_L4;
4328 bond_dev->features |= bond_dev->hw_features; 4327 bond_dev->features |= bond_dev->hw_features;
4328 bond_dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
4329} 4329}
4330 4330
4331/* Destroy a bonding device. 4331/* Destroy a bonding device.
diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
index f46086fa9064..db91b213eae1 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -436,9 +436,9 @@ int ksz_switch_register(struct ksz_device *dev,
436 return PTR_ERR(dev->reset_gpio); 436 return PTR_ERR(dev->reset_gpio);
437 437
438 if (dev->reset_gpio) { 438 if (dev->reset_gpio) {
439 gpiod_set_value(dev->reset_gpio, 1); 439 gpiod_set_value_cansleep(dev->reset_gpio, 1);
440 mdelay(10); 440 mdelay(10);
441 gpiod_set_value(dev->reset_gpio, 0); 441 gpiod_set_value_cansleep(dev->reset_gpio, 0);
442 } 442 }
443 443
444 mutex_init(&dev->dev_mutex); 444 mutex_init(&dev->dev_mutex);
@@ -487,7 +487,7 @@ void ksz_switch_remove(struct ksz_device *dev)
487 dsa_unregister_switch(dev->ds); 487 dsa_unregister_switch(dev->ds);
488 488
489 if (dev->reset_gpio) 489 if (dev->reset_gpio)
490 gpiod_set_value(dev->reset_gpio, 1); 490 gpiod_set_value_cansleep(dev->reset_gpio, 1);
491 491
492} 492}
493EXPORT_SYMBOL(ksz_switch_remove); 493EXPORT_SYMBOL(ksz_switch_remove);
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_filters.c b/drivers/net/ethernet/aquantia/atlantic/aq_filters.c
index 18bc035da850..1fff462a4175 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_filters.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_filters.c
@@ -843,9 +843,14 @@ int aq_filters_vlans_update(struct aq_nic_s *aq_nic)
843 return err; 843 return err;
844 844
845 if (aq_nic->ndev->features & NETIF_F_HW_VLAN_CTAG_FILTER) { 845 if (aq_nic->ndev->features & NETIF_F_HW_VLAN_CTAG_FILTER) {
846 if (hweight < AQ_VLAN_MAX_FILTERS) 846 if (hweight < AQ_VLAN_MAX_FILTERS && hweight > 0) {
847 err = aq_hw_ops->hw_filter_vlan_ctrl(aq_hw, true); 847 err = aq_hw_ops->hw_filter_vlan_ctrl(aq_hw,
848 !(aq_nic->packet_filter & IFF_PROMISC));
849 aq_nic->aq_nic_cfg.is_vlan_force_promisc = false;
850 } else {
848 /* otherwise left in promiscue mode */ 851 /* otherwise left in promiscue mode */
852 aq_nic->aq_nic_cfg.is_vlan_force_promisc = true;
853 }
849 } 854 }
850 855
851 return err; 856 return err;
@@ -866,6 +871,7 @@ int aq_filters_vlan_offload_off(struct aq_nic_s *aq_nic)
866 if (unlikely(!aq_hw_ops->hw_filter_vlan_ctrl)) 871 if (unlikely(!aq_hw_ops->hw_filter_vlan_ctrl))
867 return -EOPNOTSUPP; 872 return -EOPNOTSUPP;
868 873
874 aq_nic->aq_nic_cfg.is_vlan_force_promisc = true;
869 err = aq_hw_ops->hw_filter_vlan_ctrl(aq_hw, false); 875 err = aq_hw_ops->hw_filter_vlan_ctrl(aq_hw, false);
870 if (err) 876 if (err)
871 return err; 877 return err;
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
index 0da5e161ec5d..41172fbebddd 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
@@ -126,6 +126,7 @@ void aq_nic_cfg_start(struct aq_nic_s *self)
126 126
127 cfg->link_speed_msk &= cfg->aq_hw_caps->link_speed_msk; 127 cfg->link_speed_msk &= cfg->aq_hw_caps->link_speed_msk;
128 cfg->features = cfg->aq_hw_caps->hw_features; 128 cfg->features = cfg->aq_hw_caps->hw_features;
129 cfg->is_vlan_force_promisc = true;
129} 130}
130 131
131static int aq_nic_update_link_status(struct aq_nic_s *self) 132static int aq_nic_update_link_status(struct aq_nic_s *self)
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
index eb2e3c7c36f9..0f22f5d5691b 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
@@ -35,6 +35,7 @@ struct aq_nic_cfg_s {
35 u32 flow_control; 35 u32 flow_control;
36 u32 link_speed_msk; 36 u32 link_speed_msk;
37 u32 wol; 37 u32 wol;
38 bool is_vlan_force_promisc;
38 u16 is_mc_list_enabled; 39 u16 is_mc_list_enabled;
39 u16 mc_list_count; 40 u16 mc_list_count;
40 bool is_autoneg; 41 bool is_autoneg;
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
index 1c7593d54035..13ac2661a473 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
@@ -778,8 +778,15 @@ static int hw_atl_b0_hw_packet_filter_set(struct aq_hw_s *self,
778 unsigned int packet_filter) 778 unsigned int packet_filter)
779{ 779{
780 unsigned int i = 0U; 780 unsigned int i = 0U;
781 struct aq_nic_cfg_s *cfg = self->aq_nic_cfg;
782
783 hw_atl_rpfl2promiscuous_mode_en_set(self,
784 IS_FILTER_ENABLED(IFF_PROMISC));
785
786 hw_atl_rpf_vlan_prom_mode_en_set(self,
787 IS_FILTER_ENABLED(IFF_PROMISC) ||
788 cfg->is_vlan_force_promisc);
781 789
782 hw_atl_rpfl2promiscuous_mode_en_set(self, IS_FILTER_ENABLED(IFF_PROMISC));
783 hw_atl_rpfl2multicast_flr_en_set(self, 790 hw_atl_rpfl2multicast_flr_en_set(self,
784 IS_FILTER_ENABLED(IFF_ALLMULTI), 0); 791 IS_FILTER_ENABLED(IFF_ALLMULTI), 0);
785 792
@@ -788,13 +795,13 @@ static int hw_atl_b0_hw_packet_filter_set(struct aq_hw_s *self,
788 795
789 hw_atl_rpfl2broadcast_en_set(self, IS_FILTER_ENABLED(IFF_BROADCAST)); 796 hw_atl_rpfl2broadcast_en_set(self, IS_FILTER_ENABLED(IFF_BROADCAST));
790 797
791 self->aq_nic_cfg->is_mc_list_enabled = IS_FILTER_ENABLED(IFF_MULTICAST); 798 cfg->is_mc_list_enabled = IS_FILTER_ENABLED(IFF_MULTICAST);
792 799
793 for (i = HW_ATL_B0_MAC_MIN; i < HW_ATL_B0_MAC_MAX; ++i) 800 for (i = HW_ATL_B0_MAC_MIN; i < HW_ATL_B0_MAC_MAX; ++i)
794 hw_atl_rpfl2_uc_flr_en_set(self, 801 hw_atl_rpfl2_uc_flr_en_set(self,
795 (self->aq_nic_cfg->is_mc_list_enabled && 802 (cfg->is_mc_list_enabled &&
796 (i <= self->aq_nic_cfg->mc_list_count)) ? 803 (i <= cfg->mc_list_count)) ?
797 1U : 0U, i); 804 1U : 0U, i);
798 805
799 return aq_hw_err_from_flags(self); 806 return aq_hw_err_from_flags(self);
800} 807}
@@ -1086,7 +1093,7 @@ static int hw_atl_b0_hw_vlan_set(struct aq_hw_s *self,
1086static int hw_atl_b0_hw_vlan_ctrl(struct aq_hw_s *self, bool enable) 1093static int hw_atl_b0_hw_vlan_ctrl(struct aq_hw_s *self, bool enable)
1087{ 1094{
1088 /* set promisc in case of disabing the vland filter */ 1095 /* set promisc in case of disabing the vland filter */
1089 hw_atl_rpf_vlan_prom_mode_en_set(self, !!!enable); 1096 hw_atl_rpf_vlan_prom_mode_en_set(self, !enable);
1090 1097
1091 return aq_hw_err_from_flags(self); 1098 return aq_hw_err_from_flags(self);
1092} 1099}
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index 2375a13bb446..262a28ff81fc 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -4180,7 +4180,7 @@ static int macb_probe(struct platform_device *pdev)
4180 if (PTR_ERR(mac) == -EPROBE_DEFER) { 4180 if (PTR_ERR(mac) == -EPROBE_DEFER) {
4181 err = -EPROBE_DEFER; 4181 err = -EPROBE_DEFER;
4182 goto err_out_free_netdev; 4182 goto err_out_free_netdev;
4183 } else if (!IS_ERR(mac)) { 4183 } else if (!IS_ERR_OR_NULL(mac)) {
4184 ether_addr_copy(bp->dev->dev_addr, mac); 4184 ether_addr_copy(bp->dev->dev_addr, mac);
4185 } else { 4185 } else {
4186 macb_get_hwaddr(bp); 4186 macb_get_hwaddr(bp);
diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c
index 8a6785173228..492f8769ac12 100644
--- a/drivers/net/ethernet/emulex/benet/be_ethtool.c
+++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c
@@ -891,7 +891,7 @@ static void be_self_test(struct net_device *netdev, struct ethtool_test *test,
891 u64 *data) 891 u64 *data)
892{ 892{
893 struct be_adapter *adapter = netdev_priv(netdev); 893 struct be_adapter *adapter = netdev_priv(netdev);
894 int status; 894 int status, cnt;
895 u8 link_status = 0; 895 u8 link_status = 0;
896 896
897 if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) { 897 if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
@@ -902,6 +902,9 @@ static void be_self_test(struct net_device *netdev, struct ethtool_test *test,
902 902
903 memset(data, 0, sizeof(u64) * ETHTOOL_TESTS_NUM); 903 memset(data, 0, sizeof(u64) * ETHTOOL_TESTS_NUM);
904 904
905 /* check link status before offline tests */
906 link_status = netif_carrier_ok(netdev);
907
905 if (test->flags & ETH_TEST_FL_OFFLINE) { 908 if (test->flags & ETH_TEST_FL_OFFLINE) {
906 if (be_loopback_test(adapter, BE_MAC_LOOPBACK, &data[0]) != 0) 909 if (be_loopback_test(adapter, BE_MAC_LOOPBACK, &data[0]) != 0)
907 test->flags |= ETH_TEST_FL_FAILED; 910 test->flags |= ETH_TEST_FL_FAILED;
@@ -922,13 +925,26 @@ static void be_self_test(struct net_device *netdev, struct ethtool_test *test,
922 test->flags |= ETH_TEST_FL_FAILED; 925 test->flags |= ETH_TEST_FL_FAILED;
923 } 926 }
924 927
925 status = be_cmd_link_status_query(adapter, NULL, &link_status, 0); 928 /* link status was down prior to test */
926 if (status) { 929 if (!link_status) {
927 test->flags |= ETH_TEST_FL_FAILED;
928 data[4] = -1;
929 } else if (!link_status) {
930 test->flags |= ETH_TEST_FL_FAILED; 930 test->flags |= ETH_TEST_FL_FAILED;
931 data[4] = 1; 931 data[4] = 1;
932 return;
933 }
934
935 for (cnt = 10; cnt; cnt--) {
936 status = be_cmd_link_status_query(adapter, NULL, &link_status,
937 0);
938 if (status) {
939 test->flags |= ETH_TEST_FL_FAILED;
940 data[4] = -1;
941 break;
942 }
943
944 if (link_status)
945 break;
946
947 msleep_interruptible(500);
932 } 948 }
933} 949}
934 950
diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c
index 67f9bb6e941b..9b036c857b1d 100644
--- a/drivers/net/ethernet/sis/sis900.c
+++ b/drivers/net/ethernet/sis/sis900.c
@@ -1057,7 +1057,7 @@ sis900_open(struct net_device *net_dev)
1057 sis900_set_mode(sis_priv, HW_SPEED_10_MBPS, FDX_CAPABLE_HALF_SELECTED); 1057 sis900_set_mode(sis_priv, HW_SPEED_10_MBPS, FDX_CAPABLE_HALF_SELECTED);
1058 1058
1059 /* Enable all known interrupts by setting the interrupt mask. */ 1059 /* Enable all known interrupts by setting the interrupt mask. */
1060 sw32(imr, RxSOVR | RxORN | RxERR | RxOK | TxURN | TxERR | TxIDLE); 1060 sw32(imr, RxSOVR | RxORN | RxERR | RxOK | TxURN | TxERR | TxIDLE | TxDESC);
1061 sw32(cr, RxENA | sr32(cr)); 1061 sw32(cr, RxENA | sr32(cr));
1062 sw32(ier, IE); 1062 sw32(ier, IE);
1063 1063
@@ -1578,7 +1578,7 @@ static void sis900_tx_timeout(struct net_device *net_dev)
1578 sw32(txdp, sis_priv->tx_ring_dma); 1578 sw32(txdp, sis_priv->tx_ring_dma);
1579 1579
1580 /* Enable all known interrupts by setting the interrupt mask. */ 1580 /* Enable all known interrupts by setting the interrupt mask. */
1581 sw32(imr, RxSOVR | RxORN | RxERR | RxOK | TxURN | TxERR | TxIDLE); 1581 sw32(imr, RxSOVR | RxORN | RxERR | RxOK | TxURN | TxERR | TxIDLE | TxDESC);
1582} 1582}
1583 1583
1584/** 1584/**
@@ -1618,7 +1618,7 @@ sis900_start_xmit(struct sk_buff *skb, struct net_device *net_dev)
1618 spin_unlock_irqrestore(&sis_priv->lock, flags); 1618 spin_unlock_irqrestore(&sis_priv->lock, flags);
1619 return NETDEV_TX_OK; 1619 return NETDEV_TX_OK;
1620 } 1620 }
1621 sis_priv->tx_ring[entry].cmdsts = (OWN | skb->len); 1621 sis_priv->tx_ring[entry].cmdsts = (OWN | INTR | skb->len);
1622 sw32(cr, TxENA | sr32(cr)); 1622 sw32(cr, TxENA | sr32(cr));
1623 1623
1624 sis_priv->cur_tx ++; 1624 sis_priv->cur_tx ++;
@@ -1674,7 +1674,7 @@ static irqreturn_t sis900_interrupt(int irq, void *dev_instance)
1674 do { 1674 do {
1675 status = sr32(isr); 1675 status = sr32(isr);
1676 1676
1677 if ((status & (HIBERR|TxURN|TxERR|TxIDLE|RxORN|RxERR|RxOK)) == 0) 1677 if ((status & (HIBERR|TxURN|TxERR|TxIDLE|TxDESC|RxORN|RxERR|RxOK)) == 0)
1678 /* nothing interesting happened */ 1678 /* nothing interesting happened */
1679 break; 1679 break;
1680 handled = 1; 1680 handled = 1;
@@ -1684,7 +1684,7 @@ static irqreturn_t sis900_interrupt(int irq, void *dev_instance)
1684 /* Rx interrupt */ 1684 /* Rx interrupt */
1685 sis900_rx(net_dev); 1685 sis900_rx(net_dev);
1686 1686
1687 if (status & (TxURN | TxERR | TxIDLE)) 1687 if (status & (TxURN | TxERR | TxIDLE | TxDESC))
1688 /* Tx interrupt */ 1688 /* Tx interrupt */
1689 sis900_finish_xmit(net_dev); 1689 sis900_finish_xmit(net_dev);
1690 1690
@@ -1896,8 +1896,8 @@ static void sis900_finish_xmit (struct net_device *net_dev)
1896 1896
1897 if (tx_status & OWN) { 1897 if (tx_status & OWN) {
1898 /* The packet is not transmitted yet (owned by hardware) ! 1898 /* The packet is not transmitted yet (owned by hardware) !
1899 * Note: the interrupt is generated only when Tx Machine 1899 * Note: this is an almost impossible condition
1900 * is idle, so this is an almost impossible case */ 1900 * in case of TxDESC ('descriptor interrupt') */
1901 break; 1901 break;
1902 } 1902 }
1903 1903
@@ -2473,7 +2473,7 @@ static int sis900_resume(struct pci_dev *pci_dev)
2473 sis900_set_mode(sis_priv, HW_SPEED_10_MBPS, FDX_CAPABLE_HALF_SELECTED); 2473 sis900_set_mode(sis_priv, HW_SPEED_10_MBPS, FDX_CAPABLE_HALF_SELECTED);
2474 2474
2475 /* Enable all known interrupts by setting the interrupt mask. */ 2475 /* Enable all known interrupts by setting the interrupt mask. */
2476 sw32(imr, RxSOVR | RxORN | RxERR | RxOK | TxURN | TxERR | TxIDLE); 2476 sw32(imr, RxSOVR | RxORN | RxERR | RxOK | TxURN | TxERR | TxIDLE | TxDESC);
2477 sw32(cr, RxENA | sr32(cr)); 2477 sw32(cr, RxENA | sr32(cr));
2478 sw32(ier, IE); 2478 sw32(ier, IE);
2479 2479
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
index 2dcdf761d525..020159622559 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
@@ -112,7 +112,7 @@ static int adjust_systime(void __iomem *ioaddr, u32 sec, u32 nsec,
112 * programmed with (2^32 – <new_sec_value>) 112 * programmed with (2^32 – <new_sec_value>)
113 */ 113 */
114 if (gmac4) 114 if (gmac4)
115 sec = (100000000ULL - sec); 115 sec = -sec;
116 116
117 value = readl(ioaddr + PTP_TCR); 117 value = readl(ioaddr + PTP_TCR);
118 if (value & PTP_TCR_TSCTRLSSR) 118 if (value & PTP_TCR_TSCTRLSSR)
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 06dd51f47cfd..06358fe5b245 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -2947,12 +2947,15 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
2947 2947
2948 /* Manage tx mitigation */ 2948 /* Manage tx mitigation */
2949 tx_q->tx_count_frames += nfrags + 1; 2949 tx_q->tx_count_frames += nfrags + 1;
2950 if (priv->tx_coal_frames <= tx_q->tx_count_frames) { 2950 if (likely(priv->tx_coal_frames > tx_q->tx_count_frames) &&
2951 !(priv->synopsys_id >= DWMAC_CORE_4_00 &&
2952 (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
2953 priv->hwts_tx_en)) {
2954 stmmac_tx_timer_arm(priv, queue);
2955 } else {
2956 tx_q->tx_count_frames = 0;
2951 stmmac_set_tx_ic(priv, desc); 2957 stmmac_set_tx_ic(priv, desc);
2952 priv->xstats.tx_set_ic_bit++; 2958 priv->xstats.tx_set_ic_bit++;
2953 tx_q->tx_count_frames = 0;
2954 } else {
2955 stmmac_tx_timer_arm(priv, queue);
2956 } 2959 }
2957 2960
2958 skb_tx_timestamp(skb); 2961 skb_tx_timestamp(skb);
@@ -3166,12 +3169,15 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
3166 * element in case of no SG. 3169 * element in case of no SG.
3167 */ 3170 */
3168 tx_q->tx_count_frames += nfrags + 1; 3171 tx_q->tx_count_frames += nfrags + 1;
3169 if (priv->tx_coal_frames <= tx_q->tx_count_frames) { 3172 if (likely(priv->tx_coal_frames > tx_q->tx_count_frames) &&
3173 !(priv->synopsys_id >= DWMAC_CORE_4_00 &&
3174 (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
3175 priv->hwts_tx_en)) {
3176 stmmac_tx_timer_arm(priv, queue);
3177 } else {
3178 tx_q->tx_count_frames = 0;
3170 stmmac_set_tx_ic(priv, desc); 3179 stmmac_set_tx_ic(priv, desc);
3171 priv->xstats.tx_set_ic_bit++; 3180 priv->xstats.tx_set_ic_bit++;
3172 tx_q->tx_count_frames = 0;
3173 } else {
3174 stmmac_tx_timer_arm(priv, queue);
3175 } 3181 }
3176 3182
3177 skb_tx_timestamp(skb); 3183 skb_tx_timestamp(skb);
diff --git a/drivers/net/ppp/ppp_mppe.c b/drivers/net/ppp/ppp_mppe.c
index ff61dd8748de..66c8e65f6872 100644
--- a/drivers/net/ppp/ppp_mppe.c
+++ b/drivers/net/ppp/ppp_mppe.c
@@ -63,6 +63,7 @@ MODULE_AUTHOR("Frank Cusack <fcusack@fcusack.com>");
63MODULE_DESCRIPTION("Point-to-Point Protocol Microsoft Point-to-Point Encryption support"); 63MODULE_DESCRIPTION("Point-to-Point Protocol Microsoft Point-to-Point Encryption support");
64MODULE_LICENSE("Dual BSD/GPL"); 64MODULE_LICENSE("Dual BSD/GPL");
65MODULE_ALIAS("ppp-compress-" __stringify(CI_MPPE)); 65MODULE_ALIAS("ppp-compress-" __stringify(CI_MPPE));
66MODULE_SOFTDEP("pre: arc4");
66MODULE_VERSION("1.0.2"); 67MODULE_VERSION("1.0.2");
67 68
68static unsigned int 69static unsigned int
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index b48006e7fa2f..36916bf51ee6 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -2128,12 +2128,12 @@ static void team_setup(struct net_device *dev)
2128 dev->features |= NETIF_F_NETNS_LOCAL; 2128 dev->features |= NETIF_F_NETNS_LOCAL;
2129 2129
2130 dev->hw_features = TEAM_VLAN_FEATURES | 2130 dev->hw_features = TEAM_VLAN_FEATURES |
2131 NETIF_F_HW_VLAN_CTAG_TX |
2132 NETIF_F_HW_VLAN_CTAG_RX | 2131 NETIF_F_HW_VLAN_CTAG_RX |
2133 NETIF_F_HW_VLAN_CTAG_FILTER; 2132 NETIF_F_HW_VLAN_CTAG_FILTER;
2134 2133
2135 dev->hw_features |= NETIF_F_GSO_ENCAP_ALL | NETIF_F_GSO_UDP_L4; 2134 dev->hw_features |= NETIF_F_GSO_ENCAP_ALL | NETIF_F_GSO_UDP_L4;
2136 dev->features |= dev->hw_features; 2135 dev->features |= dev->hw_features;
2136 dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
2137} 2137}
2138 2138
2139static int team_newlink(struct net *src_net, struct net_device *dev, 2139static int team_newlink(struct net *src_net, struct net_device *dev,
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index d080f8048e52..8b4ad10cf940 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -1482,7 +1482,7 @@ static int qmi_wwan_probe(struct usb_interface *intf,
1482 * different. Ignore the current interface if the number of endpoints 1482 * different. Ignore the current interface if the number of endpoints
1483 * equals the number for the diag interface (two). 1483 * equals the number for the diag interface (two).
1484 */ 1484 */
1485 info = (void *)&id->driver_info; 1485 info = (void *)id->driver_info;
1486 1486
1487 if (info->data & QMI_WWAN_QUIRK_QUECTEL_DYNCFG) { 1487 if (info->data & QMI_WWAN_QUIRK_QUECTEL_DYNCFG) {
1488 if (desc->bNumEndpoints == 2) 1488 if (desc->bNumEndpoints == 2)
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 11b9525dff27..311b0cc6eb98 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -350,8 +350,8 @@ static int vrf_finish_output6(struct net *net, struct sock *sk,
350{ 350{
351 struct dst_entry *dst = skb_dst(skb); 351 struct dst_entry *dst = skb_dst(skb);
352 struct net_device *dev = dst->dev; 352 struct net_device *dev = dst->dev;
353 const struct in6_addr *nexthop;
353 struct neighbour *neigh; 354 struct neighbour *neigh;
354 struct in6_addr *nexthop;
355 int ret; 355 int ret;
356 356
357 nf_reset(skb); 357 nf_reset(skb);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c
index fec38a47696e..9f4b117db9d7 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c
@@ -93,7 +93,7 @@ void iwl_mvm_ftm_restart(struct iwl_mvm *mvm)
93 struct cfg80211_pmsr_result result = { 93 struct cfg80211_pmsr_result result = {
94 .status = NL80211_PMSR_STATUS_FAILURE, 94 .status = NL80211_PMSR_STATUS_FAILURE,
95 .final = 1, 95 .final = 1,
96 .host_time = ktime_get_boot_ns(), 96 .host_time = ktime_get_boottime_ns(),
97 .type = NL80211_PMSR_TYPE_FTM, 97 .type = NL80211_PMSR_TYPE_FTM,
98 }; 98 };
99 int i; 99 int i;
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rx.c b/drivers/net/wireless/intel/iwlwifi/mvm/rx.c
index fbd3014e8b82..160b0db27103 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rx.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rx.c
@@ -555,7 +555,7 @@ void iwl_mvm_rx_rx_mpdu(struct iwl_mvm *mvm, struct napi_struct *napi,
555 555
556 if (unlikely(ieee80211_is_beacon(hdr->frame_control) || 556 if (unlikely(ieee80211_is_beacon(hdr->frame_control) ||
557 ieee80211_is_probe_resp(hdr->frame_control))) 557 ieee80211_is_probe_resp(hdr->frame_control)))
558 rx_status->boottime_ns = ktime_get_boot_ns(); 558 rx_status->boottime_ns = ktime_get_boottime_ns();
559 559
560 /* Take a reference briefly to kick off a d0i3 entry delay so 560 /* Take a reference briefly to kick off a d0i3 entry delay so
561 * we can handle bursts of RX packets without toggling the 561 * we can handle bursts of RX packets without toggling the
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
index 1824566d08fc..64f950501287 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
@@ -1684,7 +1684,7 @@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi,
1684 1684
1685 if (unlikely(ieee80211_is_beacon(hdr->frame_control) || 1685 if (unlikely(ieee80211_is_beacon(hdr->frame_control) ||
1686 ieee80211_is_probe_resp(hdr->frame_control))) 1686 ieee80211_is_probe_resp(hdr->frame_control)))
1687 rx_status->boottime_ns = ktime_get_boot_ns(); 1687 rx_status->boottime_ns = ktime_get_boottime_ns();
1688 } 1688 }
1689 1689
1690 if (iwl_mvm_create_skb(mvm, skb, hdr, len, crypt_len, rxb)) { 1690 if (iwl_mvm_create_skb(mvm, skb, hdr, len, crypt_len, rxb)) {
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
index cc56ab88fb43..72cd5b3f2d8d 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
@@ -1445,7 +1445,7 @@ void iwl_mvm_get_sync_time(struct iwl_mvm *mvm, u32 *gp2, u64 *boottime)
1445 } 1445 }
1446 1446
1447 *gp2 = iwl_mvm_get_systime(mvm); 1447 *gp2 = iwl_mvm_get_systime(mvm);
1448 *boottime = ktime_get_boot_ns(); 1448 *boottime = ktime_get_boottime_ns();
1449 1449
1450 if (!ps_disabled) { 1450 if (!ps_disabled) {
1451 mvm->ps_disabled = ps_disabled; 1451 mvm->ps_disabled = ps_disabled;
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 1c699a9fa866..a7bf6519d7aa 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -1271,7 +1271,7 @@ static bool mac80211_hwsim_tx_frame_no_nl(struct ieee80211_hw *hw,
1271 */ 1271 */
1272 if (ieee80211_is_beacon(hdr->frame_control) || 1272 if (ieee80211_is_beacon(hdr->frame_control) ||
1273 ieee80211_is_probe_resp(hdr->frame_control)) { 1273 ieee80211_is_probe_resp(hdr->frame_control)) {
1274 rx_status.boottime_ns = ktime_get_boot_ns(); 1274 rx_status.boottime_ns = ktime_get_boottime_ns();
1275 now = data->abs_bcn_ts; 1275 now = data->abs_bcn_ts;
1276 } else { 1276 } else {
1277 now = mac80211_hwsim_get_tsf_raw(); 1277 now = mac80211_hwsim_get_tsf_raw();
diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c
index c9a485ecee7b..b74dc8bc9755 100644
--- a/drivers/net/wireless/ti/wlcore/main.c
+++ b/drivers/net/wireless/ti/wlcore/main.c
@@ -483,7 +483,7 @@ static int wlcore_fw_status(struct wl1271 *wl, struct wl_fw_status *status)
483 } 483 }
484 484
485 /* update the host-chipset time offset */ 485 /* update the host-chipset time offset */
486 wl->time_offset = (ktime_get_boot_ns() >> 10) - 486 wl->time_offset = (ktime_get_boottime_ns() >> 10) -
487 (s64)(status->fw_localtime); 487 (s64)(status->fw_localtime);
488 488
489 wl->fw_fast_lnk_map = status->link_fast_bitmap; 489 wl->fw_fast_lnk_map = status->link_fast_bitmap;
diff --git a/drivers/net/wireless/ti/wlcore/rx.c b/drivers/net/wireless/ti/wlcore/rx.c
index d96bb602fae6..307fab21050b 100644
--- a/drivers/net/wireless/ti/wlcore/rx.c
+++ b/drivers/net/wireless/ti/wlcore/rx.c
@@ -93,7 +93,7 @@ static void wl1271_rx_status(struct wl1271 *wl,
93 } 93 }
94 94
95 if (beacon || probe_rsp) 95 if (beacon || probe_rsp)
96 status->boottime_ns = ktime_get_boot_ns(); 96 status->boottime_ns = ktime_get_boottime_ns();
97 97
98 if (beacon) 98 if (beacon)
99 wlcore_set_pending_regdomain_ch(wl, (u16)desc->channel, 99 wlcore_set_pending_regdomain_ch(wl, (u16)desc->channel,
diff --git a/drivers/net/wireless/ti/wlcore/tx.c b/drivers/net/wireless/ti/wlcore/tx.c
index 057c6be330e7..90e56d4c3df3 100644
--- a/drivers/net/wireless/ti/wlcore/tx.c
+++ b/drivers/net/wireless/ti/wlcore/tx.c
@@ -273,7 +273,7 @@ static void wl1271_tx_fill_hdr(struct wl1271 *wl, struct wl12xx_vif *wlvif,
273 } 273 }
274 274
275 /* configure packet life time */ 275 /* configure packet life time */
276 hosttime = (ktime_get_boot_ns() >> 10); 276 hosttime = (ktime_get_boottime_ns() >> 10);
277 desc->start_time = cpu_to_le32(hosttime - wl->time_offset); 277 desc->start_time = cpu_to_le32(hosttime - wl->time_offset);
278 278
279 is_dummy = wl12xx_is_dummy_packet(wl, skb); 279 is_dummy = wl12xx_is_dummy_packet(wl, skb);
diff --git a/drivers/net/wireless/virt_wifi.c b/drivers/net/wireless/virt_wifi.c
index 606999f102eb..be92e1220284 100644
--- a/drivers/net/wireless/virt_wifi.c
+++ b/drivers/net/wireless/virt_wifi.c
@@ -172,7 +172,7 @@ static void virt_wifi_scan_result(struct work_struct *work)
172 informed_bss = cfg80211_inform_bss(wiphy, &channel_5ghz, 172 informed_bss = cfg80211_inform_bss(wiphy, &channel_5ghz,
173 CFG80211_BSS_FTYPE_PRESP, 173 CFG80211_BSS_FTYPE_PRESP,
174 fake_router_bssid, 174 fake_router_bssid,
175 ktime_get_boot_ns(), 175 ktime_get_boottime_ns(),
176 WLAN_CAPABILITY_ESS, 0, 176 WLAN_CAPABILITY_ESS, 0,
177 (void *)&ssid, sizeof(ssid), 177 (void *)&ssid, sizeof(ssid),
178 DBM_TO_MBM(-50), GFP_KERNEL); 178 DBM_TO_MBM(-50), GFP_KERNEL);
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index 98af9ecd4a90..ca3793002e2f 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -859,7 +859,7 @@ static int pci_pm_suspend_noirq(struct device *dev)
859 pci_dev->bus->self->skip_bus_pm = true; 859 pci_dev->bus->self->skip_bus_pm = true;
860 } 860 }
861 861
862 if (pci_dev->skip_bus_pm && !pm_suspend_via_firmware()) { 862 if (pci_dev->skip_bus_pm && pm_suspend_no_platform()) {
863 dev_dbg(dev, "PCI PM: Skipped\n"); 863 dev_dbg(dev, "PCI PM: Skipped\n");
864 goto Fixup; 864 goto Fixup;
865 } 865 }
@@ -914,10 +914,10 @@ static int pci_pm_resume_noirq(struct device *dev)
914 /* 914 /*
915 * In the suspend-to-idle case, devices left in D0 during suspend will 915 * In the suspend-to-idle case, devices left in D0 during suspend will
916 * stay in D0, so it is not necessary to restore or update their 916 * stay in D0, so it is not necessary to restore or update their
917 * configuration here and attempting to put them into D0 again may 917 * configuration here and attempting to put them into D0 again is
918 * confuse some firmware, so avoid doing that. 918 * pointless, so avoid doing that.
919 */ 919 */
920 if (!pci_dev->skip_bus_pm || pm_suspend_via_firmware()) 920 if (!(pci_dev->skip_bus_pm && pm_suspend_no_platform()))
921 pci_pm_default_resume_early(pci_dev); 921 pci_pm_default_resume_early(pci_dev);
922 922
923 pci_fixup_device(pci_fixup_resume_early, pci_dev); 923 pci_fixup_device(pci_fixup_resume_early, pci_dev);
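The two pci-driver.c hunks above rework the same test in both directions: the early restore in pci_pm_resume_noirq() is now skipped only when bus PM was skipped during suspend and the transition did not go through platform firmware. A standalone sketch of just that predicate, with plain booleans standing in for pci_dev->skip_bus_pm and pm_suspend_no_platform() (they are stand-ins for illustration, not the real kernel helpers):

#include <stdbool.h>
#include <stdio.h>

static bool do_early_resume(bool skip_bus_pm, bool no_platform)
{
	/*
	 * Skip the early restore only when bus PM was skipped during suspend
	 * and the suspend did not go through platform firmware.
	 */
	return !(skip_bus_pm && no_platform);
}

int main(void)
{
	for (int s = 0; s <= 1; s++)
		for (int n = 0; n <= 1; n++)
			printf("skip_bus_pm=%d no_platform=%d -> early resume=%d\n",
			       s, n, do_early_resume(s, n));
	return 0;
}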
diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index e4221a107dca..09ae8a970880 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -71,6 +71,14 @@ config ARM_DSU_PMU
71 system, control logic. The PMU allows counting various events related 71 system, control logic. The PMU allows counting various events related
72 to DSU. 72 to DSU.
73 73
74config FSL_IMX8_DDR_PMU
75 tristate "Freescale i.MX8 DDR perf monitor"
76 depends on ARCH_MXC
77 help
78 Provides support for the DDR performance monitor in i.MX8, which
79 can give information about memory throughput and other related
80 events.
81
74config HISI_PMU 82config HISI_PMU
75 bool "HiSilicon SoC PMU" 83 bool "HiSilicon SoC PMU"
76 depends on ARM64 && ACPI 84 depends on ARM64 && ACPI
diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
index 30489941f3d6..2ebb4de17815 100644
--- a/drivers/perf/Makefile
+++ b/drivers/perf/Makefile
@@ -5,6 +5,7 @@ obj-$(CONFIG_ARM_DSU_PMU) += arm_dsu_pmu.o
5obj-$(CONFIG_ARM_PMU) += arm_pmu.o arm_pmu_platform.o 5obj-$(CONFIG_ARM_PMU) += arm_pmu.o arm_pmu_platform.o
6obj-$(CONFIG_ARM_PMU_ACPI) += arm_pmu_acpi.o 6obj-$(CONFIG_ARM_PMU_ACPI) += arm_pmu_acpi.o
7obj-$(CONFIG_ARM_SMMU_V3_PMU) += arm_smmuv3_pmu.o 7obj-$(CONFIG_ARM_SMMU_V3_PMU) += arm_smmuv3_pmu.o
8obj-$(CONFIG_FSL_IMX8_DDR_PMU) += fsl_imx8_ddr_perf.o
8obj-$(CONFIG_HISI_PMU) += hisilicon/ 9obj-$(CONFIG_HISI_PMU) += hisilicon/
9obj-$(CONFIG_QCOM_L2_PMU) += qcom_l2_pmu.o 10obj-$(CONFIG_QCOM_L2_PMU) += qcom_l2_pmu.o
10obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o 11obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o
diff --git a/drivers/perf/arm_pmu_acpi.c b/drivers/perf/arm_pmu_acpi.c
index d2c2978409d2..acce8781c456 100644
--- a/drivers/perf/arm_pmu_acpi.c
+++ b/drivers/perf/arm_pmu_acpi.c
@@ -71,6 +71,76 @@ static void arm_pmu_acpi_unregister_irq(int cpu)
71 acpi_unregister_gsi(gsi); 71 acpi_unregister_gsi(gsi);
72} 72}
73 73
74#if IS_ENABLED(CONFIG_ARM_SPE_PMU)
75static struct resource spe_resources[] = {
76 {
77 /* irq */
78 .flags = IORESOURCE_IRQ,
79 }
80};
81
82static struct platform_device spe_dev = {
83 .name = ARMV8_SPE_PDEV_NAME,
84 .id = -1,
85 .resource = spe_resources,
86 .num_resources = ARRAY_SIZE(spe_resources)
87};
88
89/*
90 * For lack of a better place, hook the normal PMU MADT walk
91 * and create a SPE device if we detect a recent MADT with
92 * a homogeneous PPI mapping.
93 */
94static void arm_spe_acpi_register_device(void)
95{
96 int cpu, hetid, irq, ret;
97 bool first = true;
98 u16 gsi = 0;
99
100 /*
101 * Sanity check all the GICC tables for the same interrupt number.
102 * For now, we only support homogeneous ACPI/SPE machines.
103 */
104 for_each_possible_cpu(cpu) {
105 struct acpi_madt_generic_interrupt *gicc;
106
107 gicc = acpi_cpu_get_madt_gicc(cpu);
108 if (gicc->header.length < ACPI_MADT_GICC_SPE)
109 return;
110
111 if (first) {
112 gsi = gicc->spe_interrupt;
113 if (!gsi)
114 return;
115 hetid = find_acpi_cpu_topology_hetero_id(cpu);
116 first = false;
117 } else if ((gsi != gicc->spe_interrupt) ||
118 (hetid != find_acpi_cpu_topology_hetero_id(cpu))) {
119 pr_warn("ACPI: SPE must be homogeneous\n");
120 return;
121 }
122 }
123
124 irq = acpi_register_gsi(NULL, gsi, ACPI_LEVEL_SENSITIVE,
125 ACPI_ACTIVE_HIGH);
126 if (irq < 0) {
127 pr_warn("ACPI: SPE Unable to register interrupt: %d\n", gsi);
128 return;
129 }
130
131 spe_resources[0].start = irq;
132 ret = platform_device_register(&spe_dev);
133 if (ret < 0) {
134 pr_warn("ACPI: SPE: Unable to register device\n");
135 acpi_unregister_gsi(gsi);
136 }
137}
138#else
139static inline void arm_spe_acpi_register_device(void)
140{
141}
142#endif /* CONFIG_ARM_SPE_PMU */
143
74static int arm_pmu_acpi_parse_irqs(void) 144static int arm_pmu_acpi_parse_irqs(void)
75{ 145{
76 int irq, cpu, irq_cpu, err; 146 int irq, cpu, irq_cpu, err;
@@ -276,6 +346,8 @@ static int arm_pmu_acpi_init(void)
276 if (acpi_disabled) 346 if (acpi_disabled)
277 return 0; 347 return 0;
278 348
349 arm_spe_acpi_register_device();
350
279 ret = arm_pmu_acpi_parse_irqs(); 351 ret = arm_pmu_acpi_parse_irqs();
280 if (ret) 352 if (ret)
281 return ret; 353 return ret;
diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c
index 49b490925255..4e4984a55cd1 100644
--- a/drivers/perf/arm_spe_pmu.c
+++ b/drivers/perf/arm_spe_pmu.c
@@ -27,6 +27,7 @@
27#include <linux/of_address.h> 27#include <linux/of_address.h>
28#include <linux/of_device.h> 28#include <linux/of_device.h>
29#include <linux/perf_event.h> 29#include <linux/perf_event.h>
30#include <linux/perf/arm_pmu.h>
30#include <linux/platform_device.h> 31#include <linux/platform_device.h>
31#include <linux/printk.h> 32#include <linux/printk.h>
32#include <linux/slab.h> 33#include <linux/slab.h>
@@ -1157,7 +1158,13 @@ static const struct of_device_id arm_spe_pmu_of_match[] = {
1157}; 1158};
1158MODULE_DEVICE_TABLE(of, arm_spe_pmu_of_match); 1159MODULE_DEVICE_TABLE(of, arm_spe_pmu_of_match);
1159 1160
1160static int arm_spe_pmu_device_dt_probe(struct platform_device *pdev) 1161static const struct platform_device_id arm_spe_match[] = {
1162 { ARMV8_SPE_PDEV_NAME, 0},
1163 { }
1164};
1165MODULE_DEVICE_TABLE(platform, arm_spe_match);
1166
1167static int arm_spe_pmu_device_probe(struct platform_device *pdev)
1161{ 1168{
1162 int ret; 1169 int ret;
1163 struct arm_spe_pmu *spe_pmu; 1170 struct arm_spe_pmu *spe_pmu;
@@ -1217,11 +1224,12 @@ static int arm_spe_pmu_device_remove(struct platform_device *pdev)
1217} 1224}
1218 1225
1219static struct platform_driver arm_spe_pmu_driver = { 1226static struct platform_driver arm_spe_pmu_driver = {
1227 .id_table = arm_spe_match,
1220 .driver = { 1228 .driver = {
1221 .name = DRVNAME, 1229 .name = DRVNAME,
1222 .of_match_table = of_match_ptr(arm_spe_pmu_of_match), 1230 .of_match_table = of_match_ptr(arm_spe_pmu_of_match),
1223 }, 1231 },
1224 .probe = arm_spe_pmu_device_dt_probe, 1232 .probe = arm_spe_pmu_device_probe,
1225 .remove = arm_spe_pmu_device_remove, 1233 .remove = arm_spe_pmu_device_remove,
1226}; 1234};
1227 1235
diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c
new file mode 100644
index 000000000000..63fe21600072
--- /dev/null
+++ b/drivers/perf/fsl_imx8_ddr_perf.c
@@ -0,0 +1,554 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright 2017 NXP
4 * Copyright 2016 Freescale Semiconductor, Inc.
5 */
6
7#include <linux/bitfield.h>
8#include <linux/init.h>
9#include <linux/interrupt.h>
10#include <linux/io.h>
11#include <linux/module.h>
12#include <linux/of.h>
13#include <linux/of_address.h>
14#include <linux/of_device.h>
15#include <linux/of_irq.h>
16#include <linux/perf_event.h>
17#include <linux/slab.h>
18
19#define COUNTER_CNTL 0x0
20#define COUNTER_READ 0x20
21
22#define COUNTER_DPCR1 0x30
23
24#define CNTL_OVER 0x1
25#define CNTL_CLEAR 0x2
26#define CNTL_EN 0x4
27#define CNTL_EN_MASK 0xFFFFFFFB
28#define CNTL_CLEAR_MASK 0xFFFFFFFD
29#define CNTL_OVER_MASK 0xFFFFFFFE
30
31#define CNTL_CSV_SHIFT 24
32#define CNTL_CSV_MASK (0xFF << CNTL_CSV_SHIFT)
33
34#define EVENT_CYCLES_ID 0
35#define EVENT_CYCLES_COUNTER 0
36#define NUM_COUNTERS 4
37
38#define to_ddr_pmu(p) container_of(p, struct ddr_pmu, pmu)
39
40#define DDR_PERF_DEV_NAME "imx8_ddr"
41#define DDR_CPUHP_CB_NAME DDR_PERF_DEV_NAME "_perf_pmu"
42
43static DEFINE_IDA(ddr_ida);
44
45static const struct of_device_id imx_ddr_pmu_dt_ids[] = {
46 { .compatible = "fsl,imx8-ddr-pmu",},
47 { .compatible = "fsl,imx8m-ddr-pmu",},
48 { /* sentinel */ }
49};
50
51struct ddr_pmu {
52 struct pmu pmu;
53 void __iomem *base;
54 unsigned int cpu;
55 struct hlist_node node;
56 struct device *dev;
57 struct perf_event *events[NUM_COUNTERS];
58 int active_events;
59 enum cpuhp_state cpuhp_state;
60 int irq;
61 int id;
62};
63
64static ssize_t ddr_perf_cpumask_show(struct device *dev,
65 struct device_attribute *attr, char *buf)
66{
67 struct ddr_pmu *pmu = dev_get_drvdata(dev);
68
69 return cpumap_print_to_pagebuf(true, buf, cpumask_of(pmu->cpu));
70}
71
72static struct device_attribute ddr_perf_cpumask_attr =
73 __ATTR(cpumask, 0444, ddr_perf_cpumask_show, NULL);
74
75static struct attribute *ddr_perf_cpumask_attrs[] = {
76 &ddr_perf_cpumask_attr.attr,
77 NULL,
78};
79
80static struct attribute_group ddr_perf_cpumask_attr_group = {
81 .attrs = ddr_perf_cpumask_attrs,
82};
83
84static ssize_t
85ddr_pmu_event_show(struct device *dev, struct device_attribute *attr,
86 char *page)
87{
88 struct perf_pmu_events_attr *pmu_attr;
89
90 pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
91 return sprintf(page, "event=0x%02llx\n", pmu_attr->id);
92}
93
94#define IMX8_DDR_PMU_EVENT_ATTR(_name, _id) \
95 (&((struct perf_pmu_events_attr[]) { \
96 { .attr = __ATTR(_name, 0444, ddr_pmu_event_show, NULL),\
97 .id = _id, } \
98 })[0].attr.attr)
99
100static struct attribute *ddr_perf_events_attrs[] = {
101 IMX8_DDR_PMU_EVENT_ATTR(cycles, EVENT_CYCLES_ID),
102 IMX8_DDR_PMU_EVENT_ATTR(selfresh, 0x01),
103 IMX8_DDR_PMU_EVENT_ATTR(read-accesses, 0x04),
104 IMX8_DDR_PMU_EVENT_ATTR(write-accesses, 0x05),
105 IMX8_DDR_PMU_EVENT_ATTR(read-queue-depth, 0x08),
106 IMX8_DDR_PMU_EVENT_ATTR(write-queue-depth, 0x09),
107 IMX8_DDR_PMU_EVENT_ATTR(lp-read-credit-cnt, 0x10),
108 IMX8_DDR_PMU_EVENT_ATTR(hp-read-credit-cnt, 0x11),
109 IMX8_DDR_PMU_EVENT_ATTR(write-credit-cnt, 0x12),
110 IMX8_DDR_PMU_EVENT_ATTR(read-command, 0x20),
111 IMX8_DDR_PMU_EVENT_ATTR(write-command, 0x21),
112 IMX8_DDR_PMU_EVENT_ATTR(read-modify-write-command, 0x22),
113 IMX8_DDR_PMU_EVENT_ATTR(hp-read, 0x23),
114 IMX8_DDR_PMU_EVENT_ATTR(hp-req-nocredit, 0x24),
115 IMX8_DDR_PMU_EVENT_ATTR(hp-xact-credit, 0x25),
116 IMX8_DDR_PMU_EVENT_ATTR(lp-req-nocredit, 0x26),
117 IMX8_DDR_PMU_EVENT_ATTR(lp-xact-credit, 0x27),
118 IMX8_DDR_PMU_EVENT_ATTR(wr-xact-credit, 0x29),
119 IMX8_DDR_PMU_EVENT_ATTR(read-cycles, 0x2a),
120 IMX8_DDR_PMU_EVENT_ATTR(write-cycles, 0x2b),
121 IMX8_DDR_PMU_EVENT_ATTR(read-write-transition, 0x30),
122 IMX8_DDR_PMU_EVENT_ATTR(precharge, 0x31),
123 IMX8_DDR_PMU_EVENT_ATTR(activate, 0x32),
124 IMX8_DDR_PMU_EVENT_ATTR(load-mode, 0x33),
125 IMX8_DDR_PMU_EVENT_ATTR(perf-mwr, 0x34),
126 IMX8_DDR_PMU_EVENT_ATTR(read, 0x35),
127 IMX8_DDR_PMU_EVENT_ATTR(read-activate, 0x36),
128 IMX8_DDR_PMU_EVENT_ATTR(refresh, 0x37),
129 IMX8_DDR_PMU_EVENT_ATTR(write, 0x38),
130 IMX8_DDR_PMU_EVENT_ATTR(raw-hazard, 0x39),
131 NULL,
132};
133
134static struct attribute_group ddr_perf_events_attr_group = {
135 .name = "events",
136 .attrs = ddr_perf_events_attrs,
137};
138
139PMU_FORMAT_ATTR(event, "config:0-7");
140
141static struct attribute *ddr_perf_format_attrs[] = {
142 &format_attr_event.attr,
143 NULL,
144};
145
146static struct attribute_group ddr_perf_format_attr_group = {
147 .name = "format",
148 .attrs = ddr_perf_format_attrs,
149};
150
151static const struct attribute_group *attr_groups[] = {
152 &ddr_perf_events_attr_group,
153 &ddr_perf_format_attr_group,
154 &ddr_perf_cpumask_attr_group,
155 NULL,
156};
157
158static u32 ddr_perf_alloc_counter(struct ddr_pmu *pmu, int event)
159{
160 int i;
161
162 /*
163	 * Always map the cycle event to counter 0.
164	 * The cycles counter is dedicated to the cycle event and
165	 * can't be used for the other events.
166 */
167 if (event == EVENT_CYCLES_ID) {
168 if (pmu->events[EVENT_CYCLES_COUNTER] == NULL)
169 return EVENT_CYCLES_COUNTER;
170 else
171 return -ENOENT;
172 }
173
174 for (i = 1; i < NUM_COUNTERS; i++) {
175 if (pmu->events[i] == NULL)
176 return i;
177 }
178
179 return -ENOENT;
180}
181
182static void ddr_perf_free_counter(struct ddr_pmu *pmu, int counter)
183{
184 pmu->events[counter] = NULL;
185}
186
187static u32 ddr_perf_read_counter(struct ddr_pmu *pmu, int counter)
188{
189 return readl_relaxed(pmu->base + COUNTER_READ + counter * 4);
190}
191
192static int ddr_perf_event_init(struct perf_event *event)
193{
194 struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
195 struct hw_perf_event *hwc = &event->hw;
196 struct perf_event *sibling;
197
198 if (event->attr.type != event->pmu->type)
199 return -ENOENT;
200
201 if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
202 return -EOPNOTSUPP;
203
204 if (event->cpu < 0) {
205 dev_warn(pmu->dev, "Can't provide per-task data!\n");
206 return -EOPNOTSUPP;
207 }
208
209 /*
210 * We must NOT create groups containing mixed PMUs, although software
211 * events are acceptable (for example to create a CCN group
212 * periodically read when a hrtimer aka cpu-clock leader triggers).
213 */
214 if (event->group_leader->pmu != event->pmu &&
215 !is_software_event(event->group_leader))
216 return -EINVAL;
217
218 for_each_sibling_event(sibling, event->group_leader) {
219 if (sibling->pmu != event->pmu &&
220 !is_software_event(sibling))
221 return -EINVAL;
222 }
223
224 event->cpu = pmu->cpu;
225 hwc->idx = -1;
226
227 return 0;
228}
229
230
231static void ddr_perf_event_update(struct perf_event *event)
232{
233 struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
234 struct hw_perf_event *hwc = &event->hw;
235 u64 delta, prev_raw_count, new_raw_count;
236 int counter = hwc->idx;
237
238 do {
239 prev_raw_count = local64_read(&hwc->prev_count);
240 new_raw_count = ddr_perf_read_counter(pmu, counter);
241 } while (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
242 new_raw_count) != prev_raw_count);
243
244 delta = (new_raw_count - prev_raw_count) & 0xFFFFFFFF;
245
246 local64_add(delta, &event->count);
247}
248
249static void ddr_perf_counter_enable(struct ddr_pmu *pmu, int config,
250 int counter, bool enable)
251{
252 u8 reg = counter * 4 + COUNTER_CNTL;
253 int val;
254
255 if (enable) {
256 /*
257	 * must disable first, then enable again;
258	 * otherwise, the cycle counter will not work
259	 * if it was previously enabled.
260 */
261 writel(0, pmu->base + reg);
262 val = CNTL_EN | CNTL_CLEAR;
263 val |= FIELD_PREP(CNTL_CSV_MASK, config);
264 writel(val, pmu->base + reg);
265 } else {
266 /* Disable counter */
267 writel(0, pmu->base + reg);
268 }
269}
270
271static void ddr_perf_event_start(struct perf_event *event, int flags)
272{
273 struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
274 struct hw_perf_event *hwc = &event->hw;
275 int counter = hwc->idx;
276
277 local64_set(&hwc->prev_count, 0);
278
279 ddr_perf_counter_enable(pmu, event->attr.config, counter, true);
280
281 hwc->state = 0;
282}
283
284static int ddr_perf_event_add(struct perf_event *event, int flags)
285{
286 struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
287 struct hw_perf_event *hwc = &event->hw;
288 int counter;
289 int cfg = event->attr.config;
290
291 counter = ddr_perf_alloc_counter(pmu, cfg);
292 if (counter < 0) {
293 dev_dbg(pmu->dev, "There are not enough counters\n");
294 return -EOPNOTSUPP;
295 }
296
297 pmu->events[counter] = event;
298 pmu->active_events++;
299 hwc->idx = counter;
300
301 hwc->state |= PERF_HES_STOPPED;
302
303 if (flags & PERF_EF_START)
304 ddr_perf_event_start(event, flags);
305
306 return 0;
307}
308
309static void ddr_perf_event_stop(struct perf_event *event, int flags)
310{
311 struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
312 struct hw_perf_event *hwc = &event->hw;
313 int counter = hwc->idx;
314
315 ddr_perf_counter_enable(pmu, event->attr.config, counter, false);
316 ddr_perf_event_update(event);
317
318 hwc->state |= PERF_HES_STOPPED;
319}
320
321static void ddr_perf_event_del(struct perf_event *event, int flags)
322{
323 struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
324 struct hw_perf_event *hwc = &event->hw;
325 int counter = hwc->idx;
326
327 ddr_perf_event_stop(event, PERF_EF_UPDATE);
328
329 ddr_perf_free_counter(pmu, counter);
330 pmu->active_events--;
331 hwc->idx = -1;
332}
333
334static void ddr_perf_pmu_enable(struct pmu *pmu)
335{
336 struct ddr_pmu *ddr_pmu = to_ddr_pmu(pmu);
337
338	/* enable cycle counter if the cycle event is not in the active event list */
339 if (ddr_pmu->events[EVENT_CYCLES_COUNTER] == NULL)
340 ddr_perf_counter_enable(ddr_pmu,
341 EVENT_CYCLES_ID,
342 EVENT_CYCLES_COUNTER,
343 true);
344}
345
346static void ddr_perf_pmu_disable(struct pmu *pmu)
347{
348 struct ddr_pmu *ddr_pmu = to_ddr_pmu(pmu);
349
350 if (ddr_pmu->events[EVENT_CYCLES_COUNTER] == NULL)
351 ddr_perf_counter_enable(ddr_pmu,
352 EVENT_CYCLES_ID,
353 EVENT_CYCLES_COUNTER,
354 false);
355}
356
357static int ddr_perf_init(struct ddr_pmu *pmu, void __iomem *base,
358 struct device *dev)
359{
360 *pmu = (struct ddr_pmu) {
361 .pmu = (struct pmu) {
362 .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
363 .task_ctx_nr = perf_invalid_context,
364 .attr_groups = attr_groups,
365 .event_init = ddr_perf_event_init,
366 .add = ddr_perf_event_add,
367 .del = ddr_perf_event_del,
368 .start = ddr_perf_event_start,
369 .stop = ddr_perf_event_stop,
370 .read = ddr_perf_event_update,
371 .pmu_enable = ddr_perf_pmu_enable,
372 .pmu_disable = ddr_perf_pmu_disable,
373 },
374 .base = base,
375 .dev = dev,
376 };
377
378 pmu->id = ida_simple_get(&ddr_ida, 0, 0, GFP_KERNEL);
379 return pmu->id;
380}
381
382static irqreturn_t ddr_perf_irq_handler(int irq, void *p)
383{
384 int i;
385 struct ddr_pmu *pmu = (struct ddr_pmu *) p;
386 struct perf_event *event, *cycle_event = NULL;
387
388	/* all counters will stop if the cycle counter is disabled */
389 ddr_perf_counter_enable(pmu,
390 EVENT_CYCLES_ID,
391 EVENT_CYCLES_COUNTER,
392 false);
393 /*
394 * When the cycle counter overflows, all counters are stopped,
395 * and an IRQ is raised. If any other counter overflows, it
396 * continues counting, and no IRQ is raised.
397 *
398 * Cycles occur at least 4 times as often as other events, so we
399 * can update all events on a cycle counter overflow and not
400 * lose events.
401 *
402 */
403 for (i = 0; i < NUM_COUNTERS; i++) {
404
405 if (!pmu->events[i])
406 continue;
407
408 event = pmu->events[i];
409
410 ddr_perf_event_update(event);
411
412 if (event->hw.idx == EVENT_CYCLES_COUNTER)
413 cycle_event = event;
414 }
415
416 ddr_perf_counter_enable(pmu,
417 EVENT_CYCLES_ID,
418 EVENT_CYCLES_COUNTER,
419 true);
420 if (cycle_event)
421 ddr_perf_event_update(cycle_event);
422
423 return IRQ_HANDLED;
424}
425
426static int ddr_perf_offline_cpu(unsigned int cpu, struct hlist_node *node)
427{
428 struct ddr_pmu *pmu = hlist_entry_safe(node, struct ddr_pmu, node);
429 int target;
430
431 if (cpu != pmu->cpu)
432 return 0;
433
434 target = cpumask_any_but(cpu_online_mask, cpu);
435 if (target >= nr_cpu_ids)
436 return 0;
437
438 perf_pmu_migrate_context(&pmu->pmu, cpu, target);
439 pmu->cpu = target;
440
441 WARN_ON(irq_set_affinity_hint(pmu->irq, cpumask_of(pmu->cpu)));
442
443 return 0;
444}
445
446static int ddr_perf_probe(struct platform_device *pdev)
447{
448 struct ddr_pmu *pmu;
449 struct device_node *np;
450 void __iomem *base;
451 char *name;
452 int num;
453 int ret;
454 int irq;
455
456 base = devm_platform_ioremap_resource(pdev, 0);
457 if (IS_ERR(base))
458 return PTR_ERR(base);
459
460 np = pdev->dev.of_node;
461
462 pmu = devm_kzalloc(&pdev->dev, sizeof(*pmu), GFP_KERNEL);
463 if (!pmu)
464 return -ENOMEM;
465
466 num = ddr_perf_init(pmu, base, &pdev->dev);
467
468 platform_set_drvdata(pdev, pmu);
469
470 name = devm_kasprintf(&pdev->dev, GFP_KERNEL, DDR_PERF_DEV_NAME "%d",
471 num);
472 if (!name)
473 return -ENOMEM;
474
475 pmu->cpu = raw_smp_processor_id();
476 ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
477 DDR_CPUHP_CB_NAME,
478 NULL,
479 ddr_perf_offline_cpu);
480
481 if (ret < 0) {
482 dev_err(&pdev->dev, "cpuhp_setup_state_multi failed\n");
483 goto ddr_perf_err;
484 }
485
486 pmu->cpuhp_state = ret;
487
488 /* Register the pmu instance for cpu hotplug */
489 cpuhp_state_add_instance_nocalls(pmu->cpuhp_state, &pmu->node);
490
491 /* Request irq */
492 irq = of_irq_get(np, 0);
493 if (irq < 0) {
494 dev_err(&pdev->dev, "Failed to get irq: %d", irq);
495 ret = irq;
496 goto ddr_perf_err;
497 }
498
499 ret = devm_request_irq(&pdev->dev, irq,
500 ddr_perf_irq_handler,
501 IRQF_NOBALANCING | IRQF_NO_THREAD,
502 DDR_CPUHP_CB_NAME,
503 pmu);
504 if (ret < 0) {
505 dev_err(&pdev->dev, "Request irq failed: %d", ret);
506 goto ddr_perf_err;
507 }
508
509 pmu->irq = irq;
510 ret = irq_set_affinity_hint(pmu->irq, cpumask_of(pmu->cpu));
511 if (ret) {
512 dev_err(pmu->dev, "Failed to set interrupt affinity!\n");
513 goto ddr_perf_err;
514 }
515
516 ret = perf_pmu_register(&pmu->pmu, name, -1);
517 if (ret)
518 goto ddr_perf_err;
519
520 return 0;
521
522ddr_perf_err:
523 if (pmu->cpuhp_state)
524 cpuhp_state_remove_instance_nocalls(pmu->cpuhp_state, &pmu->node);
525
526 ida_simple_remove(&ddr_ida, pmu->id);
527 dev_warn(&pdev->dev, "i.MX8 DDR Perf PMU failed (%d), disabled\n", ret);
528 return ret;
529}
530
531static int ddr_perf_remove(struct platform_device *pdev)
532{
533 struct ddr_pmu *pmu = platform_get_drvdata(pdev);
534
535 cpuhp_state_remove_instance_nocalls(pmu->cpuhp_state, &pmu->node);
536 irq_set_affinity_hint(pmu->irq, NULL);
537
538 perf_pmu_unregister(&pmu->pmu);
539
540 ida_simple_remove(&ddr_ida, pmu->id);
541 return 0;
542}
543
544static struct platform_driver imx_ddr_pmu_driver = {
545 .driver = {
546 .name = "imx-ddr-pmu",
547 .of_match_table = imx_ddr_pmu_dt_ids,
548 },
549 .probe = ddr_perf_probe,
550 .remove = ddr_perf_remove,
551};
552
553module_platform_driver(imx_ddr_pmu_driver);
554MODULE_LICENSE("GPL v2");
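ddr_perf_event_update() in the new driver above folds a free-running 32-bit hardware counter into the 64-bit perf count by masking the delta to 32 bits, so a single wrap between two reads is still counted correctly. A minimal userspace sketch of that accumulation, with a simulated counter in place of readl_relaxed() and the local64_cmpxchg() retry loop omitted since this runs single-threaded:

#include <stdint.h>
#include <stdio.h>

static uint32_t hw_counter;	/* stands in for readl_relaxed(COUNTER_READ) */

static uint64_t prev_count;	/* last raw value seen */
static uint64_t event_count;	/* 64-bit accumulated count */

static void event_update(void)
{
	uint64_t new_raw = hw_counter;
	/* masking to 32 bits makes the subtraction wrap-safe */
	uint64_t delta = (new_raw - prev_count) & 0xFFFFFFFFULL;

	prev_count = new_raw;
	event_count += delta;
}

int main(void)
{
	hw_counter = 0xFFFFFFF0u;	/* close to wrapping */
	event_update();
	hw_counter += 0x40;		/* wraps past 2^32 */
	event_update();
	printf("accumulated: %llu\n", (unsigned long long)event_count);
	return 0;
}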
diff --git a/drivers/pinctrl/mediatek/mtk-eint.c b/drivers/pinctrl/mediatek/mtk-eint.c
index f464f8cd274b..7e526bcf5e0b 100644
--- a/drivers/pinctrl/mediatek/mtk-eint.c
+++ b/drivers/pinctrl/mediatek/mtk-eint.c
@@ -113,6 +113,8 @@ static void mtk_eint_mask(struct irq_data *d)
113 void __iomem *reg = mtk_eint_get_offset(eint, d->hwirq, 113 void __iomem *reg = mtk_eint_get_offset(eint, d->hwirq,
114 eint->regs->mask_set); 114 eint->regs->mask_set);
115 115
116 eint->cur_mask[d->hwirq >> 5] &= ~mask;
117
116 writel(mask, reg); 118 writel(mask, reg);
117} 119}
118 120
@@ -123,6 +125,8 @@ static void mtk_eint_unmask(struct irq_data *d)
123 void __iomem *reg = mtk_eint_get_offset(eint, d->hwirq, 125 void __iomem *reg = mtk_eint_get_offset(eint, d->hwirq,
124 eint->regs->mask_clr); 126 eint->regs->mask_clr);
125 127
128 eint->cur_mask[d->hwirq >> 5] |= mask;
129
126 writel(mask, reg); 130 writel(mask, reg);
127 131
128 if (eint->dual_edge[d->hwirq]) 132 if (eint->dual_edge[d->hwirq])
@@ -217,19 +221,6 @@ static void mtk_eint_chip_write_mask(const struct mtk_eint *eint,
217 } 221 }
218} 222}
219 223
220static void mtk_eint_chip_read_mask(const struct mtk_eint *eint,
221 void __iomem *base, u32 *buf)
222{
223 int port;
224 void __iomem *reg;
225
226 for (port = 0; port < eint->hw->ports; port++) {
227 reg = base + eint->regs->mask + (port << 2);
228 buf[port] = ~readl_relaxed(reg);
229 /* Mask is 0 when irq is enabled, and 1 when disabled. */
230 }
231}
232
233static int mtk_eint_irq_request_resources(struct irq_data *d) 224static int mtk_eint_irq_request_resources(struct irq_data *d)
234{ 225{
235 struct mtk_eint *eint = irq_data_get_irq_chip_data(d); 226 struct mtk_eint *eint = irq_data_get_irq_chip_data(d);
@@ -318,7 +309,7 @@ static void mtk_eint_irq_handler(struct irq_desc *desc)
318 struct irq_chip *chip = irq_desc_get_chip(desc); 309 struct irq_chip *chip = irq_desc_get_chip(desc);
319 struct mtk_eint *eint = irq_desc_get_handler_data(desc); 310 struct mtk_eint *eint = irq_desc_get_handler_data(desc);
320 unsigned int status, eint_num; 311 unsigned int status, eint_num;
321 int offset, index, virq; 312 int offset, mask_offset, index, virq;
322 void __iomem *reg = mtk_eint_get_offset(eint, 0, eint->regs->stat); 313 void __iomem *reg = mtk_eint_get_offset(eint, 0, eint->regs->stat);
323 int dual_edge, start_level, curr_level; 314 int dual_edge, start_level, curr_level;
324 315
@@ -328,10 +319,24 @@ static void mtk_eint_irq_handler(struct irq_desc *desc)
328 status = readl(reg); 319 status = readl(reg);
329 while (status) { 320 while (status) {
330 offset = __ffs(status); 321 offset = __ffs(status);
322 mask_offset = eint_num >> 5;
331 index = eint_num + offset; 323 index = eint_num + offset;
332 virq = irq_find_mapping(eint->domain, index); 324 virq = irq_find_mapping(eint->domain, index);
333 status &= ~BIT(offset); 325 status &= ~BIT(offset);
334 326
327 /*
328		/*
		wait
331 * in the resume sequence).
332 */
333 if (eint->wake_mask[mask_offset] & BIT(offset) &&
334 !(eint->cur_mask[mask_offset] & BIT(offset))) {
335 writel_relaxed(BIT(offset), reg -
336 eint->regs->stat +
337 eint->regs->mask_set);
338 }
339
335 dual_edge = eint->dual_edge[index]; 340 dual_edge = eint->dual_edge[index];
336 if (dual_edge) { 341 if (dual_edge) {
337 /* 342 /*
@@ -370,7 +375,6 @@ static void mtk_eint_irq_handler(struct irq_desc *desc)
370 375
371int mtk_eint_do_suspend(struct mtk_eint *eint) 376int mtk_eint_do_suspend(struct mtk_eint *eint)
372{ 377{
373 mtk_eint_chip_read_mask(eint, eint->base, eint->cur_mask);
374 mtk_eint_chip_write_mask(eint, eint->base, eint->wake_mask); 378 mtk_eint_chip_write_mask(eint, eint->base, eint->wake_mask);
375 379
376 return 0; 380 return 0;
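The mtk-eint change above stops reading the mask registers back from hardware at suspend time and instead keeps a software shadow (cur_mask) up to date from the mask/unmask callbacks, one bit per hwirq grouped into 32-bit words. A small self-contained sketch of that bookkeeping; NUM_BANKS and the hwirq values are made up for the example:

#include <stdint.h>
#include <stdio.h>

#define NUM_BANKS 4
static uint32_t cur_mask[NUM_BANKS];	/* 1 = unmasked (enabled), 0 = masked */

static void eint_mask(unsigned int hwirq)
{
	cur_mask[hwirq >> 5] &= ~(1u << (hwirq & 31));
	/* ...followed by the write to the hardware mask-set register */
}

static void eint_unmask(unsigned int hwirq)
{
	cur_mask[hwirq >> 5] |= 1u << (hwirq & 31);
	/* ...followed by the write to the hardware mask-clear register */
}

int main(void)
{
	eint_unmask(35);	/* bank 1, bit 3 */
	eint_mask(35);
	eint_unmask(7);		/* bank 0, bit 7 */
	printf("bank0=%08x bank1=%08x\n",
	       (unsigned int)cur_mask[0], (unsigned int)cur_mask[1]);
	return 0;
}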
diff --git a/drivers/pinctrl/pinctrl-mcp23s08.c b/drivers/pinctrl/pinctrl-mcp23s08.c
index 568ca96cdb6d..3a235487e38d 100644
--- a/drivers/pinctrl/pinctrl-mcp23s08.c
+++ b/drivers/pinctrl/pinctrl-mcp23s08.c
@@ -771,6 +771,10 @@ static int mcp23s08_probe_one(struct mcp23s08 *mcp, struct device *dev,
771 if (ret < 0) 771 if (ret < 0)
772 goto fail; 772 goto fail;
773 773
774 ret = devm_gpiochip_add_data(dev, &mcp->chip, mcp);
775 if (ret < 0)
776 goto fail;
777
774 mcp->irq_controller = 778 mcp->irq_controller =
775 device_property_read_bool(dev, "interrupt-controller"); 779 device_property_read_bool(dev, "interrupt-controller");
776 if (mcp->irq && mcp->irq_controller) { 780 if (mcp->irq && mcp->irq_controller) {
@@ -812,10 +816,6 @@ static int mcp23s08_probe_one(struct mcp23s08 *mcp, struct device *dev,
812 goto fail; 816 goto fail;
813 } 817 }
814 818
815 ret = devm_gpiochip_add_data(dev, &mcp->chip, mcp);
816 if (ret < 0)
817 goto fail;
818
819 if (one_regmap_config) { 819 if (one_regmap_config) {
820 mcp->pinctrl_desc.name = devm_kasprintf(dev, GFP_KERNEL, 820 mcp->pinctrl_desc.name = devm_kasprintf(dev, GFP_KERNEL,
821 "mcp23xxx-pinctrl.%d", raw_chip_address); 821 "mcp23xxx-pinctrl.%d", raw_chip_address);
diff --git a/drivers/pinctrl/pinctrl-ocelot.c b/drivers/pinctrl/pinctrl-ocelot.c
index 3b4ca52d2456..fb76fb2e9ea5 100644
--- a/drivers/pinctrl/pinctrl-ocelot.c
+++ b/drivers/pinctrl/pinctrl-ocelot.c
@@ -396,7 +396,7 @@ static int ocelot_pin_function_idx(struct ocelot_pinctrl *info,
396 return -1; 396 return -1;
397} 397}
398 398
399#define REG(r, info, p) ((r) * (info)->stride + (4 * ((p) / 32))) 399#define REG_ALT(msb, info, p) (OCELOT_GPIO_ALT0 * (info)->stride + 4 * ((msb) + ((info)->stride * ((p) / 32))))
400 400
401static int ocelot_pinmux_set_mux(struct pinctrl_dev *pctldev, 401static int ocelot_pinmux_set_mux(struct pinctrl_dev *pctldev,
402 unsigned int selector, unsigned int group) 402 unsigned int selector, unsigned int group)
@@ -412,19 +412,21 @@ static int ocelot_pinmux_set_mux(struct pinctrl_dev *pctldev,
412 412
413 /* 413 /*
414 * f is encoded on two bits. 414 * f is encoded on two bits.
415 * bit 0 of f goes in BIT(pin) of ALT0, bit 1 of f goes in BIT(pin) of 415 * bit 0 of f goes in BIT(pin) of ALT[0], bit 1 of f goes in BIT(pin) of
416 * ALT1 416 * ALT[1]
417 * This is racy because both registers can't be updated at the same time 417 * This is racy because both registers can't be updated at the same time
418 * but it doesn't matter much for now. 418 * but it doesn't matter much for now.
419 */ 419 */
420 regmap_update_bits(info->map, REG(OCELOT_GPIO_ALT0, info, pin->pin), 420 regmap_update_bits(info->map, REG_ALT(0, info, pin->pin),
421 BIT(p), f << p); 421 BIT(p), f << p);
422 regmap_update_bits(info->map, REG(OCELOT_GPIO_ALT1, info, pin->pin), 422 regmap_update_bits(info->map, REG_ALT(1, info, pin->pin),
423 BIT(p), f << (p - 1)); 423 BIT(p), f << (p - 1));
424 424
425 return 0; 425 return 0;
426} 426}
427 427
428#define REG(r, info, p) ((r) * (info)->stride + (4 * ((p) / 32)))
429
428static int ocelot_gpio_set_direction(struct pinctrl_dev *pctldev, 430static int ocelot_gpio_set_direction(struct pinctrl_dev *pctldev,
429 struct pinctrl_gpio_range *range, 431 struct pinctrl_gpio_range *range,
430 unsigned int pin, bool input) 432 unsigned int pin, bool input)
@@ -432,7 +434,7 @@ static int ocelot_gpio_set_direction(struct pinctrl_dev *pctldev,
432 struct ocelot_pinctrl *info = pinctrl_dev_get_drvdata(pctldev); 434 struct ocelot_pinctrl *info = pinctrl_dev_get_drvdata(pctldev);
433 unsigned int p = pin % 32; 435 unsigned int p = pin % 32;
434 436
435 regmap_update_bits(info->map, REG(OCELOT_GPIO_OE, info, p), BIT(p), 437 regmap_update_bits(info->map, REG(OCELOT_GPIO_OE, info, pin), BIT(p),
436 input ? 0 : BIT(p)); 438 input ? 0 : BIT(p));
437 439
438 return 0; 440 return 0;
@@ -445,9 +447,9 @@ static int ocelot_gpio_request_enable(struct pinctrl_dev *pctldev,
445 struct ocelot_pinctrl *info = pinctrl_dev_get_drvdata(pctldev); 447 struct ocelot_pinctrl *info = pinctrl_dev_get_drvdata(pctldev);
446 unsigned int p = offset % 32; 448 unsigned int p = offset % 32;
447 449
448 regmap_update_bits(info->map, REG(OCELOT_GPIO_ALT0, info, offset), 450 regmap_update_bits(info->map, REG_ALT(0, info, offset),
449 BIT(p), 0); 451 BIT(p), 0);
450 regmap_update_bits(info->map, REG(OCELOT_GPIO_ALT1, info, offset), 452 regmap_update_bits(info->map, REG_ALT(1, info, offset),
451 BIT(p), 0); 453 BIT(p), 0);
452 454
453 return 0; 455 return 0;
diff --git a/drivers/ras/cec.c b/drivers/ras/cec.c
index 673f8a128397..5d545806d930 100644
--- a/drivers/ras/cec.c
+++ b/drivers/ras/cec.c
@@ -1,4 +1,7 @@
1// SPDX-License-Identifier: GPL-2.0 1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright (c) 2017-2019 Borislav Petkov, SUSE Labs.
4 */
2#include <linux/mm.h> 5#include <linux/mm.h>
3#include <linux/gfp.h> 6#include <linux/gfp.h>
4#include <linux/kernel.h> 7#include <linux/kernel.h>
@@ -37,9 +40,9 @@
37 * thus emulate an LRU-like behavior when deleting elements to free up space 40 * thus emulate an LRU-like behavior when deleting elements to free up space
38 * in the page. 41 * in the page.
39 * 42 *
40 * When an element reaches its max count of count_threshold, we try to poison 43 * When an element reaches its max count of action_threshold, we try to poison
41 * it by assuming that errors triggered count_threshold times in a single page 44 * it by assuming that errors triggered action_threshold times in a single page
42 * are excessive and that page shouldn't be used anymore. count_threshold is 45 * are excessive and that page shouldn't be used anymore. action_threshold is
43 * initialized to COUNT_MASK which is the maximum. 46 * initialized to COUNT_MASK which is the maximum.
44 * 47 *
45 * That error event entry causes cec_add_elem() to return !0 value and thus 48 * That error event entry causes cec_add_elem() to return !0 value and thus
@@ -122,7 +125,7 @@ static DEFINE_MUTEX(ce_mutex);
122static u64 dfs_pfn; 125static u64 dfs_pfn;
123 126
124/* Amount of errors after which we offline */ 127/* Amount of errors after which we offline */
125static unsigned int count_threshold = COUNT_MASK; 128static u64 action_threshold = COUNT_MASK;
126 129
127/* Each element "decays" each decay_interval which is 24hrs by default. */ 130/* Each element "decays" each decay_interval which is 24hrs by default. */
128#define CEC_DECAY_DEFAULT_INTERVAL 24 * 60 * 60 /* 24 hrs */ 131#define CEC_DECAY_DEFAULT_INTERVAL 24 * 60 * 60 /* 24 hrs */
@@ -276,11 +279,39 @@ static u64 __maybe_unused del_lru_elem(void)
276 return pfn; 279 return pfn;
277} 280}
278 281
282static bool sanity_check(struct ce_array *ca)
283{
284 bool ret = false;
285 u64 prev = 0;
286 int i;
287
288 for (i = 0; i < ca->n; i++) {
289 u64 this = PFN(ca->array[i]);
290
291 if (WARN(prev > this, "prev: 0x%016llx <-> this: 0x%016llx\n", prev, this))
292 ret = true;
293
294 prev = this;
295 }
296
297 if (!ret)
298 return ret;
299
300 pr_info("Sanity check dump:\n{ n: %d\n", ca->n);
301 for (i = 0; i < ca->n; i++) {
302 u64 this = PFN(ca->array[i]);
303
304 pr_info(" %03d: [%016llx|%03llx]\n", i, this, FULL_COUNT(ca->array[i]));
305 }
306 pr_info("}\n");
307
308 return ret;
309}
279 310
280int cec_add_elem(u64 pfn) 311int cec_add_elem(u64 pfn)
281{ 312{
282 struct ce_array *ca = &ce_arr; 313 struct ce_array *ca = &ce_arr;
283 unsigned int to; 314 unsigned int to = 0;
284 int count, ret = 0; 315 int count, ret = 0;
285 316
286 /* 317 /*
@@ -294,6 +325,7 @@ int cec_add_elem(u64 pfn)
294 325
295 ca->ces_entered++; 326 ca->ces_entered++;
296 327
328 /* Array full, free the LRU slot. */
297 if (ca->n == MAX_ELEMS) 329 if (ca->n == MAX_ELEMS)
298 WARN_ON(!del_lru_elem_unlocked(ca)); 330 WARN_ON(!del_lru_elem_unlocked(ca));
299 331
@@ -306,24 +338,17 @@ int cec_add_elem(u64 pfn)
306 (void *)&ca->array[to], 338 (void *)&ca->array[to],
307 (ca->n - to) * sizeof(u64)); 339 (ca->n - to) * sizeof(u64));
308 340
309 ca->array[to] = (pfn << PAGE_SHIFT) | 341 ca->array[to] = pfn << PAGE_SHIFT;
310 (DECAY_MASK << COUNT_BITS) | 1;
311
312 ca->n++; 342 ca->n++;
313
314 ret = 0;
315
316 goto decay;
317 } 343 }
318 344
319 count = COUNT(ca->array[to]); 345 /* Add/refresh element generation and increment count */
320 346 ca->array[to] |= DECAY_MASK << COUNT_BITS;
321 if (count < count_threshold) { 347 ca->array[to]++;
322 ca->array[to] |= (DECAY_MASK << COUNT_BITS);
323 ca->array[to]++;
324 348
325 ret = 0; 349 /* Check action threshold and soft-offline, if reached. */
326 } else { 350 count = COUNT(ca->array[to]);
351 if (count >= action_threshold) {
327 u64 pfn = ca->array[to] >> PAGE_SHIFT; 352 u64 pfn = ca->array[to] >> PAGE_SHIFT;
328 353
329 if (!pfn_valid(pfn)) { 354 if (!pfn_valid(pfn)) {
@@ -338,20 +363,21 @@ int cec_add_elem(u64 pfn)
338 del_elem(ca, to); 363 del_elem(ca, to);
339 364
340 /* 365 /*
341 * Return a >0 value to denote that we've reached the offlining 366 * Return a >0 value to callers, to denote that we've reached
342 * threshold. 367 * the offlining threshold.
343 */ 368 */
344 ret = 1; 369 ret = 1;
345 370
346 goto unlock; 371 goto unlock;
347 } 372 }
348 373
349decay:
350 ca->decay_count++; 374 ca->decay_count++;
351 375
352 if (ca->decay_count >= CLEAN_ELEMS) 376 if (ca->decay_count >= CLEAN_ELEMS)
353 do_spring_cleaning(ca); 377 do_spring_cleaning(ca);
354 378
379 WARN_ON_ONCE(sanity_check(ca));
380
355unlock: 381unlock:
356 mutex_unlock(&ce_mutex); 382 mutex_unlock(&ce_mutex);
357 383
@@ -369,45 +395,48 @@ static int pfn_set(void *data, u64 val)
369{ 395{
370 *(u64 *)data = val; 396 *(u64 *)data = val;
371 397
372 return cec_add_elem(val); 398 cec_add_elem(val);
399
400 return 0;
373} 401}
374 402
375DEFINE_DEBUGFS_ATTRIBUTE(pfn_ops, u64_get, pfn_set, "0x%llx\n"); 403DEFINE_DEBUGFS_ATTRIBUTE(pfn_ops, u64_get, pfn_set, "0x%llx\n");
376 404
377static int decay_interval_set(void *data, u64 val) 405static int decay_interval_set(void *data, u64 val)
378{ 406{
379 *(u64 *)data = val;
380
381 if (val < CEC_DECAY_MIN_INTERVAL) 407 if (val < CEC_DECAY_MIN_INTERVAL)
382 return -EINVAL; 408 return -EINVAL;
383 409
384 if (val > CEC_DECAY_MAX_INTERVAL) 410 if (val > CEC_DECAY_MAX_INTERVAL)
385 return -EINVAL; 411 return -EINVAL;
386 412
413 *(u64 *)data = val;
387 decay_interval = val; 414 decay_interval = val;
388 415
389 cec_mod_work(decay_interval); 416 cec_mod_work(decay_interval);
417
390 return 0; 418 return 0;
391} 419}
392DEFINE_DEBUGFS_ATTRIBUTE(decay_interval_ops, u64_get, decay_interval_set, "%lld\n"); 420DEFINE_DEBUGFS_ATTRIBUTE(decay_interval_ops, u64_get, decay_interval_set, "%lld\n");
393 421
394static int count_threshold_set(void *data, u64 val) 422static int action_threshold_set(void *data, u64 val)
395{ 423{
396 *(u64 *)data = val; 424 *(u64 *)data = val;
397 425
398 if (val > COUNT_MASK) 426 if (val > COUNT_MASK)
399 val = COUNT_MASK; 427 val = COUNT_MASK;
400 428
401 count_threshold = val; 429 action_threshold = val;
402 430
403 return 0; 431 return 0;
404} 432}
405DEFINE_DEBUGFS_ATTRIBUTE(count_threshold_ops, u64_get, count_threshold_set, "%lld\n"); 433DEFINE_DEBUGFS_ATTRIBUTE(action_threshold_ops, u64_get, action_threshold_set, "%lld\n");
434
435static const char * const bins[] = { "00", "01", "10", "11" };
406 436
407static int array_dump(struct seq_file *m, void *v) 437static int array_dump(struct seq_file *m, void *v)
408{ 438{
409 struct ce_array *ca = &ce_arr; 439 struct ce_array *ca = &ce_arr;
410 u64 prev = 0;
411 int i; 440 int i;
412 441
413 mutex_lock(&ce_mutex); 442 mutex_lock(&ce_mutex);
@@ -416,11 +445,8 @@ static int array_dump(struct seq_file *m, void *v)
416 for (i = 0; i < ca->n; i++) { 445 for (i = 0; i < ca->n; i++) {
417 u64 this = PFN(ca->array[i]); 446 u64 this = PFN(ca->array[i]);
418 447
419 seq_printf(m, " %03d: [%016llx|%03llx]\n", i, this, FULL_COUNT(ca->array[i])); 448 seq_printf(m, " %3d: [%016llx|%s|%03llx]\n",
420 449 i, this, bins[DECAY(ca->array[i])], COUNT(ca->array[i]));
421 WARN_ON(prev > this);
422
423 prev = this;
424 } 450 }
425 451
426 seq_printf(m, "}\n"); 452 seq_printf(m, "}\n");
@@ -433,7 +459,7 @@ static int array_dump(struct seq_file *m, void *v)
433 seq_printf(m, "Decay interval: %lld seconds\n", decay_interval); 459 seq_printf(m, "Decay interval: %lld seconds\n", decay_interval);
434 seq_printf(m, "Decays: %lld\n", ca->decays_done); 460 seq_printf(m, "Decays: %lld\n", ca->decays_done);
435 461
436 seq_printf(m, "Action threshold: %d\n", count_threshold); 462 seq_printf(m, "Action threshold: %lld\n", action_threshold);
437 463
438 mutex_unlock(&ce_mutex); 464 mutex_unlock(&ce_mutex);
439 465
@@ -463,18 +489,6 @@ static int __init create_debugfs_nodes(void)
463 return -1; 489 return -1;
464 } 490 }
465 491
466 pfn = debugfs_create_file("pfn", S_IRUSR | S_IWUSR, d, &dfs_pfn, &pfn_ops);
467 if (!pfn) {
468 pr_warn("Error creating pfn debugfs node!\n");
469 goto err;
470 }
471
472 array = debugfs_create_file("array", S_IRUSR, d, NULL, &array_ops);
473 if (!array) {
474 pr_warn("Error creating array debugfs node!\n");
475 goto err;
476 }
477
478 decay = debugfs_create_file("decay_interval", S_IRUSR | S_IWUSR, d, 492 decay = debugfs_create_file("decay_interval", S_IRUSR | S_IWUSR, d,
479 &decay_interval, &decay_interval_ops); 493 &decay_interval, &decay_interval_ops);
480 if (!decay) { 494 if (!decay) {
@@ -482,13 +496,27 @@ static int __init create_debugfs_nodes(void)
482 goto err; 496 goto err;
483 } 497 }
484 498
485 count = debugfs_create_file("count_threshold", S_IRUSR | S_IWUSR, d, 499 count = debugfs_create_file("action_threshold", S_IRUSR | S_IWUSR, d,
486 &count_threshold, &count_threshold_ops); 500 &action_threshold, &action_threshold_ops);
487 if (!count) { 501 if (!count) {
488 pr_warn("Error creating count_threshold debugfs node!\n"); 502 pr_warn("Error creating action_threshold debugfs node!\n");
503 goto err;
504 }
505
506 if (!IS_ENABLED(CONFIG_RAS_CEC_DEBUG))
507 return 0;
508
509 pfn = debugfs_create_file("pfn", S_IRUSR | S_IWUSR, d, &dfs_pfn, &pfn_ops);
510 if (!pfn) {
511 pr_warn("Error creating pfn debugfs node!\n");
489 goto err; 512 goto err;
490 } 513 }
491 514
515 array = debugfs_create_file("array", S_IRUSR, d, NULL, &array_ops);
516 if (!array) {
517 pr_warn("Error creating array debugfs node!\n");
518 goto err;
519 }
492 520
493 return 0; 521 return 0;
494 522
@@ -509,8 +537,10 @@ void __init cec_init(void)
509 return; 537 return;
510 } 538 }
511 539
512 if (create_debugfs_nodes()) 540 if (create_debugfs_nodes()) {
541 free_page((unsigned long)ce_arr.array);
513 return; 542 return;
543 }
514 544
515 INIT_DELAYED_WORK(&cec_work, cec_work_fn); 545 INIT_DELAYED_WORK(&cec_work, cec_work_fn);
516 schedule_delayed_work(&cec_work, CEC_DECAY_DEFAULT_INTERVAL); 546 schedule_delayed_work(&cec_work, CEC_DECAY_DEFAULT_INTERVAL);
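The new sanity_check() above depends on the CEC element array staying sorted by PFN after every insertion. A userspace sketch of the same ordering walk over packed elements; the element layout and the PFN() macro are simplified assumptions here, not the exact definitions from drivers/ras/cec.c:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define COUNT_BITS	12
#define PFN(e)		((e) >> COUNT_BITS)	/* assumption: low bits hold decay+count */

static bool sorted_by_pfn(const uint64_t *arr, int n)
{
	uint64_t prev = 0;

	for (int i = 0; i < n; i++) {
		uint64_t this = PFN(arr[i]);

		if (prev > this) {
			fprintf(stderr, "order violation at %d: %llx > %llx\n",
				i, (unsigned long long)prev, (unsigned long long)this);
			return false;
		}
		prev = this;
	}
	return true;
}

int main(void)
{
	uint64_t arr[] = {
		(0x1000ULL << COUNT_BITS) | 1,
		(0x2000ULL << COUNT_BITS) | 3,
		(0x1500ULL << COUNT_BITS) | 2,	/* out of order on purpose */
	};

	printf("sorted: %d\n", sorted_by_pfn(arr, 3));
	return 0;
}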
diff --git a/drivers/s390/block/Kconfig b/drivers/s390/block/Kconfig
index 9ac7574e3cfb..a8682f69effc 100644
--- a/drivers/s390/block/Kconfig
+++ b/drivers/s390/block/Kconfig
@@ -38,7 +38,7 @@ config DASD_PROFILE
38 depends on DASD 38 depends on DASD
39 help 39 help
40 Enable this option if you want to see profiling information 40 Enable this option if you want to see profiling information
41 in /proc/dasd/statistics. 41 in /proc/dasd/statistics.
42 42
43config DASD_ECKD 43config DASD_ECKD
44 def_tristate y 44 def_tristate y
diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c
index fab35c6170cc..245f33c2f71e 100644
--- a/drivers/s390/block/dasd_devmap.c
+++ b/drivers/s390/block/dasd_devmap.c
@@ -203,7 +203,7 @@ static int __init dasd_feature_list(char *str)
203 else if (len == 8 && !strncmp(str, "failfast", 8)) 203 else if (len == 8 && !strncmp(str, "failfast", 8))
204 features |= DASD_FEATURE_FAILFAST; 204 features |= DASD_FEATURE_FAILFAST;
205 else { 205 else {
206 pr_warn("%*s is not a supported device option\n", 206 pr_warn("%.*s is not a supported device option\n",
207 len, str); 207 len, str);
208 rc = -EINVAL; 208 rc = -EINVAL;
209 } 209 }
diff --git a/drivers/s390/char/Kconfig b/drivers/s390/char/Kconfig
index ab0b243a947d..6cc4b19acf85 100644
--- a/drivers/s390/char/Kconfig
+++ b/drivers/s390/char/Kconfig
@@ -79,27 +79,6 @@ config SCLP_VT220_CONSOLE
79 Include support for using an IBM SCLP VT220-compatible terminal as a 79 Include support for using an IBM SCLP VT220-compatible terminal as a
80 Linux system console. 80 Linux system console.
81 81
82config SCLP_ASYNC
83 def_tristate m
84 prompt "Support for Call Home via Asynchronous SCLP Records"
85 depends on S390
86 help
87 This option enables the call home function, which is able to inform
88 the service element and connected organisations about a kernel panic.
89 You should only select this option if you know what you are doing,
90 want for inform other people about your kernel panics,
91 need this feature and intend to run your kernel in LPAR.
92
93config SCLP_ASYNC_ID
94 string "Component ID for Call Home"
95 depends on SCLP_ASYNC
96 default "000000000"
97 help
98 The Component ID for Call Home is used to identify the correct
99 problem reporting queue the call home records should be sent to.
100
101 If your are unsure, please use the default value "000000000".
102
103config HMC_DRV 82config HMC_DRV
104 def_tristate m 83 def_tristate m
105 prompt "Support for file transfers from HMC drive CD/DVD-ROM" 84 prompt "Support for file transfers from HMC drive CD/DVD-ROM"
@@ -205,4 +184,3 @@ config S390_VMUR
205 depends on S390 184 depends on S390
206 help 185 help
207 Character device driver for z/VM reader, puncher and printer. 186 Character device driver for z/VM reader, puncher and printer.
208
diff --git a/drivers/s390/char/Makefile b/drivers/s390/char/Makefile
index 3072b89785dd..b8a8816d94e7 100644
--- a/drivers/s390/char/Makefile
+++ b/drivers/s390/char/Makefile
@@ -31,7 +31,6 @@ obj-$(CONFIG_TN3215) += con3215.o
31obj-$(CONFIG_SCLP_TTY) += sclp_tty.o 31obj-$(CONFIG_SCLP_TTY) += sclp_tty.o
32obj-$(CONFIG_SCLP_CONSOLE) += sclp_con.o 32obj-$(CONFIG_SCLP_CONSOLE) += sclp_con.o
33obj-$(CONFIG_SCLP_VT220_TTY) += sclp_vt220.o 33obj-$(CONFIG_SCLP_VT220_TTY) += sclp_vt220.o
34obj-$(CONFIG_SCLP_ASYNC) += sclp_async.o
35 34
36obj-$(CONFIG_PCI) += sclp_pci.o 35obj-$(CONFIG_PCI) += sclp_pci.o
37 36
diff --git a/drivers/s390/char/sclp_async.c b/drivers/s390/char/sclp_async.c
deleted file mode 100644
index e69b12a40636..000000000000
--- a/drivers/s390/char/sclp_async.c
+++ /dev/null
@@ -1,189 +0,0 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Enable Asynchronous Notification via SCLP.
4 *
5 * Copyright IBM Corp. 2009
6 * Author(s): Hans-Joachim Picht <hans@linux.vnet.ibm.com>
7 *
8 */
9
10#include <linux/init.h>
11#include <linux/module.h>
12#include <linux/device.h>
13#include <linux/stat.h>
14#include <linux/string.h>
15#include <linux/slab.h>
16#include <linux/ctype.h>
17#include <linux/kmod.h>
18#include <linux/err.h>
19#include <linux/errno.h>
20#include <linux/proc_fs.h>
21#include <linux/sysctl.h>
22#include <linux/utsname.h>
23#include "sclp.h"
24
25static int callhome_enabled;
26static struct sclp_req *request;
27static struct sclp_async_sccb *sccb;
28static int sclp_async_send_wait(char *message);
29static struct ctl_table_header *callhome_sysctl_header;
30static DEFINE_SPINLOCK(sclp_async_lock);
31#define SCLP_NORMAL_WRITE 0x00
32
33struct async_evbuf {
34 struct evbuf_header header;
35 u64 reserved;
36 u8 rflags;
37 u8 empty;
38 u8 rtype;
39 u8 otype;
40 char comp_id[12];
41 char data[3000]; /* there is still some space left */
42} __attribute__((packed));
43
44struct sclp_async_sccb {
45 struct sccb_header header;
46 struct async_evbuf evbuf;
47} __attribute__((packed));
48
49static struct sclp_register sclp_async_register = {
50 .send_mask = EVTYP_ASYNC_MASK,
51};
52
53static int call_home_on_panic(struct notifier_block *self,
54 unsigned long event, void *data)
55{
56 strncat(data, init_utsname()->nodename,
57 sizeof(init_utsname()->nodename));
58 sclp_async_send_wait(data);
59 return NOTIFY_DONE;
60}
61
62static struct notifier_block call_home_panic_nb = {
63 .notifier_call = call_home_on_panic,
64 .priority = INT_MAX,
65};
66
67static int zero;
68static int one = 1;
69
70static struct ctl_table callhome_table[] = {
71 {
72 .procname = "callhome",
73 .data = &callhome_enabled,
74 .maxlen = sizeof(int),
75 .mode = 0644,
76 .proc_handler = proc_dointvec_minmax,
77 .extra1 = &zero,
78 .extra2 = &one,
79 },
80 {}
81};
82
83static struct ctl_table kern_dir_table[] = {
84 {
85 .procname = "kernel",
86 .maxlen = 0,
87 .mode = 0555,
88 .child = callhome_table,
89 },
90 {}
91};
92
93/*
94 * Function used to transfer asynchronous notification
95 * records which waits for send completion
96 */
97static int sclp_async_send_wait(char *message)
98{
99 struct async_evbuf *evb;
100 int rc;
101 unsigned long flags;
102
103 if (!callhome_enabled)
104 return 0;
105 sccb->evbuf.header.type = EVTYP_ASYNC;
106 sccb->evbuf.rtype = 0xA5;
107 sccb->evbuf.otype = 0x00;
108 evb = &sccb->evbuf;
109 request->command = SCLP_CMDW_WRITE_EVENT_DATA;
110 request->sccb = sccb;
111 request->status = SCLP_REQ_FILLED;
112 strncpy(sccb->evbuf.data, message, sizeof(sccb->evbuf.data));
113 /*
114 * Retain Queue
115 * e.g. 5639CC140 500 Red Hat RHEL5 Linux for zSeries (RHEL AS)
116 */
117 strncpy(sccb->evbuf.comp_id, CONFIG_SCLP_ASYNC_ID,
118 sizeof(sccb->evbuf.comp_id));
119 sccb->evbuf.header.length = sizeof(sccb->evbuf);
120 sccb->header.length = sizeof(sccb->evbuf) + sizeof(sccb->header);
121 sccb->header.function_code = SCLP_NORMAL_WRITE;
122 rc = sclp_add_request(request);
123 if (rc)
124 return rc;
125 spin_lock_irqsave(&sclp_async_lock, flags);
126 while (request->status != SCLP_REQ_DONE &&
127 request->status != SCLP_REQ_FAILED) {
128 sclp_sync_wait();
129 }
130 spin_unlock_irqrestore(&sclp_async_lock, flags);
131 if (request->status != SCLP_REQ_DONE)
132 return -EIO;
133 rc = ((struct sclp_async_sccb *)
134 request->sccb)->header.response_code;
135 if (rc != 0x0020)
136 return -EIO;
137 if (evb->header.flags != 0x80)
138 return -EIO;
139 return rc;
140}
141
142static int __init sclp_async_init(void)
143{
144 int rc;
145
146 rc = sclp_register(&sclp_async_register);
147 if (rc)
148 return rc;
149 rc = -EOPNOTSUPP;
150 if (!(sclp_async_register.sclp_receive_mask & EVTYP_ASYNC_MASK))
151 goto out_sclp;
152 rc = -ENOMEM;
153 callhome_sysctl_header = register_sysctl_table(kern_dir_table);
154 if (!callhome_sysctl_header)
155 goto out_sclp;
156 request = kzalloc(sizeof(struct sclp_req), GFP_KERNEL);
157 sccb = (struct sclp_async_sccb *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
158 if (!request || !sccb)
159 goto out_mem;
160 rc = atomic_notifier_chain_register(&panic_notifier_list,
161 &call_home_panic_nb);
162 if (!rc)
163 goto out;
164out_mem:
165 kfree(request);
166 free_page((unsigned long) sccb);
167 unregister_sysctl_table(callhome_sysctl_header);
168out_sclp:
169 sclp_unregister(&sclp_async_register);
170out:
171 return rc;
172}
173module_init(sclp_async_init);
174
175static void __exit sclp_async_exit(void)
176{
177 atomic_notifier_chain_unregister(&panic_notifier_list,
178 &call_home_panic_nb);
179 unregister_sysctl_table(callhome_sysctl_header);
180 sclp_unregister(&sclp_async_register);
181 free_page((unsigned long) sccb);
182 kfree(request);
183}
184module_exit(sclp_async_exit);
185
186MODULE_AUTHOR("Copyright IBM Corp. 2009");
187MODULE_AUTHOR("Hans-Joachim Picht <hans@linux.vnet.ibm.com>");
188MODULE_LICENSE("GPL");
189MODULE_DESCRIPTION("SCLP Asynchronous Notification Records");
diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c
index 405a60538630..08f812475f5e 100644
--- a/drivers/s390/char/zcore.c
+++ b/drivers/s390/char/zcore.c
@@ -4,7 +4,7 @@
4 * dumps on SCSI disks (zfcpdump). The "zcore/mem" debugfs file shows the same 4 * dumps on SCSI disks (zfcpdump). The "zcore/mem" debugfs file shows the same
5 * dump format as s390 standalone dumps. 5 * dump format as s390 standalone dumps.
6 * 6 *
7 * For more information please refer to Documentation/s390/zfcpdump.txt 7 * For more information please refer to Documentation/s390/zfcpdump.rst
8 * 8 *
9 * Copyright IBM Corp. 2003, 2008 9 * Copyright IBM Corp. 2003, 2008
10 * Author(s): Michael Holzheu 10 * Author(s): Michael Holzheu
diff --git a/drivers/s390/cio/airq.c b/drivers/s390/cio/airq.c
index 4534afc63591..427b2e24a8ce 100644
--- a/drivers/s390/cio/airq.c
+++ b/drivers/s390/cio/airq.c
@@ -16,9 +16,11 @@
16#include <linux/mutex.h> 16#include <linux/mutex.h>
17#include <linux/rculist.h> 17#include <linux/rculist.h>
18#include <linux/slab.h> 18#include <linux/slab.h>
19#include <linux/dmapool.h>
19 20
20#include <asm/airq.h> 21#include <asm/airq.h>
21#include <asm/isc.h> 22#include <asm/isc.h>
23#include <asm/cio.h>
22 24
23#include "cio.h" 25#include "cio.h"
24#include "cio_debug.h" 26#include "cio_debug.h"
@@ -27,7 +29,7 @@
27static DEFINE_SPINLOCK(airq_lists_lock); 29static DEFINE_SPINLOCK(airq_lists_lock);
28static struct hlist_head airq_lists[MAX_ISC+1]; 30static struct hlist_head airq_lists[MAX_ISC+1];
29 31
30static struct kmem_cache *airq_iv_cache; 32static struct dma_pool *airq_iv_cache;
31 33
32/** 34/**
33 * register_adapter_interrupt() - register adapter interrupt handler 35 * register_adapter_interrupt() - register adapter interrupt handler
@@ -115,6 +117,11 @@ void __init init_airq_interrupts(void)
115 setup_irq(THIN_INTERRUPT, &airq_interrupt); 117 setup_irq(THIN_INTERRUPT, &airq_interrupt);
116} 118}
117 119
120static inline unsigned long iv_size(unsigned long bits)
121{
122 return BITS_TO_LONGS(bits) * sizeof(unsigned long);
123}
124
118/** 125/**
119 * airq_iv_create - create an interrupt vector 126 * airq_iv_create - create an interrupt vector
120 * @bits: number of bits in the interrupt vector 127 * @bits: number of bits in the interrupt vector
@@ -132,17 +139,19 @@ struct airq_iv *airq_iv_create(unsigned long bits, unsigned long flags)
132 goto out; 139 goto out;
133 iv->bits = bits; 140 iv->bits = bits;
134 iv->flags = flags; 141 iv->flags = flags;
135 size = BITS_TO_LONGS(bits) * sizeof(unsigned long); 142 size = iv_size(bits);
136 143
137 if (flags & AIRQ_IV_CACHELINE) { 144 if (flags & AIRQ_IV_CACHELINE) {
138 if ((cache_line_size() * BITS_PER_BYTE) < bits) 145 if ((cache_line_size() * BITS_PER_BYTE) < bits
146 || !airq_iv_cache)
139 goto out_free; 147 goto out_free;
140 148
141 iv->vector = kmem_cache_zalloc(airq_iv_cache, GFP_KERNEL); 149 iv->vector = dma_pool_zalloc(airq_iv_cache, GFP_KERNEL,
150 &iv->vector_dma);
142 if (!iv->vector) 151 if (!iv->vector)
143 goto out_free; 152 goto out_free;
144 } else { 153 } else {
145 iv->vector = kzalloc(size, GFP_KERNEL); 154 iv->vector = cio_dma_zalloc(size);
146 if (!iv->vector) 155 if (!iv->vector)
147 goto out_free; 156 goto out_free;
148 } 157 }
@@ -178,10 +187,10 @@ out_free:
178 kfree(iv->ptr); 187 kfree(iv->ptr);
179 kfree(iv->bitlock); 188 kfree(iv->bitlock);
180 kfree(iv->avail); 189 kfree(iv->avail);
181 if (iv->flags & AIRQ_IV_CACHELINE) 190 if (iv->flags & AIRQ_IV_CACHELINE && iv->vector)
182 kmem_cache_free(airq_iv_cache, iv->vector); 191 dma_pool_free(airq_iv_cache, iv->vector, iv->vector_dma);
183 else 192 else
184 kfree(iv->vector); 193 cio_dma_free(iv->vector, size);
185 kfree(iv); 194 kfree(iv);
186out: 195out:
187 return NULL; 196 return NULL;
@@ -198,9 +207,9 @@ void airq_iv_release(struct airq_iv *iv)
198 kfree(iv->ptr); 207 kfree(iv->ptr);
199 kfree(iv->bitlock); 208 kfree(iv->bitlock);
200 if (iv->flags & AIRQ_IV_CACHELINE) 209 if (iv->flags & AIRQ_IV_CACHELINE)
201 kmem_cache_free(airq_iv_cache, iv->vector); 210 dma_pool_free(airq_iv_cache, iv->vector, iv->vector_dma);
202 else 211 else
203 kfree(iv->vector); 212 cio_dma_free(iv->vector, iv_size(iv->bits));
204 kfree(iv->avail); 213 kfree(iv->avail);
205 kfree(iv); 214 kfree(iv);
206} 215}
@@ -295,12 +304,12 @@ unsigned long airq_iv_scan(struct airq_iv *iv, unsigned long start,
295} 304}
296EXPORT_SYMBOL(airq_iv_scan); 305EXPORT_SYMBOL(airq_iv_scan);
297 306
298static int __init airq_init(void) 307int __init airq_init(void)
299{ 308{
300 airq_iv_cache = kmem_cache_create("airq_iv_cache", cache_line_size(), 309 airq_iv_cache = dma_pool_create("airq_iv_cache", cio_get_dma_css_dev(),
301 cache_line_size(), 0, NULL); 310 cache_line_size(),
311 cache_line_size(), PAGE_SIZE);
302 if (!airq_iv_cache) 312 if (!airq_iv_cache)
303 return -ENOMEM; 313 return -ENOMEM;
304 return 0; 314 return 0;
305} 315}
306subsys_initcall(airq_init);
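For reference, the kmem_cache-to-dma_pool conversion above follows the standard dma_pool lifecycle. A minimal sketch of that pattern under the same parameters airq_init() uses (cache-line sized, cache-line aligned elements that never cross a page); the pool name, device pointer and example_* helpers are invented for illustration and are not part of the patch:

    #include <linux/dmapool.h>
    #include <linux/cache.h>

    static struct dma_pool *example_pool;
    static void *vec;
    static dma_addr_t vec_dma;

    static int example_init(struct device *dev)
    {
    	/* size == align == cache_line_size(), chunks bounded by PAGE_SIZE */
    	example_pool = dma_pool_create("example_iv", dev, cache_line_size(),
    				       cache_line_size(), PAGE_SIZE);
    	if (!example_pool)
    		return -ENOMEM;
    	vec = dma_pool_zalloc(example_pool, GFP_KERNEL, &vec_dma);
    	if (!vec) {
    		dma_pool_destroy(example_pool);
    		return -ENOMEM;
    	}
    	return 0;
    }

    static void example_exit(void)
    {
    	dma_pool_free(example_pool, vec, vec_dma);	/* needs the dma handle back */
    	dma_pool_destroy(example_pool);
    }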
diff --git a/drivers/s390/cio/ccwreq.c b/drivers/s390/cio/ccwreq.c
index 603268a33ea1..73582a0a2622 100644
--- a/drivers/s390/cio/ccwreq.c
+++ b/drivers/s390/cio/ccwreq.c
@@ -63,7 +63,7 @@ static void ccwreq_stop(struct ccw_device *cdev, int rc)
63 return; 63 return;
64 req->done = 1; 64 req->done = 1;
65 ccw_device_set_timeout(cdev, 0); 65 ccw_device_set_timeout(cdev, 0);
66 memset(&cdev->private->irb, 0, sizeof(struct irb)); 66 memset(&cdev->private->dma_area->irb, 0, sizeof(struct irb));
67 if (rc && rc != -ENODEV && req->drc) 67 if (rc && rc != -ENODEV && req->drc)
68 rc = req->drc; 68 rc = req->drc;
69 req->callback(cdev, req->data, rc); 69 req->callback(cdev, req->data, rc);
@@ -86,7 +86,7 @@ static void ccwreq_do(struct ccw_device *cdev)
86 continue; 86 continue;
87 } 87 }
88 /* Perform start function. */ 88 /* Perform start function. */
89 memset(&cdev->private->irb, 0, sizeof(struct irb)); 89 memset(&cdev->private->dma_area->irb, 0, sizeof(struct irb));
90 rc = cio_start(sch, cp, (u8) req->mask); 90 rc = cio_start(sch, cp, (u8) req->mask);
91 if (rc == 0) { 91 if (rc == 0) {
92 /* I/O started successfully. */ 92 /* I/O started successfully. */
@@ -169,7 +169,7 @@ int ccw_request_cancel(struct ccw_device *cdev)
169 */ 169 */
170static enum io_status ccwreq_status(struct ccw_device *cdev, struct irb *lcirb) 170static enum io_status ccwreq_status(struct ccw_device *cdev, struct irb *lcirb)
171{ 171{
172 struct irb *irb = &cdev->private->irb; 172 struct irb *irb = &cdev->private->dma_area->irb;
173 struct cmd_scsw *scsw = &irb->scsw.cmd; 173 struct cmd_scsw *scsw = &irb->scsw.cmd;
174 enum uc_todo todo; 174 enum uc_todo todo;
175 175
@@ -187,7 +187,8 @@ static enum io_status ccwreq_status(struct ccw_device *cdev, struct irb *lcirb)
187 CIO_TRACE_EVENT(2, "sensedata"); 187 CIO_TRACE_EVENT(2, "sensedata");
188 CIO_HEX_EVENT(2, &cdev->private->dev_id, 188 CIO_HEX_EVENT(2, &cdev->private->dev_id,
189 sizeof(struct ccw_dev_id)); 189 sizeof(struct ccw_dev_id));
190 CIO_HEX_EVENT(2, &cdev->private->irb.ecw, SENSE_MAX_COUNT); 190 CIO_HEX_EVENT(2, &cdev->private->dma_area->irb.ecw,
191 SENSE_MAX_COUNT);
191 /* Check for command reject. */ 192 /* Check for command reject. */
192 if (irb->ecw[0] & SNS0_CMD_REJECT) 193 if (irb->ecw[0] & SNS0_CMD_REJECT)
193 return IO_REJECTED; 194 return IO_REJECTED;
diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c
index a835b31aad99..6392a1b95b02 100644
--- a/drivers/s390/cio/chsc.c
+++ b/drivers/s390/cio/chsc.c
@@ -323,36 +323,6 @@ struct chsc_sei {
323} __packed __aligned(PAGE_SIZE); 323} __packed __aligned(PAGE_SIZE);
324 324
325/* 325/*
326 * Node Descriptor as defined in SA22-7204, "Common I/O-Device Commands"
327 */
328
329#define ND_VALIDITY_VALID 0
330#define ND_VALIDITY_OUTDATED 1
331#define ND_VALIDITY_INVALID 2
332
333struct node_descriptor {
334 /* Flags. */
335 union {
336 struct {
337 u32 validity:3;
338 u32 reserved:5;
339 } __packed;
340 u8 byte0;
341 } __packed;
342
343 /* Node parameters. */
344 u32 params:24;
345
346 /* Node ID. */
347 char type[6];
348 char model[3];
349 char manufacturer[3];
350 char plant[2];
351 char seq[12];
352 u16 tag;
353} __packed;
354
355/*
356 * Link Incident Record as defined in SA22-7202, "ESCON I/O Interface" 326 * Link Incident Record as defined in SA22-7202, "ESCON I/O Interface"
357 */ 327 */
358 328
diff --git a/drivers/s390/cio/cio.h b/drivers/s390/cio/cio.h
index 06a91743335a..ba7d2480613b 100644
--- a/drivers/s390/cio/cio.h
+++ b/drivers/s390/cio/cio.h
@@ -113,6 +113,7 @@ struct subchannel {
113 enum sch_todo todo; 113 enum sch_todo todo;
114 struct work_struct todo_work; 114 struct work_struct todo_work;
115 struct schib_config config; 115 struct schib_config config;
116 char *driver_override; /* Driver name to force a match */
116} __attribute__ ((aligned(8))); 117} __attribute__ ((aligned(8)));
117 118
118DECLARE_PER_CPU_ALIGNED(struct irb, cio_irb); 119DECLARE_PER_CPU_ALIGNED(struct irb, cio_irb);
@@ -135,6 +136,8 @@ extern int cio_commit_config(struct subchannel *sch);
135int cio_tm_start_key(struct subchannel *sch, struct tcw *tcw, u8 lpm, u8 key); 136int cio_tm_start_key(struct subchannel *sch, struct tcw *tcw, u8 lpm, u8 key);
136int cio_tm_intrg(struct subchannel *sch); 137int cio_tm_intrg(struct subchannel *sch);
137 138
139extern int __init airq_init(void);
140
138/* Use with care. */ 141/* Use with care. */
139#ifdef CONFIG_CCW_CONSOLE 142#ifdef CONFIG_CCW_CONSOLE
140extern struct subchannel *cio_probe_console(void); 143extern struct subchannel *cio_probe_console(void);
diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c
index aea502922646..e1f2d0eed544 100644
--- a/drivers/s390/cio/css.c
+++ b/drivers/s390/cio/css.c
@@ -20,6 +20,8 @@
20#include <linux/reboot.h> 20#include <linux/reboot.h>
21#include <linux/suspend.h> 21#include <linux/suspend.h>
22#include <linux/proc_fs.h> 22#include <linux/proc_fs.h>
23#include <linux/genalloc.h>
24#include <linux/dma-mapping.h>
23#include <asm/isc.h> 25#include <asm/isc.h>
24#include <asm/crw.h> 26#include <asm/crw.h>
25 27
@@ -165,6 +167,7 @@ static void css_subchannel_release(struct device *dev)
165 167
166 sch->config.intparm = 0; 168 sch->config.intparm = 0;
167 cio_commit_config(sch); 169 cio_commit_config(sch);
170 kfree(sch->driver_override);
168 kfree(sch->lock); 171 kfree(sch->lock);
169 kfree(sch); 172 kfree(sch);
170} 173}
@@ -224,6 +227,12 @@ struct subchannel *css_alloc_subchannel(struct subchannel_id schid,
224 INIT_WORK(&sch->todo_work, css_sch_todo); 227 INIT_WORK(&sch->todo_work, css_sch_todo);
225 sch->dev.release = &css_subchannel_release; 228 sch->dev.release = &css_subchannel_release;
226 device_initialize(&sch->dev); 229 device_initialize(&sch->dev);
230 /*
 231	 * The physical addresses of some of the dma structures that can
232 * belong to a subchannel need to fit 31 bit width (e.g. ccw).
233 */
234 sch->dev.coherent_dma_mask = DMA_BIT_MASK(31);
235 sch->dev.dma_mask = &sch->dev.coherent_dma_mask;
227 return sch; 236 return sch;
228 237
229err: 238err:
@@ -315,9 +324,57 @@ static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
315 324
316static DEVICE_ATTR_RO(modalias); 325static DEVICE_ATTR_RO(modalias);
317 326
327static ssize_t driver_override_store(struct device *dev,
328 struct device_attribute *attr,
329 const char *buf, size_t count)
330{
331 struct subchannel *sch = to_subchannel(dev);
332 char *driver_override, *old, *cp;
333
334 /* We need to keep extra room for a newline */
335 if (count >= (PAGE_SIZE - 1))
336 return -EINVAL;
337
338 driver_override = kstrndup(buf, count, GFP_KERNEL);
339 if (!driver_override)
340 return -ENOMEM;
341
342 cp = strchr(driver_override, '\n');
343 if (cp)
344 *cp = '\0';
345
346 device_lock(dev);
347 old = sch->driver_override;
348 if (strlen(driver_override)) {
349 sch->driver_override = driver_override;
350 } else {
351 kfree(driver_override);
352 sch->driver_override = NULL;
353 }
354 device_unlock(dev);
355
356 kfree(old);
357
358 return count;
359}
360
361static ssize_t driver_override_show(struct device *dev,
362 struct device_attribute *attr, char *buf)
363{
364 struct subchannel *sch = to_subchannel(dev);
365 ssize_t len;
366
367 device_lock(dev);
368 len = snprintf(buf, PAGE_SIZE, "%s\n", sch->driver_override);
369 device_unlock(dev);
370 return len;
371}
372static DEVICE_ATTR_RW(driver_override);
373
318static struct attribute *subch_attrs[] = { 374static struct attribute *subch_attrs[] = {
319 &dev_attr_type.attr, 375 &dev_attr_type.attr,
320 &dev_attr_modalias.attr, 376 &dev_attr_modalias.attr,
377 &dev_attr_driver_override.attr,
321 NULL, 378 NULL,
322}; 379};
323 380
@@ -899,6 +956,13 @@ static int __init setup_css(int nr)
899 dev_set_name(&css->device, "css%x", nr); 956 dev_set_name(&css->device, "css%x", nr);
900 css->device.groups = cssdev_attr_groups; 957 css->device.groups = cssdev_attr_groups;
901 css->device.release = channel_subsystem_release; 958 css->device.release = channel_subsystem_release;
959 /*
960 * We currently allocate notifier bits with this (using
961 * css->device as the device argument with the DMA API)
962 * and are fine with 64 bit addresses.
963 */
964 css->device.coherent_dma_mask = DMA_BIT_MASK(64);
965 css->device.dma_mask = &css->device.coherent_dma_mask;
902 966
903 mutex_init(&css->mutex); 967 mutex_init(&css->mutex);
904 css->cssid = chsc_get_cssid(nr); 968 css->cssid = chsc_get_cssid(nr);
@@ -1018,6 +1082,111 @@ static struct notifier_block css_power_notifier = {
1018 .notifier_call = css_power_event, 1082 .notifier_call = css_power_event,
1019}; 1083};
1020 1084
1085#define CIO_DMA_GFP (GFP_KERNEL | __GFP_ZERO)
1086static struct gen_pool *cio_dma_pool;
1087
1088/* Currently cio supports only a single css */
1089struct device *cio_get_dma_css_dev(void)
1090{
1091 return &channel_subsystems[0]->device;
1092}
1093
1094struct gen_pool *cio_gp_dma_create(struct device *dma_dev, int nr_pages)
1095{
1096 struct gen_pool *gp_dma;
1097 void *cpu_addr;
1098 dma_addr_t dma_addr;
1099 int i;
1100
1101 gp_dma = gen_pool_create(3, -1);
1102 if (!gp_dma)
1103 return NULL;
1104 for (i = 0; i < nr_pages; ++i) {
1105 cpu_addr = dma_alloc_coherent(dma_dev, PAGE_SIZE, &dma_addr,
1106 CIO_DMA_GFP);
1107 if (!cpu_addr)
1108 return gp_dma;
1109 gen_pool_add_virt(gp_dma, (unsigned long) cpu_addr,
1110 dma_addr, PAGE_SIZE, -1);
1111 }
1112 return gp_dma;
1113}
1114
1115static void __gp_dma_free_dma(struct gen_pool *pool,
1116 struct gen_pool_chunk *chunk, void *data)
1117{
1118 size_t chunk_size = chunk->end_addr - chunk->start_addr + 1;
1119
1120 dma_free_coherent((struct device *) data, chunk_size,
1121 (void *) chunk->start_addr,
1122 (dma_addr_t) chunk->phys_addr);
1123}
1124
1125void cio_gp_dma_destroy(struct gen_pool *gp_dma, struct device *dma_dev)
1126{
1127 if (!gp_dma)
1128 return;
1129	/* awkward, but gen_pool has no helper to free the backing chunks in one go */
1130 gen_pool_for_each_chunk(gp_dma, __gp_dma_free_dma, dma_dev);
1131 gen_pool_destroy(gp_dma);
1132}
1133
1134static int cio_dma_pool_init(void)
1135{
1136 /* No need to free up the resources: compiled in */
1137 cio_dma_pool = cio_gp_dma_create(cio_get_dma_css_dev(), 1);
1138 if (!cio_dma_pool)
1139 return -ENOMEM;
1140 return 0;
1141}
1142
1143void *cio_gp_dma_zalloc(struct gen_pool *gp_dma, struct device *dma_dev,
1144 size_t size)
1145{
1146 dma_addr_t dma_addr;
1147 unsigned long addr;
1148 size_t chunk_size;
1149
1150 if (!gp_dma)
1151 return NULL;
1152 addr = gen_pool_alloc(gp_dma, size);
1153 while (!addr) {
1154 chunk_size = round_up(size, PAGE_SIZE);
1155 addr = (unsigned long) dma_alloc_coherent(dma_dev,
1156 chunk_size, &dma_addr, CIO_DMA_GFP);
1157 if (!addr)
1158 return NULL;
1159 gen_pool_add_virt(gp_dma, addr, dma_addr, chunk_size, -1);
1160 addr = gen_pool_alloc(gp_dma, size);
1161 }
1162 return (void *) addr;
1163}
1164
1165void cio_gp_dma_free(struct gen_pool *gp_dma, void *cpu_addr, size_t size)
1166{
1167 if (!cpu_addr)
1168 return;
1169 memset(cpu_addr, 0, size);
1170 gen_pool_free(gp_dma, (unsigned long) cpu_addr, size);
1171}
1172
1173/*
1174 * Allocate dma memory from the css global pool. Intended for memory not
1175 * specific to any single device within the css. The allocated memory
1176 * is not guaranteed to be 31-bit addressable.
1177 *
1178 * Caution: Not suitable for early users such as the console.
1179 */
1180void *cio_dma_zalloc(size_t size)
1181{
1182 return cio_gp_dma_zalloc(cio_dma_pool, cio_get_dma_css_dev(), size);
1183}
1184
1185void cio_dma_free(void *cpu_addr, size_t size)
1186{
1187 cio_gp_dma_free(cio_dma_pool, cpu_addr, size);
1188}
1189
1021/* 1190/*
1022 * Now that the driver core is running, we can setup our channel subsystem. 1191 * Now that the driver core is running, we can setup our channel subsystem.
1023 * The struct subchannel's are created during probing. 1192 * The struct subchannel's are created during probing.
@@ -1059,16 +1228,22 @@ static int __init css_bus_init(void)
1059 if (ret) 1228 if (ret)
1060 goto out_unregister; 1229 goto out_unregister;
1061 ret = register_pm_notifier(&css_power_notifier); 1230 ret = register_pm_notifier(&css_power_notifier);
1062 if (ret) { 1231 if (ret)
1063 unregister_reboot_notifier(&css_reboot_notifier); 1232 goto out_unregister_rn;
1064 goto out_unregister; 1233 ret = cio_dma_pool_init();
1065 } 1234 if (ret)
1235 goto out_unregister_pmn;
1236 airq_init();
1066 css_init_done = 1; 1237 css_init_done = 1;
1067 1238
1068 /* Enable default isc for I/O subchannels. */ 1239 /* Enable default isc for I/O subchannels. */
1069 isc_register(IO_SCH_ISC); 1240 isc_register(IO_SCH_ISC);
1070 1241
1071 return 0; 1242 return 0;
1243out_unregister_pmn:
1244 unregister_pm_notifier(&css_power_notifier);
1245out_unregister_rn:
1246 unregister_reboot_notifier(&css_reboot_notifier);
1072out_unregister: 1247out_unregister:
1073 while (i-- > 0) { 1248 while (i-- > 0) {
1074 struct channel_subsystem *css = channel_subsystems[i]; 1249 struct channel_subsystem *css = channel_subsystems[i];
@@ -1222,6 +1397,10 @@ static int css_bus_match(struct device *dev, struct device_driver *drv)
1222 struct css_driver *driver = to_cssdriver(drv); 1397 struct css_driver *driver = to_cssdriver(drv);
1223 struct css_device_id *id; 1398 struct css_device_id *id;
1224 1399
1400 /* When driver_override is set, only bind to the matching driver */
1401 if (sch->driver_override && strcmp(sch->driver_override, drv->name))
1402 return 0;
1403
1225 for (id = driver->subchannel_type; id->match_flags; id++) { 1404 for (id = driver->subchannel_type; id->match_flags; id++) {
1226 if (sch->st == id->type) 1405 if (sch->st == id->type)
1227 return 1; 1406 return 1;
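The grow-on-demand gen_pool above backs two small helpers, cio_dma_zalloc() and cio_dma_free(). A minimal usage sketch for a cio-internal caller follows; the caller name and buffer size are invented, only the two helpers come from the patch:

    static void *example_bits;

    static int example_setup(void)
    {
    	/* zeroed buffer from the css-wide pool, 64 bit addressable per setup_css() */
    	example_bits = cio_dma_zalloc(64);
    	if (!example_bits)
    		return -ENOMEM;
    	return 0;
    }

    static void example_teardown(void)
    {
    	cio_dma_free(example_bits, 64);	/* size must match the allocation */
    }

If the pool runs dry, cio_gp_dma_zalloc() transparently adds another dma_alloc_coherent() chunk, as in the while loop above.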
diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c
index 1540229a37bb..9985b7484a6b 100644
--- a/drivers/s390/cio/device.c
+++ b/drivers/s390/cio/device.c
@@ -24,6 +24,7 @@
24#include <linux/timer.h> 24#include <linux/timer.h>
25#include <linux/kernel_stat.h> 25#include <linux/kernel_stat.h>
26#include <linux/sched/signal.h> 26#include <linux/sched/signal.h>
27#include <linux/dma-mapping.h>
27 28
28#include <asm/ccwdev.h> 29#include <asm/ccwdev.h>
29#include <asm/cio.h> 30#include <asm/cio.h>
@@ -687,6 +688,9 @@ ccw_device_release(struct device *dev)
687 struct ccw_device *cdev; 688 struct ccw_device *cdev;
688 689
689 cdev = to_ccwdev(dev); 690 cdev = to_ccwdev(dev);
691 cio_gp_dma_free(cdev->private->dma_pool, cdev->private->dma_area,
692 sizeof(*cdev->private->dma_area));
693 cio_gp_dma_destroy(cdev->private->dma_pool, &cdev->dev);
690 /* Release reference of parent subchannel. */ 694 /* Release reference of parent subchannel. */
691 put_device(cdev->dev.parent); 695 put_device(cdev->dev.parent);
692 kfree(cdev->private); 696 kfree(cdev->private);
@@ -696,15 +700,33 @@ ccw_device_release(struct device *dev)
696static struct ccw_device * io_subchannel_allocate_dev(struct subchannel *sch) 700static struct ccw_device * io_subchannel_allocate_dev(struct subchannel *sch)
697{ 701{
698 struct ccw_device *cdev; 702 struct ccw_device *cdev;
703 struct gen_pool *dma_pool;
699 704
700 cdev = kzalloc(sizeof(*cdev), GFP_KERNEL); 705 cdev = kzalloc(sizeof(*cdev), GFP_KERNEL);
701 if (cdev) { 706 if (!cdev)
702 cdev->private = kzalloc(sizeof(struct ccw_device_private), 707 goto err_cdev;
703 GFP_KERNEL | GFP_DMA); 708 cdev->private = kzalloc(sizeof(struct ccw_device_private),
704 if (cdev->private) 709 GFP_KERNEL | GFP_DMA);
705 return cdev; 710 if (!cdev->private)
706 } 711 goto err_priv;
712 cdev->dev.coherent_dma_mask = sch->dev.coherent_dma_mask;
713 cdev->dev.dma_mask = &cdev->dev.coherent_dma_mask;
714 dma_pool = cio_gp_dma_create(&cdev->dev, 1);
715 if (!dma_pool)
716 goto err_dma_pool;
717 cdev->private->dma_pool = dma_pool;
718 cdev->private->dma_area = cio_gp_dma_zalloc(dma_pool, &cdev->dev,
719 sizeof(*cdev->private->dma_area));
720 if (!cdev->private->dma_area)
721 goto err_dma_area;
722 return cdev;
723err_dma_area:
724 cio_gp_dma_destroy(dma_pool, &cdev->dev);
725err_dma_pool:
726 kfree(cdev->private);
727err_priv:
707 kfree(cdev); 728 kfree(cdev);
729err_cdev:
708 return ERR_PTR(-ENOMEM); 730 return ERR_PTR(-ENOMEM);
709} 731}
710 732
@@ -884,7 +906,7 @@ io_subchannel_recog_done(struct ccw_device *cdev)
884 wake_up(&ccw_device_init_wq); 906 wake_up(&ccw_device_init_wq);
885 break; 907 break;
886 case DEV_STATE_OFFLINE: 908 case DEV_STATE_OFFLINE:
887 /* 909 /*
888 * We can't register the device in interrupt context so 910 * We can't register the device in interrupt context so
889 * we schedule a work item. 911 * we schedule a work item.
890 */ 912 */
@@ -1062,6 +1084,14 @@ static int io_subchannel_probe(struct subchannel *sch)
1062 if (!io_priv) 1084 if (!io_priv)
1063 goto out_schedule; 1085 goto out_schedule;
1064 1086
1087 io_priv->dma_area = dma_alloc_coherent(&sch->dev,
1088 sizeof(*io_priv->dma_area),
1089 &io_priv->dma_area_dma, GFP_KERNEL);
1090 if (!io_priv->dma_area) {
1091 kfree(io_priv);
1092 goto out_schedule;
1093 }
1094
1065 set_io_private(sch, io_priv); 1095 set_io_private(sch, io_priv);
1066 css_schedule_eval(sch->schid); 1096 css_schedule_eval(sch->schid);
1067 return 0; 1097 return 0;
@@ -1088,6 +1118,8 @@ static int io_subchannel_remove(struct subchannel *sch)
1088 set_io_private(sch, NULL); 1118 set_io_private(sch, NULL);
1089 spin_unlock_irq(sch->lock); 1119 spin_unlock_irq(sch->lock);
1090out_free: 1120out_free:
1121 dma_free_coherent(&sch->dev, sizeof(*io_priv->dma_area),
1122 io_priv->dma_area, io_priv->dma_area_dma);
1091 kfree(io_priv); 1123 kfree(io_priv);
1092 sysfs_remove_group(&sch->dev.kobj, &io_subchannel_attr_group); 1124 sysfs_remove_group(&sch->dev.kobj, &io_subchannel_attr_group);
1093 return 0; 1125 return 0;
@@ -1593,13 +1625,19 @@ struct ccw_device * __init ccw_device_create_console(struct ccw_driver *drv)
1593 return ERR_CAST(sch); 1625 return ERR_CAST(sch);
1594 1626
1595 io_priv = kzalloc(sizeof(*io_priv), GFP_KERNEL | GFP_DMA); 1627 io_priv = kzalloc(sizeof(*io_priv), GFP_KERNEL | GFP_DMA);
1596 if (!io_priv) { 1628 if (!io_priv)
1597 put_device(&sch->dev); 1629 goto err_priv;
1598 return ERR_PTR(-ENOMEM); 1630 io_priv->dma_area = dma_alloc_coherent(&sch->dev,
1599 } 1631 sizeof(*io_priv->dma_area),
1632 &io_priv->dma_area_dma, GFP_KERNEL);
1633 if (!io_priv->dma_area)
1634 goto err_dma_area;
1600 set_io_private(sch, io_priv); 1635 set_io_private(sch, io_priv);
1601 cdev = io_subchannel_create_ccwdev(sch); 1636 cdev = io_subchannel_create_ccwdev(sch);
1602 if (IS_ERR(cdev)) { 1637 if (IS_ERR(cdev)) {
1638 dma_free_coherent(&sch->dev, sizeof(*io_priv->dma_area),
1639 io_priv->dma_area, io_priv->dma_area_dma);
1640 set_io_private(sch, NULL);
1603 put_device(&sch->dev); 1641 put_device(&sch->dev);
1604 kfree(io_priv); 1642 kfree(io_priv);
1605 return cdev; 1643 return cdev;
@@ -1607,6 +1645,12 @@ struct ccw_device * __init ccw_device_create_console(struct ccw_driver *drv)
1607 cdev->drv = drv; 1645 cdev->drv = drv;
1608 ccw_device_set_int_class(cdev); 1646 ccw_device_set_int_class(cdev);
1609 return cdev; 1647 return cdev;
1648
1649err_dma_area:
1650 kfree(io_priv);
1651err_priv:
1652 put_device(&sch->dev);
1653 return ERR_PTR(-ENOMEM);
1610} 1654}
1611 1655
1612void __init ccw_device_destroy_console(struct ccw_device *cdev) 1656void __init ccw_device_destroy_console(struct ccw_device *cdev)
@@ -1617,6 +1661,8 @@ void __init ccw_device_destroy_console(struct ccw_device *cdev)
1617 set_io_private(sch, NULL); 1661 set_io_private(sch, NULL);
1618 put_device(&sch->dev); 1662 put_device(&sch->dev);
1619 put_device(&cdev->dev); 1663 put_device(&cdev->dev);
1664 dma_free_coherent(&sch->dev, sizeof(*io_priv->dma_area),
1665 io_priv->dma_area, io_priv->dma_area_dma);
1620 kfree(io_priv); 1666 kfree(io_priv);
1621} 1667}
1622 1668
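The per-device pool created in io_subchannel_allocate_dev() and torn down in ccw_device_release() pairs up as sketched below; struct my_dma_area and the example_* helpers are stand-ins (the real code uses struct ccw_device_dma_area), only the cio_gp_dma_* calls and their ordering come from the patch:

    struct my_dma_area { char buf[256]; };	/* stand-in payload */

    static int example_alloc(struct device *dev, struct gen_pool **pool,
    			 struct my_dma_area **area)
    {
    	*pool = cio_gp_dma_create(dev, 1);	/* one page of initial backing */
    	if (!*pool)
    		return -ENOMEM;
    	*area = cio_gp_dma_zalloc(*pool, dev, sizeof(**area));
    	if (!*area) {
    		cio_gp_dma_destroy(*pool, dev);
    		return -ENOMEM;
    	}
    	return 0;
    }

    static void example_release(struct device *dev, struct gen_pool *pool,
    			    struct my_dma_area *area)
    {
    	cio_gp_dma_free(pool, area, sizeof(*area));	/* free before destroying the pool */
    	cio_gp_dma_destroy(pool, dev);
    }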
diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c
index 9169af7dbb43..8fc267324ebb 100644
--- a/drivers/s390/cio/device_fsm.c
+++ b/drivers/s390/cio/device_fsm.c
@@ -67,8 +67,10 @@ static void ccw_timeout_log(struct ccw_device *cdev)
67 sizeof(struct tcw), 0); 67 sizeof(struct tcw), 0);
68 } else { 68 } else {
69 printk(KERN_WARNING "cio: orb indicates command mode\n"); 69 printk(KERN_WARNING "cio: orb indicates command mode\n");
70 if ((void *)(addr_t)orb->cmd.cpa == &private->sense_ccw || 70 if ((void *)(addr_t)orb->cmd.cpa ==
71 (void *)(addr_t)orb->cmd.cpa == cdev->private->iccws) 71 &private->dma_area->sense_ccw ||
72 (void *)(addr_t)orb->cmd.cpa ==
73 cdev->private->dma_area->iccws)
72 printk(KERN_WARNING "cio: last channel program " 74 printk(KERN_WARNING "cio: last channel program "
73 "(intern):\n"); 75 "(intern):\n");
74 else 76 else
@@ -143,18 +145,22 @@ ccw_device_cancel_halt_clear(struct ccw_device *cdev)
143void ccw_device_update_sense_data(struct ccw_device *cdev) 145void ccw_device_update_sense_data(struct ccw_device *cdev)
144{ 146{
145 memset(&cdev->id, 0, sizeof(cdev->id)); 147 memset(&cdev->id, 0, sizeof(cdev->id));
146 cdev->id.cu_type = cdev->private->senseid.cu_type; 148 cdev->id.cu_type = cdev->private->dma_area->senseid.cu_type;
147 cdev->id.cu_model = cdev->private->senseid.cu_model; 149 cdev->id.cu_model = cdev->private->dma_area->senseid.cu_model;
148 cdev->id.dev_type = cdev->private->senseid.dev_type; 150 cdev->id.dev_type = cdev->private->dma_area->senseid.dev_type;
149 cdev->id.dev_model = cdev->private->senseid.dev_model; 151 cdev->id.dev_model = cdev->private->dma_area->senseid.dev_model;
150} 152}
151 153
152int ccw_device_test_sense_data(struct ccw_device *cdev) 154int ccw_device_test_sense_data(struct ccw_device *cdev)
153{ 155{
154 return cdev->id.cu_type == cdev->private->senseid.cu_type && 156 return cdev->id.cu_type ==
155 cdev->id.cu_model == cdev->private->senseid.cu_model && 157 cdev->private->dma_area->senseid.cu_type &&
156 cdev->id.dev_type == cdev->private->senseid.dev_type && 158 cdev->id.cu_model ==
157 cdev->id.dev_model == cdev->private->senseid.dev_model; 159 cdev->private->dma_area->senseid.cu_model &&
160 cdev->id.dev_type ==
161 cdev->private->dma_area->senseid.dev_type &&
162 cdev->id.dev_model ==
163 cdev->private->dma_area->senseid.dev_model;
158} 164}
159 165
160/* 166/*
@@ -342,7 +348,7 @@ ccw_device_done(struct ccw_device *cdev, int state)
342 cio_disable_subchannel(sch); 348 cio_disable_subchannel(sch);
343 349
344 /* Reset device status. */ 350 /* Reset device status. */
345 memset(&cdev->private->irb, 0, sizeof(struct irb)); 351 memset(&cdev->private->dma_area->irb, 0, sizeof(struct irb));
346 352
347 cdev->private->state = state; 353 cdev->private->state = state;
348 354
@@ -509,13 +515,14 @@ callback:
509 ccw_device_done(cdev, DEV_STATE_ONLINE); 515 ccw_device_done(cdev, DEV_STATE_ONLINE);
510 /* Deliver fake irb to device driver, if needed. */ 516 /* Deliver fake irb to device driver, if needed. */
511 if (cdev->private->flags.fake_irb) { 517 if (cdev->private->flags.fake_irb) {
512 create_fake_irb(&cdev->private->irb, 518 create_fake_irb(&cdev->private->dma_area->irb,
513 cdev->private->flags.fake_irb); 519 cdev->private->flags.fake_irb);
514 cdev->private->flags.fake_irb = 0; 520 cdev->private->flags.fake_irb = 0;
515 if (cdev->handler) 521 if (cdev->handler)
516 cdev->handler(cdev, cdev->private->intparm, 522 cdev->handler(cdev, cdev->private->intparm,
517 &cdev->private->irb); 523 &cdev->private->dma_area->irb);
518 memset(&cdev->private->irb, 0, sizeof(struct irb)); 524 memset(&cdev->private->dma_area->irb, 0,
525 sizeof(struct irb));
519 } 526 }
520 ccw_device_report_path_events(cdev); 527 ccw_device_report_path_events(cdev);
521 ccw_device_handle_broken_paths(cdev); 528 ccw_device_handle_broken_paths(cdev);
@@ -672,7 +679,8 @@ ccw_device_online_verify(struct ccw_device *cdev, enum dev_event dev_event)
672 679
673 if (scsw_actl(&sch->schib.scsw) != 0 || 680 if (scsw_actl(&sch->schib.scsw) != 0 ||
674 (scsw_stctl(&sch->schib.scsw) & SCSW_STCTL_STATUS_PEND) || 681 (scsw_stctl(&sch->schib.scsw) & SCSW_STCTL_STATUS_PEND) ||
675 (scsw_stctl(&cdev->private->irb.scsw) & SCSW_STCTL_STATUS_PEND)) { 682 (scsw_stctl(&cdev->private->dma_area->irb.scsw) &
683 SCSW_STCTL_STATUS_PEND)) {
676 /* 684 /*
677 * No final status yet or final status not yet delivered 685 * No final status yet or final status not yet delivered
678 * to the device driver. Can't do path verification now, 686 * to the device driver. Can't do path verification now,
@@ -719,7 +727,7 @@ static int ccw_device_call_handler(struct ccw_device *cdev)
719 * - fast notification was requested (primary status) 727 * - fast notification was requested (primary status)
720 * - unsolicited interrupts 728 * - unsolicited interrupts
721 */ 729 */
722 stctl = scsw_stctl(&cdev->private->irb.scsw); 730 stctl = scsw_stctl(&cdev->private->dma_area->irb.scsw);
723 ending_status = (stctl & SCSW_STCTL_SEC_STATUS) || 731 ending_status = (stctl & SCSW_STCTL_SEC_STATUS) ||
724 (stctl == (SCSW_STCTL_ALERT_STATUS | SCSW_STCTL_STATUS_PEND)) || 732 (stctl == (SCSW_STCTL_ALERT_STATUS | SCSW_STCTL_STATUS_PEND)) ||
725 (stctl == SCSW_STCTL_STATUS_PEND); 733 (stctl == SCSW_STCTL_STATUS_PEND);
@@ -735,9 +743,9 @@ static int ccw_device_call_handler(struct ccw_device *cdev)
735 743
736 if (cdev->handler) 744 if (cdev->handler)
737 cdev->handler(cdev, cdev->private->intparm, 745 cdev->handler(cdev, cdev->private->intparm,
738 &cdev->private->irb); 746 &cdev->private->dma_area->irb);
739 747
740 memset(&cdev->private->irb, 0, sizeof(struct irb)); 748 memset(&cdev->private->dma_area->irb, 0, sizeof(struct irb));
741 return 1; 749 return 1;
742} 750}
743 751
@@ -759,7 +767,8 @@ ccw_device_irq(struct ccw_device *cdev, enum dev_event dev_event)
759 /* Unit check but no sense data. Need basic sense. */ 767 /* Unit check but no sense data. Need basic sense. */
760 if (ccw_device_do_sense(cdev, irb) != 0) 768 if (ccw_device_do_sense(cdev, irb) != 0)
761 goto call_handler_unsol; 769 goto call_handler_unsol;
762 memcpy(&cdev->private->irb, irb, sizeof(struct irb)); 770 memcpy(&cdev->private->dma_area->irb, irb,
771 sizeof(struct irb));
763 cdev->private->state = DEV_STATE_W4SENSE; 772 cdev->private->state = DEV_STATE_W4SENSE;
764 cdev->private->intparm = 0; 773 cdev->private->intparm = 0;
765 return; 774 return;
@@ -842,7 +851,7 @@ ccw_device_w4sense(struct ccw_device *cdev, enum dev_event dev_event)
842 if (scsw_fctl(&irb->scsw) & 851 if (scsw_fctl(&irb->scsw) &
843 (SCSW_FCTL_CLEAR_FUNC | SCSW_FCTL_HALT_FUNC)) { 852 (SCSW_FCTL_CLEAR_FUNC | SCSW_FCTL_HALT_FUNC)) {
844 cdev->private->flags.dosense = 0; 853 cdev->private->flags.dosense = 0;
845 memset(&cdev->private->irb, 0, sizeof(struct irb)); 854 memset(&cdev->private->dma_area->irb, 0, sizeof(struct irb));
846 ccw_device_accumulate_irb(cdev, irb); 855 ccw_device_accumulate_irb(cdev, irb);
847 goto call_handler; 856 goto call_handler;
848 } 857 }
diff --git a/drivers/s390/cio/device_id.c b/drivers/s390/cio/device_id.c
index f6df83a9dfbb..740996d0dc8c 100644
--- a/drivers/s390/cio/device_id.c
+++ b/drivers/s390/cio/device_id.c
@@ -99,7 +99,7 @@ static int diag210_to_senseid(struct senseid *senseid, struct diag210 *diag)
99static int diag210_get_dev_info(struct ccw_device *cdev) 99static int diag210_get_dev_info(struct ccw_device *cdev)
100{ 100{
101 struct ccw_dev_id *dev_id = &cdev->private->dev_id; 101 struct ccw_dev_id *dev_id = &cdev->private->dev_id;
102 struct senseid *senseid = &cdev->private->senseid; 102 struct senseid *senseid = &cdev->private->dma_area->senseid;
103 struct diag210 diag_data; 103 struct diag210 diag_data;
104 int rc; 104 int rc;
105 105
@@ -134,8 +134,10 @@ err_failed:
134static void snsid_init(struct ccw_device *cdev) 134static void snsid_init(struct ccw_device *cdev)
135{ 135{
136 cdev->private->flags.esid = 0; 136 cdev->private->flags.esid = 0;
137 memset(&cdev->private->senseid, 0, sizeof(cdev->private->senseid)); 137
138 cdev->private->senseid.cu_type = 0xffff; 138 memset(&cdev->private->dma_area->senseid, 0,
139 sizeof(cdev->private->dma_area->senseid));
140 cdev->private->dma_area->senseid.cu_type = 0xffff;
139} 141}
140 142
141/* 143/*
@@ -143,16 +145,16 @@ static void snsid_init(struct ccw_device *cdev)
143 */ 145 */
144static int snsid_check(struct ccw_device *cdev, void *data) 146static int snsid_check(struct ccw_device *cdev, void *data)
145{ 147{
146 struct cmd_scsw *scsw = &cdev->private->irb.scsw.cmd; 148 struct cmd_scsw *scsw = &cdev->private->dma_area->irb.scsw.cmd;
147 int len = sizeof(struct senseid) - scsw->count; 149 int len = sizeof(struct senseid) - scsw->count;
148 150
149 /* Check for incomplete SENSE ID data. */ 151 /* Check for incomplete SENSE ID data. */
150 if (len < SENSE_ID_MIN_LEN) 152 if (len < SENSE_ID_MIN_LEN)
151 goto out_restart; 153 goto out_restart;
152 if (cdev->private->senseid.cu_type == 0xffff) 154 if (cdev->private->dma_area->senseid.cu_type == 0xffff)
153 goto out_restart; 155 goto out_restart;
154 /* Check for incompatible SENSE ID data. */ 156 /* Check for incompatible SENSE ID data. */
155 if (cdev->private->senseid.reserved != 0xff) 157 if (cdev->private->dma_area->senseid.reserved != 0xff)
156 return -EOPNOTSUPP; 158 return -EOPNOTSUPP;
157 /* Check for extended-identification information. */ 159 /* Check for extended-identification information. */
158 if (len > SENSE_ID_BASIC_LEN) 160 if (len > SENSE_ID_BASIC_LEN)
@@ -170,7 +172,7 @@ out_restart:
170static void snsid_callback(struct ccw_device *cdev, void *data, int rc) 172static void snsid_callback(struct ccw_device *cdev, void *data, int rc)
171{ 173{
172 struct ccw_dev_id *id = &cdev->private->dev_id; 174 struct ccw_dev_id *id = &cdev->private->dev_id;
173 struct senseid *senseid = &cdev->private->senseid; 175 struct senseid *senseid = &cdev->private->dma_area->senseid;
174 int vm = 0; 176 int vm = 0;
175 177
176 if (rc && MACHINE_IS_VM) { 178 if (rc && MACHINE_IS_VM) {
@@ -200,7 +202,7 @@ void ccw_device_sense_id_start(struct ccw_device *cdev)
200{ 202{
201 struct subchannel *sch = to_subchannel(cdev->dev.parent); 203 struct subchannel *sch = to_subchannel(cdev->dev.parent);
202 struct ccw_request *req = &cdev->private->req; 204 struct ccw_request *req = &cdev->private->req;
203 struct ccw1 *cp = cdev->private->iccws; 205 struct ccw1 *cp = cdev->private->dma_area->iccws;
204 206
205 CIO_TRACE_EVENT(4, "snsid"); 207 CIO_TRACE_EVENT(4, "snsid");
206 CIO_HEX_EVENT(4, &cdev->private->dev_id, sizeof(cdev->private->dev_id)); 208 CIO_HEX_EVENT(4, &cdev->private->dev_id, sizeof(cdev->private->dev_id));
@@ -208,7 +210,7 @@ void ccw_device_sense_id_start(struct ccw_device *cdev)
208 snsid_init(cdev); 210 snsid_init(cdev);
209 /* Channel program setup. */ 211 /* Channel program setup. */
210 cp->cmd_code = CCW_CMD_SENSE_ID; 212 cp->cmd_code = CCW_CMD_SENSE_ID;
211 cp->cda = (u32) (addr_t) &cdev->private->senseid; 213 cp->cda = (u32) (addr_t) &cdev->private->dma_area->senseid;
212 cp->count = sizeof(struct senseid); 214 cp->count = sizeof(struct senseid);
213 cp->flags = CCW_FLAG_SLI; 215 cp->flags = CCW_FLAG_SLI;
214 /* Request setup. */ 216 /* Request setup. */
diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c
index 4435ae0b3027..d722458c5928 100644
--- a/drivers/s390/cio/device_ops.c
+++ b/drivers/s390/cio/device_ops.c
@@ -429,8 +429,8 @@ struct ciw *ccw_device_get_ciw(struct ccw_device *cdev, __u32 ct)
429 if (cdev->private->flags.esid == 0) 429 if (cdev->private->flags.esid == 0)
430 return NULL; 430 return NULL;
431 for (ciw_cnt = 0; ciw_cnt < MAX_CIWS; ciw_cnt++) 431 for (ciw_cnt = 0; ciw_cnt < MAX_CIWS; ciw_cnt++)
432 if (cdev->private->senseid.ciw[ciw_cnt].ct == ct) 432 if (cdev->private->dma_area->senseid.ciw[ciw_cnt].ct == ct)
433 return cdev->private->senseid.ciw + ciw_cnt; 433 return cdev->private->dma_area->senseid.ciw + ciw_cnt;
434 return NULL; 434 return NULL;
435} 435}
436 436
@@ -699,6 +699,23 @@ void ccw_device_get_schid(struct ccw_device *cdev, struct subchannel_id *schid)
699} 699}
700EXPORT_SYMBOL_GPL(ccw_device_get_schid); 700EXPORT_SYMBOL_GPL(ccw_device_get_schid);
701 701
702/*
703 * Allocate zeroed dma coherent 31 bit addressable memory using
 704 * the ccw device's dma pool. Maximal size of allocation supported
705 * is PAGE_SIZE.
706 */
707void *ccw_device_dma_zalloc(struct ccw_device *cdev, size_t size)
708{
709 return cio_gp_dma_zalloc(cdev->private->dma_pool, &cdev->dev, size);
710}
711EXPORT_SYMBOL(ccw_device_dma_zalloc);
712
713void ccw_device_dma_free(struct ccw_device *cdev, void *cpu_addr, size_t size)
714{
715 cio_gp_dma_free(cdev->private->dma_pool, cpu_addr, size);
716}
717EXPORT_SYMBOL(ccw_device_dma_free);
718
702EXPORT_SYMBOL(ccw_device_set_options_mask); 719EXPORT_SYMBOL(ccw_device_set_options_mask);
703EXPORT_SYMBOL(ccw_device_set_options); 720EXPORT_SYMBOL(ccw_device_set_options);
704EXPORT_SYMBOL(ccw_device_clear_options); 721EXPORT_SYMBOL(ccw_device_clear_options);
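ccw_device_dma_zalloc() and ccw_device_dma_free() are exported so that CCW drivers can obtain 31-bit addressable channel-program memory without rolling their own pools. A hedged sketch of a driver-side caller; struct my_req and the example_* helpers are invented, only the two exports and the ccw1 layout are real:

    struct my_req {
    	struct ccw1 ccw;
    	u8 sense[32];
    };

    static int example_build(struct ccw_device *cdev, struct my_req **out)
    {
    	struct my_req *r = ccw_device_dma_zalloc(cdev, sizeof(*r));

    	if (!r)
    		return -ENOMEM;
    	/* 31-bit addressable, so the address fits into the 32-bit cda */
    	r->ccw.cmd_code = CCW_CMD_BASIC_SENSE;
    	r->ccw.cda = (u32)(addr_t)r->sense;
    	r->ccw.count = sizeof(r->sense);
    	r->ccw.flags = CCW_FLAG_SLI;
    	*out = r;
    	return 0;
    }

    static void example_free(struct ccw_device *cdev, struct my_req *r)
    {
    	ccw_device_dma_free(cdev, r, sizeof(*r));
    }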
diff --git a/drivers/s390/cio/device_pgid.c b/drivers/s390/cio/device_pgid.c
index d30a3babf176..767a85635a0f 100644
--- a/drivers/s390/cio/device_pgid.c
+++ b/drivers/s390/cio/device_pgid.c
@@ -57,7 +57,7 @@ out:
57static void nop_build_cp(struct ccw_device *cdev) 57static void nop_build_cp(struct ccw_device *cdev)
58{ 58{
59 struct ccw_request *req = &cdev->private->req; 59 struct ccw_request *req = &cdev->private->req;
60 struct ccw1 *cp = cdev->private->iccws; 60 struct ccw1 *cp = cdev->private->dma_area->iccws;
61 61
62 cp->cmd_code = CCW_CMD_NOOP; 62 cp->cmd_code = CCW_CMD_NOOP;
63 cp->cda = 0; 63 cp->cda = 0;
@@ -134,9 +134,9 @@ err:
134static void spid_build_cp(struct ccw_device *cdev, u8 fn) 134static void spid_build_cp(struct ccw_device *cdev, u8 fn)
135{ 135{
136 struct ccw_request *req = &cdev->private->req; 136 struct ccw_request *req = &cdev->private->req;
137 struct ccw1 *cp = cdev->private->iccws; 137 struct ccw1 *cp = cdev->private->dma_area->iccws;
138 int i = pathmask_to_pos(req->lpm); 138 int i = pathmask_to_pos(req->lpm);
139 struct pgid *pgid = &cdev->private->pgid[i]; 139 struct pgid *pgid = &cdev->private->dma_area->pgid[i];
140 140
141 pgid->inf.fc = fn; 141 pgid->inf.fc = fn;
142 cp->cmd_code = CCW_CMD_SET_PGID; 142 cp->cmd_code = CCW_CMD_SET_PGID;
@@ -300,7 +300,7 @@ static int pgid_cmp(struct pgid *p1, struct pgid *p2)
300static void pgid_analyze(struct ccw_device *cdev, struct pgid **p, 300static void pgid_analyze(struct ccw_device *cdev, struct pgid **p,
301 int *mismatch, u8 *reserved, u8 *reset) 301 int *mismatch, u8 *reserved, u8 *reset)
302{ 302{
303 struct pgid *pgid = &cdev->private->pgid[0]; 303 struct pgid *pgid = &cdev->private->dma_area->pgid[0];
304 struct pgid *first = NULL; 304 struct pgid *first = NULL;
305 int lpm; 305 int lpm;
306 int i; 306 int i;
@@ -342,7 +342,7 @@ static u8 pgid_to_donepm(struct ccw_device *cdev)
342 lpm = 0x80 >> i; 342 lpm = 0x80 >> i;
343 if ((cdev->private->pgid_valid_mask & lpm) == 0) 343 if ((cdev->private->pgid_valid_mask & lpm) == 0)
344 continue; 344 continue;
345 pgid = &cdev->private->pgid[i]; 345 pgid = &cdev->private->dma_area->pgid[i];
346 if (sch->opm & lpm) { 346 if (sch->opm & lpm) {
347 if (pgid->inf.ps.state1 != SNID_STATE1_GROUPED) 347 if (pgid->inf.ps.state1 != SNID_STATE1_GROUPED)
348 continue; 348 continue;
@@ -368,7 +368,8 @@ static void pgid_fill(struct ccw_device *cdev, struct pgid *pgid)
368 int i; 368 int i;
369 369
370 for (i = 0; i < 8; i++) 370 for (i = 0; i < 8; i++)
371 memcpy(&cdev->private->pgid[i], pgid, sizeof(struct pgid)); 371 memcpy(&cdev->private->dma_area->pgid[i], pgid,
372 sizeof(struct pgid));
372} 373}
373 374
374/* 375/*
@@ -435,12 +436,12 @@ out:
435static void snid_build_cp(struct ccw_device *cdev) 436static void snid_build_cp(struct ccw_device *cdev)
436{ 437{
437 struct ccw_request *req = &cdev->private->req; 438 struct ccw_request *req = &cdev->private->req;
438 struct ccw1 *cp = cdev->private->iccws; 439 struct ccw1 *cp = cdev->private->dma_area->iccws;
439 int i = pathmask_to_pos(req->lpm); 440 int i = pathmask_to_pos(req->lpm);
440 441
441 /* Channel program setup. */ 442 /* Channel program setup. */
442 cp->cmd_code = CCW_CMD_SENSE_PGID; 443 cp->cmd_code = CCW_CMD_SENSE_PGID;
443 cp->cda = (u32) (addr_t) &cdev->private->pgid[i]; 444 cp->cda = (u32) (addr_t) &cdev->private->dma_area->pgid[i];
444 cp->count = sizeof(struct pgid); 445 cp->count = sizeof(struct pgid);
445 cp->flags = CCW_FLAG_SLI; 446 cp->flags = CCW_FLAG_SLI;
446 req->cp = cp; 447 req->cp = cp;
@@ -516,7 +517,8 @@ static void verify_start(struct ccw_device *cdev)
516 sch->lpm = sch->schib.pmcw.pam; 517 sch->lpm = sch->schib.pmcw.pam;
517 518
518 /* Initialize PGID data. */ 519 /* Initialize PGID data. */
519 memset(cdev->private->pgid, 0, sizeof(cdev->private->pgid)); 520 memset(cdev->private->dma_area->pgid, 0,
521 sizeof(cdev->private->dma_area->pgid));
520 cdev->private->pgid_valid_mask = 0; 522 cdev->private->pgid_valid_mask = 0;
521 cdev->private->pgid_todo_mask = sch->schib.pmcw.pam; 523 cdev->private->pgid_todo_mask = sch->schib.pmcw.pam;
522 cdev->private->path_notoper_mask = 0; 524 cdev->private->path_notoper_mask = 0;
@@ -626,7 +628,7 @@ struct stlck_data {
626static void stlck_build_cp(struct ccw_device *cdev, void *buf1, void *buf2) 628static void stlck_build_cp(struct ccw_device *cdev, void *buf1, void *buf2)
627{ 629{
628 struct ccw_request *req = &cdev->private->req; 630 struct ccw_request *req = &cdev->private->req;
629 struct ccw1 *cp = cdev->private->iccws; 631 struct ccw1 *cp = cdev->private->dma_area->iccws;
630 632
631 cp[0].cmd_code = CCW_CMD_STLCK; 633 cp[0].cmd_code = CCW_CMD_STLCK;
632 cp[0].cda = (u32) (addr_t) buf1; 634 cp[0].cda = (u32) (addr_t) buf1;
diff --git a/drivers/s390/cio/device_status.c b/drivers/s390/cio/device_status.c
index 7d5c7892b2c4..0bd8f2642732 100644
--- a/drivers/s390/cio/device_status.c
+++ b/drivers/s390/cio/device_status.c
@@ -79,15 +79,15 @@ ccw_device_accumulate_ecw(struct ccw_device *cdev, struct irb *irb)
 79	 * are conditions that have to be met for the extended control 79	 * are conditions that have to be met for the extended control
80 * bit to have meaning. Sick. 80 * bit to have meaning. Sick.
81 */ 81 */
82 cdev->private->irb.scsw.cmd.ectl = 0; 82 cdev->private->dma_area->irb.scsw.cmd.ectl = 0;
83 if ((irb->scsw.cmd.stctl & SCSW_STCTL_ALERT_STATUS) && 83 if ((irb->scsw.cmd.stctl & SCSW_STCTL_ALERT_STATUS) &&
84 !(irb->scsw.cmd.stctl & SCSW_STCTL_INTER_STATUS)) 84 !(irb->scsw.cmd.stctl & SCSW_STCTL_INTER_STATUS))
85 cdev->private->irb.scsw.cmd.ectl = irb->scsw.cmd.ectl; 85 cdev->private->dma_area->irb.scsw.cmd.ectl = irb->scsw.cmd.ectl;
86 /* Check if extended control word is valid. */ 86 /* Check if extended control word is valid. */
87 if (!cdev->private->irb.scsw.cmd.ectl) 87 if (!cdev->private->dma_area->irb.scsw.cmd.ectl)
88 return; 88 return;
89 /* Copy concurrent sense / model dependent information. */ 89 /* Copy concurrent sense / model dependent information. */
90 memcpy (&cdev->private->irb.ecw, irb->ecw, sizeof (irb->ecw)); 90 memcpy(&cdev->private->dma_area->irb.ecw, irb->ecw, sizeof(irb->ecw));
91} 91}
92 92
93/* 93/*
@@ -118,7 +118,7 @@ ccw_device_accumulate_esw(struct ccw_device *cdev, struct irb *irb)
118 if (!ccw_device_accumulate_esw_valid(irb)) 118 if (!ccw_device_accumulate_esw_valid(irb))
119 return; 119 return;
120 120
121 cdev_irb = &cdev->private->irb; 121 cdev_irb = &cdev->private->dma_area->irb;
122 122
123 /* Copy last path used mask. */ 123 /* Copy last path used mask. */
124 cdev_irb->esw.esw1.lpum = irb->esw.esw1.lpum; 124 cdev_irb->esw.esw1.lpum = irb->esw.esw1.lpum;
@@ -210,7 +210,7 @@ ccw_device_accumulate_irb(struct ccw_device *cdev, struct irb *irb)
210 ccw_device_path_notoper(cdev); 210 ccw_device_path_notoper(cdev);
211 /* No irb accumulation for transport mode irbs. */ 211 /* No irb accumulation for transport mode irbs. */
212 if (scsw_is_tm(&irb->scsw)) { 212 if (scsw_is_tm(&irb->scsw)) {
213 memcpy(&cdev->private->irb, irb, sizeof(struct irb)); 213 memcpy(&cdev->private->dma_area->irb, irb, sizeof(struct irb));
214 return; 214 return;
215 } 215 }
216 /* 216 /*
@@ -219,7 +219,7 @@ ccw_device_accumulate_irb(struct ccw_device *cdev, struct irb *irb)
219 if (!scsw_is_solicited(&irb->scsw)) 219 if (!scsw_is_solicited(&irb->scsw))
220 return; 220 return;
221 221
222 cdev_irb = &cdev->private->irb; 222 cdev_irb = &cdev->private->dma_area->irb;
223 223
224 /* 224 /*
225 * If the clear function had been performed, all formerly pending 225 * If the clear function had been performed, all formerly pending
@@ -227,7 +227,7 @@ ccw_device_accumulate_irb(struct ccw_device *cdev, struct irb *irb)
227 * intermediate accumulated status to the device driver. 227 * intermediate accumulated status to the device driver.
228 */ 228 */
229 if (irb->scsw.cmd.fctl & SCSW_FCTL_CLEAR_FUNC) 229 if (irb->scsw.cmd.fctl & SCSW_FCTL_CLEAR_FUNC)
230 memset(&cdev->private->irb, 0, sizeof(struct irb)); 230 memset(&cdev->private->dma_area->irb, 0, sizeof(struct irb));
231 231
232 /* Copy bits which are valid only for the start function. */ 232 /* Copy bits which are valid only for the start function. */
233 if (irb->scsw.cmd.fctl & SCSW_FCTL_START_FUNC) { 233 if (irb->scsw.cmd.fctl & SCSW_FCTL_START_FUNC) {
@@ -329,9 +329,9 @@ ccw_device_do_sense(struct ccw_device *cdev, struct irb *irb)
329 /* 329 /*
330 * We have ending status but no sense information. Do a basic sense. 330 * We have ending status but no sense information. Do a basic sense.
331 */ 331 */
332 sense_ccw = &to_io_private(sch)->sense_ccw; 332 sense_ccw = &to_io_private(sch)->dma_area->sense_ccw;
333 sense_ccw->cmd_code = CCW_CMD_BASIC_SENSE; 333 sense_ccw->cmd_code = CCW_CMD_BASIC_SENSE;
334 sense_ccw->cda = (__u32) __pa(cdev->private->irb.ecw); 334 sense_ccw->cda = (__u32) __pa(cdev->private->dma_area->irb.ecw);
335 sense_ccw->count = SENSE_MAX_COUNT; 335 sense_ccw->count = SENSE_MAX_COUNT;
336 sense_ccw->flags = CCW_FLAG_SLI; 336 sense_ccw->flags = CCW_FLAG_SLI;
337 337
@@ -364,7 +364,7 @@ ccw_device_accumulate_basic_sense(struct ccw_device *cdev, struct irb *irb)
364 364
365 if (!(irb->scsw.cmd.dstat & DEV_STAT_UNIT_CHECK) && 365 if (!(irb->scsw.cmd.dstat & DEV_STAT_UNIT_CHECK) &&
366 (irb->scsw.cmd.dstat & DEV_STAT_CHN_END)) { 366 (irb->scsw.cmd.dstat & DEV_STAT_CHN_END)) {
367 cdev->private->irb.esw.esw0.erw.cons = 1; 367 cdev->private->dma_area->irb.esw.esw0.erw.cons = 1;
368 cdev->private->flags.dosense = 0; 368 cdev->private->flags.dosense = 0;
369 } 369 }
370 /* Check if path verification is required. */ 370 /* Check if path verification is required. */
@@ -386,7 +386,7 @@ ccw_device_accumulate_and_sense(struct ccw_device *cdev, struct irb *irb)
386 /* Check for basic sense. */ 386 /* Check for basic sense. */
387 if (cdev->private->flags.dosense && 387 if (cdev->private->flags.dosense &&
388 !(irb->scsw.cmd.dstat & DEV_STAT_UNIT_CHECK)) { 388 !(irb->scsw.cmd.dstat & DEV_STAT_UNIT_CHECK)) {
389 cdev->private->irb.esw.esw0.erw.cons = 1; 389 cdev->private->dma_area->irb.esw.esw0.erw.cons = 1;
390 cdev->private->flags.dosense = 0; 390 cdev->private->flags.dosense = 0;
391 return 0; 391 return 0;
392 } 392 }
diff --git a/drivers/s390/cio/io_sch.h b/drivers/s390/cio/io_sch.h
index 90e4e3a7841b..c03b4a19974e 100644
--- a/drivers/s390/cio/io_sch.h
+++ b/drivers/s390/cio/io_sch.h
@@ -9,15 +9,20 @@
9#include "css.h" 9#include "css.h"
10#include "orb.h" 10#include "orb.h"
11 11
12struct io_subchannel_dma_area {
13 struct ccw1 sense_ccw; /* static ccw for sense command */
14};
15
12struct io_subchannel_private { 16struct io_subchannel_private {
13 union orb orb; /* operation request block */ 17 union orb orb; /* operation request block */
14 struct ccw1 sense_ccw; /* static ccw for sense command */
15 struct ccw_device *cdev;/* pointer to the child ccw device */ 18 struct ccw_device *cdev;/* pointer to the child ccw device */
16 struct { 19 struct {
17 unsigned int suspend:1; /* allow suspend */ 20 unsigned int suspend:1; /* allow suspend */
18 unsigned int prefetch:1;/* deny prefetch */ 21 unsigned int prefetch:1;/* deny prefetch */
19 unsigned int inter:1; /* suppress intermediate interrupts */ 22 unsigned int inter:1; /* suppress intermediate interrupts */
20 } __packed options; 23 } __packed options;
24 struct io_subchannel_dma_area *dma_area;
25 dma_addr_t dma_area_dma;
21} __aligned(8); 26} __aligned(8);
22 27
23#define to_io_private(n) ((struct io_subchannel_private *) \ 28#define to_io_private(n) ((struct io_subchannel_private *) \
@@ -115,6 +120,13 @@ enum cdev_todo {
115#define FAKE_CMD_IRB 1 120#define FAKE_CMD_IRB 1
116#define FAKE_TM_IRB 2 121#define FAKE_TM_IRB 2
117 122
123struct ccw_device_dma_area {
124 struct senseid senseid; /* SenseID info */
125 struct ccw1 iccws[2]; /* ccws for SNID/SID/SPGID commands */
126 struct irb irb; /* device status */
127 struct pgid pgid[8]; /* path group IDs per chpid*/
128};
129
118struct ccw_device_private { 130struct ccw_device_private {
119 struct ccw_device *cdev; 131 struct ccw_device *cdev;
120 struct subchannel *sch; 132 struct subchannel *sch;
@@ -156,11 +168,7 @@ struct ccw_device_private {
156 } __attribute__((packed)) flags; 168 } __attribute__((packed)) flags;
157 unsigned long intparm; /* user interruption parameter */ 169 unsigned long intparm; /* user interruption parameter */
158 struct qdio_irq *qdio_data; 170 struct qdio_irq *qdio_data;
159 struct irb irb; /* device status */
160 int async_kill_io_rc; 171 int async_kill_io_rc;
161 struct senseid senseid; /* SenseID info */
162 struct pgid pgid[8]; /* path group IDs per chpid*/
163 struct ccw1 iccws[2]; /* ccws for SNID/SID/SPGID commands */
164 struct work_struct todo_work; 172 struct work_struct todo_work;
165 enum cdev_todo todo; 173 enum cdev_todo todo;
166 wait_queue_head_t wait_q; 174 wait_queue_head_t wait_q;
@@ -169,6 +177,8 @@ struct ccw_device_private {
169 struct list_head cmb_list; /* list of measured devices */ 177 struct list_head cmb_list; /* list of measured devices */
170 u64 cmb_start_time; /* clock value of cmb reset */ 178 u64 cmb_start_time; /* clock value of cmb reset */
171 void *cmb_wait; /* deferred cmb enable/disable */ 179 void *cmb_wait; /* deferred cmb enable/disable */
180 struct gen_pool *dma_pool;
181 struct ccw_device_dma_area *dma_area;
172 enum interruption_class int_class; 182 enum interruption_class int_class;
173}; 183};
174 184
diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c
index 7b7620de2acd..730c4e68094b 100644
--- a/drivers/s390/cio/qdio_main.c
+++ b/drivers/s390/cio/qdio_main.c
@@ -736,6 +736,7 @@ static int get_outbound_buffer_frontier(struct qdio_q *q, unsigned int start)
736 736
737 switch (state) { 737 switch (state) {
738 case SLSB_P_OUTPUT_EMPTY: 738 case SLSB_P_OUTPUT_EMPTY:
739 case SLSB_P_OUTPUT_PENDING:
739 /* the adapter got it */ 740 /* the adapter got it */
740 DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, 741 DBF_DEV_EVENT(DBF_INFO, q->irq_ptr,
741 "out empty:%1d %02x", q->nr, count); 742 "out empty:%1d %02x", q->nr, count);
diff --git a/drivers/s390/cio/qdio_setup.c b/drivers/s390/cio/qdio_setup.c
index 99d7d2566a3a..d4101cecdc8d 100644
--- a/drivers/s390/cio/qdio_setup.c
+++ b/drivers/s390/cio/qdio_setup.c
@@ -150,6 +150,7 @@ static int __qdio_allocate_qs(struct qdio_q **irq_ptr_qs, int nr_queues)
150 return -ENOMEM; 150 return -ENOMEM;
151 } 151 }
152 irq_ptr_qs[i] = q; 152 irq_ptr_qs[i] = q;
153 INIT_LIST_HEAD(&q->entry);
153 } 154 }
154 return 0; 155 return 0;
155} 156}
@@ -178,6 +179,7 @@ static void setup_queues_misc(struct qdio_q *q, struct qdio_irq *irq_ptr,
178 q->mask = 1 << (31 - i); 179 q->mask = 1 << (31 - i);
179 q->nr = i; 180 q->nr = i;
180 q->handler = handler; 181 q->handler = handler;
182 INIT_LIST_HEAD(&q->entry);
181} 183}
182 184
183static void setup_storage_lists(struct qdio_q *q, struct qdio_irq *irq_ptr, 185static void setup_storage_lists(struct qdio_q *q, struct qdio_irq *irq_ptr,
diff --git a/drivers/s390/cio/qdio_thinint.c b/drivers/s390/cio/qdio_thinint.c
index 28d59ac2204c..93ee067c10ca 100644
--- a/drivers/s390/cio/qdio_thinint.c
+++ b/drivers/s390/cio/qdio_thinint.c
@@ -79,7 +79,6 @@ void tiqdio_add_input_queues(struct qdio_irq *irq_ptr)
79 mutex_lock(&tiq_list_lock); 79 mutex_lock(&tiq_list_lock);
80 list_add_rcu(&irq_ptr->input_qs[0]->entry, &tiq_list); 80 list_add_rcu(&irq_ptr->input_qs[0]->entry, &tiq_list);
81 mutex_unlock(&tiq_list_lock); 81 mutex_unlock(&tiq_list_lock);
82 xchg(irq_ptr->dsci, 1 << 7);
83} 82}
84 83
85void tiqdio_remove_input_queues(struct qdio_irq *irq_ptr) 84void tiqdio_remove_input_queues(struct qdio_irq *irq_ptr)
@@ -87,14 +86,14 @@ void tiqdio_remove_input_queues(struct qdio_irq *irq_ptr)
87 struct qdio_q *q; 86 struct qdio_q *q;
88 87
89 q = irq_ptr->input_qs[0]; 88 q = irq_ptr->input_qs[0];
90 /* if establish triggered an error */ 89 if (!q)
91 if (!q || !q->entry.prev || !q->entry.next)
92 return; 90 return;
93 91
94 mutex_lock(&tiq_list_lock); 92 mutex_lock(&tiq_list_lock);
95 list_del_rcu(&q->entry); 93 list_del_rcu(&q->entry);
96 mutex_unlock(&tiq_list_lock); 94 mutex_unlock(&tiq_list_lock);
97 synchronize_rcu(); 95 synchronize_rcu();
96 INIT_LIST_HEAD(&q->entry);
98} 97}
99 98
100static inline int has_multiple_inq_on_dsci(struct qdio_irq *irq_ptr) 99static inline int has_multiple_inq_on_dsci(struct qdio_irq *irq_ptr)
@@ -178,6 +177,7 @@ static inline void tiqdio_call_inq_handlers(struct qdio_irq *irq)
178/** 177/**
179 * tiqdio_thinint_handler - thin interrupt handler for qdio 178 * tiqdio_thinint_handler - thin interrupt handler for qdio
180 * @airq: pointer to adapter interrupt descriptor 179 * @airq: pointer to adapter interrupt descriptor
180 * @floating: flag to recognize floating vs. directed interrupts (unused)
181 */ 181 */
182static void tiqdio_thinint_handler(struct airq_struct *airq, bool floating) 182static void tiqdio_thinint_handler(struct airq_struct *airq, bool floating)
183{ 183{
diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c
index 0e79799e9a71..1d4c893ead23 100644
--- a/drivers/s390/cio/vfio_ccw_cp.c
+++ b/drivers/s390/cio/vfio_ccw_cp.c
@@ -16,12 +16,6 @@
16 16
17#include "vfio_ccw_cp.h" 17#include "vfio_ccw_cp.h"
18 18
19/*
20 * Max length for ccw chain.
21 * XXX: Limit to 256, need to check more?
22 */
23#define CCWCHAIN_LEN_MAX 256
24
25struct pfn_array { 19struct pfn_array {
26 /* Starting guest physical I/O address. */ 20 /* Starting guest physical I/O address. */
27 unsigned long pa_iova; 21 unsigned long pa_iova;
@@ -33,11 +27,6 @@ struct pfn_array {
33 int pa_nr; 27 int pa_nr;
34}; 28};
35 29
36struct pfn_array_table {
37 struct pfn_array *pat_pa;
38 int pat_nr;
39};
40
41struct ccwchain { 30struct ccwchain {
42 struct list_head next; 31 struct list_head next;
43 struct ccw1 *ch_ccw; 32 struct ccw1 *ch_ccw;
@@ -46,35 +35,29 @@ struct ccwchain {
46 /* Count of the valid ccws in chain. */ 35 /* Count of the valid ccws in chain. */
47 int ch_len; 36 int ch_len;
48 /* Pinned PAGEs for the original data. */ 37 /* Pinned PAGEs for the original data. */
49 struct pfn_array_table *ch_pat; 38 struct pfn_array *ch_pa;
50}; 39};
51 40
52/* 41/*
53 * pfn_array_alloc_pin() - alloc memory for PFNs, then pin user pages in memory 42 * pfn_array_alloc() - alloc memory for PFNs
54 * @pa: pfn_array on which to perform the operation 43 * @pa: pfn_array on which to perform the operation
55 * @mdev: the mediated device to perform pin/unpin operations
56 * @iova: target guest physical address 44 * @iova: target guest physical address
57 * @len: number of bytes that should be pinned from @iova 45 * @len: number of bytes that should be pinned from @iova
58 * 46 *
59 * Attempt to allocate memory for PFNs, and pin user pages in memory. 47 * Attempt to allocate memory for PFNs.
60 * 48 *
61 * Usage of pfn_array: 49 * Usage of pfn_array:
62 * We expect (pa_nr == 0) and (pa_iova_pfn == NULL), any field in 50 * We expect (pa_nr == 0) and (pa_iova_pfn == NULL), any field in
63 * this structure will be filled in by this function. 51 * this structure will be filled in by this function.
64 * 52 *
65 * Returns: 53 * Returns:
66 * Number of pages pinned on success. 54 * 0 if PFNs are allocated
67 * If @pa->pa_nr is not 0, or @pa->pa_iova_pfn is not NULL initially, 55 * -EINVAL if pa->pa_nr is not initially zero, or pa->pa_iova_pfn is not NULL
68 * returns -EINVAL. 56 * -ENOMEM if alloc failed
69 * If no pages were pinned, returns -errno.
70 */ 57 */
71static int pfn_array_alloc_pin(struct pfn_array *pa, struct device *mdev, 58static int pfn_array_alloc(struct pfn_array *pa, u64 iova, unsigned int len)
72 u64 iova, unsigned int len)
73{ 59{
74 int i, ret = 0; 60 int i;
75
76 if (!len)
77 return 0;
78 61
79 if (pa->pa_nr || pa->pa_iova_pfn) 62 if (pa->pa_nr || pa->pa_iova_pfn)
80 return -EINVAL; 63 return -EINVAL;
@@ -94,8 +77,27 @@ static int pfn_array_alloc_pin(struct pfn_array *pa, struct device *mdev,
94 pa->pa_pfn = pa->pa_iova_pfn + pa->pa_nr; 77 pa->pa_pfn = pa->pa_iova_pfn + pa->pa_nr;
95 78
96 pa->pa_iova_pfn[0] = pa->pa_iova >> PAGE_SHIFT; 79 pa->pa_iova_pfn[0] = pa->pa_iova >> PAGE_SHIFT;
97 for (i = 1; i < pa->pa_nr; i++) 80 pa->pa_pfn[0] = -1ULL;
81 for (i = 1; i < pa->pa_nr; i++) {
98 pa->pa_iova_pfn[i] = pa->pa_iova_pfn[i - 1] + 1; 82 pa->pa_iova_pfn[i] = pa->pa_iova_pfn[i - 1] + 1;
83 pa->pa_pfn[i] = -1ULL;
84 }
85
86 return 0;
87}
88
89/*
90 * pfn_array_pin() - Pin user pages in memory
91 * @pa: pfn_array on which to perform the operation
92 * @mdev: the mediated device to perform pin operations
93 *
94 * Returns number of pages pinned upon success.
95 * If the pin request partially succeeds, or fails completely,
96 * all pages are left unpinned and a negative error value is returned.
97 */
98static int pfn_array_pin(struct pfn_array *pa, struct device *mdev)
99{
100 int ret = 0;
99 101
100 ret = vfio_pin_pages(mdev, pa->pa_iova_pfn, pa->pa_nr, 102 ret = vfio_pin_pages(mdev, pa->pa_iova_pfn, pa->pa_nr,
101 IOMMU_READ | IOMMU_WRITE, pa->pa_pfn); 103 IOMMU_READ | IOMMU_WRITE, pa->pa_pfn);
@@ -112,8 +114,6 @@ static int pfn_array_alloc_pin(struct pfn_array *pa, struct device *mdev,
112 114
113err_out: 115err_out:
114 pa->pa_nr = 0; 116 pa->pa_nr = 0;
115 kfree(pa->pa_iova_pfn);
116 pa->pa_iova_pfn = NULL;
117 117
118 return ret; 118 return ret;
119} 119}
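
With the pin step split out, pfn_array_alloc() is left doing only the bookkeeping: it sizes the array for the pages touched by (iova, len) and fills in the contiguous guest PFNs, leaving the host PFNs at -1ULL until pfn_array_pin() resolves them through vfio_pin_pages(). A minimal user-space sketch of that precomputation, assuming 4K pages and using calloc() and an invented name (sketch_alloc) in place of the kernel helpers:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define PAGE_SHIFT 12

/* Sketch: size the array and fill in the guest PFNs for (iova, len),
 * mirroring what pfn_array_alloc() precomputes before any pinning. */
static int sketch_alloc(uint64_t iova, unsigned int len,
                        uint64_t **iova_pfn, int *nr)
{
    uint64_t first = iova >> PAGE_SHIFT;
    uint64_t last = (iova + len - 1) >> PAGE_SHIFT;
    int i, n = (int)(last - first + 1);

    *iova_pfn = calloc(n, sizeof(**iova_pfn));
    if (!*iova_pfn)
        return -1;
    for (i = 0; i < n; i++)
        (*iova_pfn)[i] = first + i; /* guest pages are contiguous in iova space */
    *nr = n;
    return 0;
}

int main(void)
{
    uint64_t *pfns;
    int i, nr;

    if (sketch_alloc(0x10800, 0x2000, &pfns, &nr))
        return 1;
    for (i = 0; i < nr; i++)
        printf("guest pfn[%d] = 0x%llx\n", i, (unsigned long long)pfns[i]);
    free(pfns);
    return 0;
}

Under the same assumption, the copy_from_iova() change further down is this lifecycle spelled out: alloc, pin, copy, then pfn_array_unpin_free() on both the error and the success path.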
@@ -121,60 +121,30 @@ err_out:
121/* Unpin the pages before releasing the memory. */ 121/* Unpin the pages before releasing the memory. */
122static void pfn_array_unpin_free(struct pfn_array *pa, struct device *mdev) 122static void pfn_array_unpin_free(struct pfn_array *pa, struct device *mdev)
123{ 123{
124 vfio_unpin_pages(mdev, pa->pa_iova_pfn, pa->pa_nr); 124 /* Only unpin if any pages were pinned to begin with */
125 if (pa->pa_nr)
126 vfio_unpin_pages(mdev, pa->pa_iova_pfn, pa->pa_nr);
125 pa->pa_nr = 0; 127 pa->pa_nr = 0;
126 kfree(pa->pa_iova_pfn); 128 kfree(pa->pa_iova_pfn);
127} 129}
128 130
129static int pfn_array_table_init(struct pfn_array_table *pat, int nr) 131static bool pfn_array_iova_pinned(struct pfn_array *pa, unsigned long iova)
130{
131 pat->pat_pa = kcalloc(nr, sizeof(*pat->pat_pa), GFP_KERNEL);
132 if (unlikely(ZERO_OR_NULL_PTR(pat->pat_pa))) {
133 pat->pat_nr = 0;
134 return -ENOMEM;
135 }
136
137 pat->pat_nr = nr;
138
139 return 0;
140}
141
142static void pfn_array_table_unpin_free(struct pfn_array_table *pat,
143 struct device *mdev)
144{ 132{
145 int i;
146
147 for (i = 0; i < pat->pat_nr; i++)
148 pfn_array_unpin_free(pat->pat_pa + i, mdev);
149
150 if (pat->pat_nr) {
151 kfree(pat->pat_pa);
152 pat->pat_pa = NULL;
153 pat->pat_nr = 0;
154 }
155}
156
157static bool pfn_array_table_iova_pinned(struct pfn_array_table *pat,
158 unsigned long iova)
159{
160 struct pfn_array *pa = pat->pat_pa;
161 unsigned long iova_pfn = iova >> PAGE_SHIFT; 133 unsigned long iova_pfn = iova >> PAGE_SHIFT;
162 int i, j; 134 int i;
163 135
164 for (i = 0; i < pat->pat_nr; i++, pa++) 136 for (i = 0; i < pa->pa_nr; i++)
165 for (j = 0; j < pa->pa_nr; j++) 137 if (pa->pa_iova_pfn[i] == iova_pfn)
166 if (pa->pa_iova_pfn[j] == iova_pfn) 138 return true;
167 return true;
168 139
169 return false; 140 return false;
170} 141}
171/* Create the list idal words for a pfn_array_table. */ 142/* Create the list of IDAL words for a pfn_array. */
172static inline void pfn_array_table_idal_create_words( 143static inline void pfn_array_idal_create_words(
173 struct pfn_array_table *pat, 144 struct pfn_array *pa,
174 unsigned long *idaws) 145 unsigned long *idaws)
175{ 146{
176 struct pfn_array *pa; 147 int i;
177 int i, j, k;
178 148
179 /* 149 /*
180 * Idal words (except the first one) rely on the memory being 4k 150 * Idal words (except the first one) rely on the memory being 4k
@@ -183,19 +153,36 @@ static inline void pfn_array_table_idal_create_words(
183 * there will be no problem here to simply use the phys to create an 153 * there will be no problem here to simply use the phys to create an
184 * idaw. 154 * idaw.
185 */ 155 */
186 k = 0; 156
187 for (i = 0; i < pat->pat_nr; i++) { 157 for (i = 0; i < pa->pa_nr; i++)
188 pa = pat->pat_pa + i; 158 idaws[i] = pa->pa_pfn[i] << PAGE_SHIFT;
189 for (j = 0; j < pa->pa_nr; j++) { 159
190 idaws[k] = pa->pa_pfn[j] << PAGE_SHIFT; 160 /* Adjust the first IDAW, since it may not start on a page boundary */
191 if (k == 0) 161 idaws[0] += pa->pa_iova & (PAGE_SIZE - 1);
192 idaws[k] += pa->pa_iova & (PAGE_SIZE - 1); 162}
193 k++; 163
164static void convert_ccw0_to_ccw1(struct ccw1 *source, unsigned long len)
165{
166 struct ccw0 ccw0;
167 struct ccw1 *pccw1 = source;
168 int i;
169
170 for (i = 0; i < len; i++) {
171 ccw0 = *(struct ccw0 *)pccw1;
172 if ((pccw1->cmd_code & 0x0f) == CCW_CMD_TIC) {
173 pccw1->cmd_code = CCW_CMD_TIC;
174 pccw1->flags = 0;
175 pccw1->count = 0;
176 } else {
177 pccw1->cmd_code = ccw0.cmd_code;
178 pccw1->flags = ccw0.flags;
179 pccw1->count = ccw0.count;
194 } 180 }
181 pccw1->cda = ccw0.cda;
182 pccw1++;
195 } 183 }
196} 184}
197 185
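
pfn_array_idal_create_words() above turns every pinned host PFN into a page-aligned IDAW and then adds the guest address's offset into its first page to idaws[0] only; all later IDAWs start on a 4K boundary. A standalone sketch of that arithmetic with made-up PFN and iova values:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

int main(void)
{
    /* Hypothetical pinned host PFNs and the original guest iova. */
    uint64_t pfn[] = { 0x1234, 0x9abc, 0x0fed };
    uint64_t iova = 0x10850;    /* deliberately not page aligned */
    uint64_t idaws[3];
    int i, nr = 3;

    for (i = 0; i < nr; i++)
        idaws[i] = pfn[i] << PAGE_SHIFT;    /* page-aligned host addresses */

    /* Only the first IDAW keeps the offset into its page. */
    idaws[0] += iova & (PAGE_SIZE - 1);

    for (i = 0; i < nr; i++)
        printf("idaw[%d] = 0x%llx\n", i, (unsigned long long)idaws[i]);
    return 0;
}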
198
199/* 186/*
200 * Within the domain (@mdev), copy @n bytes from a guest physical 187 * Within the domain (@mdev), copy @n bytes from a guest physical
201 * address (@iova) to a host physical address (@to). 188 * address (@iova) to a host physical address (@to).
@@ -209,9 +196,15 @@ static long copy_from_iova(struct device *mdev,
209 int i, ret; 196 int i, ret;
210 unsigned long l, m; 197 unsigned long l, m;
211 198
212 ret = pfn_array_alloc_pin(&pa, mdev, iova, n); 199 ret = pfn_array_alloc(&pa, iova, n);
213 if (ret <= 0) 200 if (ret < 0)
201 return ret;
202
203 ret = pfn_array_pin(&pa, mdev);
204 if (ret < 0) {
205 pfn_array_unpin_free(&pa, mdev);
214 return ret; 206 return ret;
207 }
215 208
216 l = n; 209 l = n;
217 for (i = 0; i < pa.pa_nr; i++) { 210 for (i = 0; i < pa.pa_nr; i++) {
@@ -235,55 +228,60 @@ static long copy_from_iova(struct device *mdev,
235 return l; 228 return l;
236} 229}
237 230
238static long copy_ccw_from_iova(struct channel_program *cp,
239 struct ccw1 *to, u64 iova,
240 unsigned long len)
241{
242 struct ccw0 ccw0;
243 struct ccw1 *pccw1;
244 int ret;
245 int i;
246
247 ret = copy_from_iova(cp->mdev, to, iova, len * sizeof(struct ccw1));
248 if (ret)
249 return ret;
250
251 if (!cp->orb.cmd.fmt) {
252 pccw1 = to;
253 for (i = 0; i < len; i++) {
254 ccw0 = *(struct ccw0 *)pccw1;
255 if ((pccw1->cmd_code & 0x0f) == CCW_CMD_TIC) {
256 pccw1->cmd_code = CCW_CMD_TIC;
257 pccw1->flags = 0;
258 pccw1->count = 0;
259 } else {
260 pccw1->cmd_code = ccw0.cmd_code;
261 pccw1->flags = ccw0.flags;
262 pccw1->count = ccw0.count;
263 }
264 pccw1->cda = ccw0.cda;
265 pccw1++;
266 }
267 }
268
269 return ret;
270}
271
272/* 231/*
273 * Helpers to operate ccwchain. 232 * Helpers to operate ccwchain.
274 */ 233 */
275#define ccw_is_test(_ccw) (((_ccw)->cmd_code & 0x0F) == 0) 234#define ccw_is_read(_ccw) (((_ccw)->cmd_code & 0x03) == 0x02)
235#define ccw_is_read_backward(_ccw) (((_ccw)->cmd_code & 0x0F) == 0x0C)
236#define ccw_is_sense(_ccw) (((_ccw)->cmd_code & 0x0F) == CCW_CMD_BASIC_SENSE)
276 237
277#define ccw_is_noop(_ccw) ((_ccw)->cmd_code == CCW_CMD_NOOP) 238#define ccw_is_noop(_ccw) ((_ccw)->cmd_code == CCW_CMD_NOOP)
278 239
279#define ccw_is_tic(_ccw) ((_ccw)->cmd_code == CCW_CMD_TIC) 240#define ccw_is_tic(_ccw) ((_ccw)->cmd_code == CCW_CMD_TIC)
280 241
281#define ccw_is_idal(_ccw) ((_ccw)->flags & CCW_FLAG_IDA) 242#define ccw_is_idal(_ccw) ((_ccw)->flags & CCW_FLAG_IDA)
282 243#define ccw_is_skip(_ccw) ((_ccw)->flags & CCW_FLAG_SKIP)
283 244
284#define ccw_is_chain(_ccw) ((_ccw)->flags & (CCW_FLAG_CC | CCW_FLAG_DC)) 245#define ccw_is_chain(_ccw) ((_ccw)->flags & (CCW_FLAG_CC | CCW_FLAG_DC))
285 246
286/* 247/*
248 * ccw_does_data_transfer()
249 *
250 * Determine whether a CCW will move any data, such that the guest pages
251 * would need to be pinned before performing the I/O.
252 *
253 * Returns 1 if yes, 0 if no.
254 */
255static inline int ccw_does_data_transfer(struct ccw1 *ccw)
256{
257 /* If the count field is zero, then no data will be transferred */
258 if (ccw->count == 0)
259 return 0;
260
261 /* If the command is a NOP, then no data will be transferred */
262 if (ccw_is_noop(ccw))
263 return 0;
264
265 /* If the skip flag is off, then data will be transferred */
266 if (!ccw_is_skip(ccw))
267 return 1;
268
269 /*
270 * If the skip flag is on, it is only meaningful if the command
271 * code is a read, read backward, sense, or sense ID. In those
272 * cases, no data will be transferred.
273 */
274 if (ccw_is_read(ccw) || ccw_is_read_backward(ccw))
275 return 0;
276
277 if (ccw_is_sense(ccw))
278 return 0;
279
280 /* The skip flag is on, but it is ignored for this command code. */
281 return 1;
282}
283
284/*
287 * is_cpa_within_range() 285 * is_cpa_within_range()
288 * 286 *
289 * @cpa: channel program address being questioned 287 * @cpa: channel program address being questioned
@@ -319,7 +317,7 @@ static struct ccwchain *ccwchain_alloc(struct channel_program *cp, int len)
319 /* Make ccw address aligned to 8. */ 317 /* Make ccw address aligned to 8. */
320 size = ((sizeof(*chain) + 7L) & -8L) + 318 size = ((sizeof(*chain) + 7L) & -8L) +
321 sizeof(*chain->ch_ccw) * len + 319 sizeof(*chain->ch_ccw) * len +
322 sizeof(*chain->ch_pat) * len; 320 sizeof(*chain->ch_pa) * len;
323 chain = kzalloc(size, GFP_DMA | GFP_KERNEL); 321 chain = kzalloc(size, GFP_DMA | GFP_KERNEL);
324 if (!chain) 322 if (!chain)
325 return NULL; 323 return NULL;
@@ -328,7 +326,7 @@ static struct ccwchain *ccwchain_alloc(struct channel_program *cp, int len)
328 chain->ch_ccw = (struct ccw1 *)data; 326 chain->ch_ccw = (struct ccw1 *)data;
329 327
330 data = (u8 *)(chain->ch_ccw) + sizeof(*chain->ch_ccw) * len; 328 data = (u8 *)(chain->ch_ccw) + sizeof(*chain->ch_ccw) * len;
331 chain->ch_pat = (struct pfn_array_table *)data; 329 chain->ch_pa = (struct pfn_array *)data;
332 330
333 chain->ch_len = len; 331 chain->ch_len = len;
334 332
@@ -348,31 +346,12 @@ static void ccwchain_cda_free(struct ccwchain *chain, int idx)
348{ 346{
349 struct ccw1 *ccw = chain->ch_ccw + idx; 347 struct ccw1 *ccw = chain->ch_ccw + idx;
350 348
351 if (ccw_is_test(ccw) || ccw_is_noop(ccw) || ccw_is_tic(ccw)) 349 if (ccw_is_tic(ccw))
352 return;
353 if (!ccw->count)
354 return; 350 return;
355 351
356 kfree((void *)(u64)ccw->cda); 352 kfree((void *)(u64)ccw->cda);
357} 353}
358 354
359/* Unpin the pages then free the memory resources. */
360static void cp_unpin_free(struct channel_program *cp)
361{
362 struct ccwchain *chain, *temp;
363 int i;
364
365 cp->initialized = false;
366 list_for_each_entry_safe(chain, temp, &cp->ccwchain_list, next) {
367 for (i = 0; i < chain->ch_len; i++) {
368 pfn_array_table_unpin_free(chain->ch_pat + i,
369 cp->mdev);
370 ccwchain_cda_free(chain, i);
371 }
372 ccwchain_free(chain);
373 }
374}
375
376/** 355/**
377 * ccwchain_calc_length - calculate the length of the ccw chain. 356 * ccwchain_calc_length - calculate the length of the ccw chain.
378 * @iova: guest physical address of the target ccw chain 357 * @iova: guest physical address of the target ccw chain
@@ -388,25 +367,9 @@ static void cp_unpin_free(struct channel_program *cp)
388 */ 367 */
389static int ccwchain_calc_length(u64 iova, struct channel_program *cp) 368static int ccwchain_calc_length(u64 iova, struct channel_program *cp)
390{ 369{
391 struct ccw1 *ccw, *p; 370 struct ccw1 *ccw = cp->guest_cp;
392 int cnt; 371 int cnt = 0;
393
394 /*
395 * Copy current chain from guest to host kernel.
396 * Currently the chain length is limited to CCWCHAIN_LEN_MAX (256).
397 * So copying 2K is enough (safe).
398 */
399 p = ccw = kcalloc(CCWCHAIN_LEN_MAX, sizeof(*ccw), GFP_KERNEL);
400 if (!ccw)
401 return -ENOMEM;
402
403 cnt = copy_ccw_from_iova(cp, ccw, iova, CCWCHAIN_LEN_MAX);
404 if (cnt) {
405 kfree(ccw);
406 return cnt;
407 }
408 372
409 cnt = 0;
410 do { 373 do {
411 cnt++; 374 cnt++;
412 375
@@ -415,10 +378,8 @@ static int ccwchain_calc_length(u64 iova, struct channel_program *cp)
415 * orb specified one of the unsupported formats, we defer 378 * orb specified one of the unsupported formats, we defer
416 * checking for IDAWs in unsupported formats to here. 379 * checking for IDAWs in unsupported formats to here.
417 */ 380 */
418 if ((!cp->orb.cmd.c64 || cp->orb.cmd.i2k) && ccw_is_idal(ccw)) { 381 if ((!cp->orb.cmd.c64 || cp->orb.cmd.i2k) && ccw_is_idal(ccw))
419 kfree(p);
420 return -EOPNOTSUPP; 382 return -EOPNOTSUPP;
421 }
422 383
423 /* 384 /*
424 * We want to keep counting if the current CCW has the 385 * We want to keep counting if the current CCW has the
@@ -437,7 +398,6 @@ static int ccwchain_calc_length(u64 iova, struct channel_program *cp)
437 if (cnt == CCWCHAIN_LEN_MAX + 1) 398 if (cnt == CCWCHAIN_LEN_MAX + 1)
438 cnt = -EINVAL; 399 cnt = -EINVAL;
439 400
440 kfree(p);
441 return cnt; 401 return cnt;
442} 402}
443 403
@@ -458,17 +418,23 @@ static int tic_target_chain_exists(struct ccw1 *tic, struct channel_program *cp)
458static int ccwchain_loop_tic(struct ccwchain *chain, 418static int ccwchain_loop_tic(struct ccwchain *chain,
459 struct channel_program *cp); 419 struct channel_program *cp);
460 420
461static int ccwchain_handle_tic(struct ccw1 *tic, struct channel_program *cp) 421static int ccwchain_handle_ccw(u32 cda, struct channel_program *cp)
462{ 422{
463 struct ccwchain *chain; 423 struct ccwchain *chain;
464 int len, ret; 424 int len;
465 425
466 /* May transfer to an existing chain. */ 426 /* Copy 2K (the most we support today) of possible CCWs */
467 if (tic_target_chain_exists(tic, cp)) 427 len = copy_from_iova(cp->mdev, cp->guest_cp, cda,
468 return 0; 428 CCWCHAIN_LEN_MAX * sizeof(struct ccw1));
429 if (len)
430 return len;
469 431
470 /* Get chain length. */ 432 /* Convert any Format-0 CCWs to Format-1 */
471 len = ccwchain_calc_length(tic->cda, cp); 433 if (!cp->orb.cmd.fmt)
434 convert_ccw0_to_ccw1(cp->guest_cp, CCWCHAIN_LEN_MAX);
435
436 /* Count the CCWs in the current chain */
437 len = ccwchain_calc_length(cda, cp);
472 if (len < 0) 438 if (len < 0)
473 return len; 439 return len;
474 440
@@ -476,14 +442,10 @@ static int ccwchain_handle_tic(struct ccw1 *tic, struct channel_program *cp)
476 chain = ccwchain_alloc(cp, len); 442 chain = ccwchain_alloc(cp, len);
477 if (!chain) 443 if (!chain)
478 return -ENOMEM; 444 return -ENOMEM;
479 chain->ch_iova = tic->cda; 445 chain->ch_iova = cda;
480 446
481 /* Copy the new chain from user. */ 447 /* Copy the actual CCWs into the new chain */
482 ret = copy_ccw_from_iova(cp, chain->ch_ccw, tic->cda, len); 448 memcpy(chain->ch_ccw, cp->guest_cp, len * sizeof(struct ccw1));
483 if (ret) {
484 ccwchain_free(chain);
485 return ret;
486 }
487 449
488 /* Loop for tics on this new chain. */ 450 /* Loop for tics on this new chain. */
489 return ccwchain_loop_tic(chain, cp); 451 return ccwchain_loop_tic(chain, cp);
@@ -501,7 +463,12 @@ static int ccwchain_loop_tic(struct ccwchain *chain, struct channel_program *cp)
501 if (!ccw_is_tic(tic)) 463 if (!ccw_is_tic(tic))
502 continue; 464 continue;
503 465
504 ret = ccwchain_handle_tic(tic, cp); 466 /* May transfer to an existing chain. */
467 if (tic_target_chain_exists(tic, cp))
468 continue;
469
470 /* Build a ccwchain for the next segment */
471 ret = ccwchain_handle_ccw(tic->cda, cp);
505 if (ret) 472 if (ret)
506 return ret; 473 return ret;
507 } 474 }
@@ -534,115 +501,90 @@ static int ccwchain_fetch_direct(struct ccwchain *chain,
534 struct channel_program *cp) 501 struct channel_program *cp)
535{ 502{
536 struct ccw1 *ccw; 503 struct ccw1 *ccw;
537 struct pfn_array_table *pat; 504 struct pfn_array *pa;
505 u64 iova;
538 unsigned long *idaws; 506 unsigned long *idaws;
539 int ret; 507 int ret;
508 int bytes = 1;
509 int idaw_nr, idal_len;
510 int i;
540 511
541 ccw = chain->ch_ccw + idx; 512 ccw = chain->ch_ccw + idx;
542 513
543 if (!ccw->count) { 514 if (ccw->count)
544 /* 515 bytes = ccw->count;
545 * We just want the translation result of any direct ccw
546 * to be an IDA ccw, so let's add the IDA flag for it.
547 * Although the flag will be ignored by firmware.
548 */
549 ccw->flags |= CCW_FLAG_IDA;
550 return 0;
551 }
552
553 /*
554 * Pin data page(s) in memory.
555 * The number of pages actually is the count of the idaws which will be
556 * needed when translating a direct ccw to a idal ccw.
557 */
558 pat = chain->ch_pat + idx;
559 ret = pfn_array_table_init(pat, 1);
560 if (ret)
561 goto out_init;
562
563 ret = pfn_array_alloc_pin(pat->pat_pa, cp->mdev, ccw->cda, ccw->count);
564 if (ret < 0)
565 goto out_unpin;
566 516
567 /* Translate this direct ccw to a idal ccw. */ 517 /* Calculate size of IDAL */
568 idaws = kcalloc(ret, sizeof(*idaws), GFP_DMA | GFP_KERNEL); 518 if (ccw_is_idal(ccw)) {
569 if (!idaws) { 519 /* Read first IDAW to see if it's 4K-aligned or not. */
570 ret = -ENOMEM; 520 /* All subsequent IDAWs will be 4K-aligned. */
571 goto out_unpin; 521 ret = copy_from_iova(cp->mdev, &iova, ccw->cda, sizeof(iova));
522 if (ret)
523 return ret;
524 } else {
525 iova = ccw->cda;
572 } 526 }
573 ccw->cda = (__u32) virt_to_phys(idaws); 527 idaw_nr = idal_nr_words((void *)iova, bytes);
574 ccw->flags |= CCW_FLAG_IDA; 528 idal_len = idaw_nr * sizeof(*idaws);
575
576 pfn_array_table_idal_create_words(pat, idaws);
577
578 return 0;
579
580out_unpin:
581 pfn_array_table_unpin_free(pat, cp->mdev);
582out_init:
583 ccw->cda = 0;
584 return ret;
585}
586
587static int ccwchain_fetch_idal(struct ccwchain *chain,
588 int idx,
589 struct channel_program *cp)
590{
591 struct ccw1 *ccw;
592 struct pfn_array_table *pat;
593 unsigned long *idaws;
594 u64 idaw_iova;
595 unsigned int idaw_nr, idaw_len;
596 int i, ret;
597
598 ccw = chain->ch_ccw + idx;
599
600 if (!ccw->count)
601 return 0;
602
603 /* Calculate size of idaws. */
604 ret = copy_from_iova(cp->mdev, &idaw_iova, ccw->cda, sizeof(idaw_iova));
605 if (ret)
606 return ret;
607 idaw_nr = idal_nr_words((void *)(idaw_iova), ccw->count);
608 idaw_len = idaw_nr * sizeof(*idaws);
609
610 /* Pin data page(s) in memory. */
611 pat = chain->ch_pat + idx;
612 ret = pfn_array_table_init(pat, idaw_nr);
613 if (ret)
614 goto out_init;
615 529
616 /* Translate idal ccw to use new allocated idaws. */ 530 /* Allocate an IDAL from host storage */
617 idaws = kzalloc(idaw_len, GFP_DMA | GFP_KERNEL); 531 idaws = kcalloc(idaw_nr, sizeof(*idaws), GFP_DMA | GFP_KERNEL);
618 if (!idaws) { 532 if (!idaws) {
619 ret = -ENOMEM; 533 ret = -ENOMEM;
620 goto out_unpin; 534 goto out_init;
621 } 535 }
622 536
623 ret = copy_from_iova(cp->mdev, idaws, ccw->cda, idaw_len); 537 /*
624 if (ret) 538 * Allocate an array of pfn's for pages to pin/translate.
539 * The number of pages is actually the count of the idaws
540 * required for the data transfer, since we only support
541 * 4K IDAWs today.
542 */
543 pa = chain->ch_pa + idx;
544 ret = pfn_array_alloc(pa, iova, bytes);
545 if (ret < 0)
625 goto out_free_idaws; 546 goto out_free_idaws;
626 547
627 ccw->cda = virt_to_phys(idaws); 548 if (ccw_is_idal(ccw)) {
549 /* Copy guest IDAL into host IDAL */
550 ret = copy_from_iova(cp->mdev, idaws, ccw->cda, idal_len);
551 if (ret)
552 goto out_unpin;
628 553
629 for (i = 0; i < idaw_nr; i++) { 554 /*
630 idaw_iova = *(idaws + i); 555 * Copy guest IDAWs into pfn_array, in case the memory they
556 * occupy is not contiguous.
557 */
558 for (i = 0; i < idaw_nr; i++)
559 pa->pa_iova_pfn[i] = idaws[i] >> PAGE_SHIFT;
560 } else {
561 /*
562 * No action is required here; the iova addresses in pfn_array
563 * were initialized sequentially in pfn_array_alloc() beginning
564 * with the contents of ccw->cda.
565 */
566 }
631 567
632 ret = pfn_array_alloc_pin(pat->pat_pa + i, cp->mdev, 568 if (ccw_does_data_transfer(ccw)) {
633 idaw_iova, 1); 569 ret = pfn_array_pin(pa, cp->mdev);
634 if (ret < 0) 570 if (ret < 0)
635 goto out_free_idaws; 571 goto out_unpin;
572 } else {
573 pa->pa_nr = 0;
636 } 574 }
637 575
638 pfn_array_table_idal_create_words(pat, idaws); 576 ccw->cda = (__u32) virt_to_phys(idaws);
577 ccw->flags |= CCW_FLAG_IDA;
578
579 /* Populate the IDAL with pinned/translated addresses from pfn */
580 pfn_array_idal_create_words(pa, idaws);
639 581
640 return 0; 582 return 0;
641 583
584out_unpin:
585 pfn_array_unpin_free(pa, cp->mdev);
642out_free_idaws: 586out_free_idaws:
643 kfree(idaws); 587 kfree(idaws);
644out_unpin:
645 pfn_array_table_unpin_free(pat, cp->mdev);
646out_init: 588out_init:
647 ccw->cda = 0; 589 ccw->cda = 0;
648 return ret; 590 return ret;
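
ccwchain_fetch_direct() sizes the host IDAL with idal_nr_words(), i.e. the number of 4K pages touched by a transfer of 'bytes' starting at an arbitrary offset within its first page. A sketch of the equivalent arithmetic (the helper name sketch_idal_nr_words and the sample values are illustrative only; 4K IDAWs are assumed, which is all this driver supports):

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

/* Number of 4K IDAWs needed to cover 'bytes' starting at 'iova'. */
static unsigned int sketch_idal_nr_words(uint64_t iova, unsigned int bytes)
{
    return ((iova & (PAGE_SIZE - 1)) + bytes + PAGE_SIZE - 1) >> PAGE_SHIFT;
}

int main(void)
{
    /* 0x1800 bytes starting 0x800 into a page touch two pages... */
    printf("%u\n", sketch_idal_nr_words(0x10800, 0x1800));
    /* ...but one more byte spills into a third page. */
    printf("%u\n", sketch_idal_nr_words(0x10800, 0x1801));
    return 0;
}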
@@ -660,15 +602,9 @@ static int ccwchain_fetch_one(struct ccwchain *chain,
660{ 602{
661 struct ccw1 *ccw = chain->ch_ccw + idx; 603 struct ccw1 *ccw = chain->ch_ccw + idx;
662 604
663 if (ccw_is_test(ccw) || ccw_is_noop(ccw))
664 return 0;
665
666 if (ccw_is_tic(ccw)) 605 if (ccw_is_tic(ccw))
667 return ccwchain_fetch_tic(chain, idx, cp); 606 return ccwchain_fetch_tic(chain, idx, cp);
668 607
669 if (ccw_is_idal(ccw))
670 return ccwchain_fetch_idal(chain, idx, cp);
671
672 return ccwchain_fetch_direct(chain, idx, cp); 608 return ccwchain_fetch_direct(chain, idx, cp);
673} 609}
674 610
@@ -691,9 +627,7 @@ static int ccwchain_fetch_one(struct ccwchain *chain,
691 */ 627 */
692int cp_init(struct channel_program *cp, struct device *mdev, union orb *orb) 628int cp_init(struct channel_program *cp, struct device *mdev, union orb *orb)
693{ 629{
694 u64 iova = orb->cmd.cpa; 630 int ret;
695 struct ccwchain *chain;
696 int len, ret;
697 631
698 /* 632 /*
699 * XXX: 633 * XXX:
@@ -706,28 +640,11 @@ int cp_init(struct channel_program *cp, struct device *mdev, union orb *orb)
706 memcpy(&cp->orb, orb, sizeof(*orb)); 640 memcpy(&cp->orb, orb, sizeof(*orb));
707 cp->mdev = mdev; 641 cp->mdev = mdev;
708 642
709 /* Get chain length. */ 643 /* Build a ccwchain for the first CCW segment */
710 len = ccwchain_calc_length(iova, cp); 644 ret = ccwchain_handle_ccw(orb->cmd.cpa, cp);
711 if (len < 0)
712 return len;
713
714 /* Alloc mem for the head chain. */
715 chain = ccwchain_alloc(cp, len);
716 if (!chain)
717 return -ENOMEM;
718 chain->ch_iova = iova;
719
720 /* Copy the head chain from guest. */
721 ret = copy_ccw_from_iova(cp, chain->ch_ccw, iova, len);
722 if (ret) {
723 ccwchain_free(chain);
724 return ret;
725 }
726
727 /* Now loop for its TICs. */
728 ret = ccwchain_loop_tic(chain, cp);
729 if (ret) 645 if (ret)
730 cp_unpin_free(cp); 646 cp_free(cp);
647
731 /* It is safe to force: if not set but idals used 648 /* It is safe to force: if not set but idals used
732 * ccwchain_calc_length returns an error. 649 * ccwchain_calc_length returns an error.
733 */ 650 */
@@ -750,8 +667,20 @@ int cp_init(struct channel_program *cp, struct device *mdev, union orb *orb)
750 */ 667 */
751void cp_free(struct channel_program *cp) 668void cp_free(struct channel_program *cp)
752{ 669{
753 if (cp->initialized) 670 struct ccwchain *chain, *temp;
754 cp_unpin_free(cp); 671 int i;
672
673 if (!cp->initialized)
674 return;
675
676 cp->initialized = false;
677 list_for_each_entry_safe(chain, temp, &cp->ccwchain_list, next) {
678 for (i = 0; i < chain->ch_len; i++) {
679 pfn_array_unpin_free(chain->ch_pa + i, cp->mdev);
680 ccwchain_cda_free(chain, i);
681 }
682 ccwchain_free(chain);
683 }
755} 684}
756 685
757/** 686/**
@@ -886,7 +815,11 @@ void cp_update_scsw(struct channel_program *cp, union scsw *scsw)
886 */ 815 */
887 list_for_each_entry(chain, &cp->ccwchain_list, next) { 816 list_for_each_entry(chain, &cp->ccwchain_list, next) {
888 ccw_head = (u32)(u64)chain->ch_ccw; 817 ccw_head = (u32)(u64)chain->ch_ccw;
889 if (is_cpa_within_range(cpa, ccw_head, chain->ch_len)) { 818 /*
819 * On successful execution, cpa points just beyond the end
820 * of the chain.
821 */
822 if (is_cpa_within_range(cpa, ccw_head, chain->ch_len + 1)) {
890 /* 823 /*
891 * (cpa - ccw_head) is the offset value of the host 824 * (cpa - ccw_head) is the offset value of the host
892 * physical ccw to its chain head. 825 * physical ccw to its chain head.
@@ -919,8 +852,7 @@ bool cp_iova_pinned(struct channel_program *cp, u64 iova)
919 852
920 list_for_each_entry(chain, &cp->ccwchain_list, next) { 853 list_for_each_entry(chain, &cp->ccwchain_list, next) {
921 for (i = 0; i < chain->ch_len; i++) 854 for (i = 0; i < chain->ch_len; i++)
922 if (pfn_array_table_iova_pinned(chain->ch_pat + i, 855 if (pfn_array_iova_pinned(chain->ch_pa + i, iova))
923 iova))
924 return true; 856 return true;
925 } 857 }
926 858
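
The cp_update_scsw() hunk above widens the range check to chain->ch_len + 1 CCWs because, after a chain completes successfully, the CPA in the SCSW points to the CCW just past the last one executed, i.e. 8 bytes beyond the copied chain. A sketch of that check under the same 8-byte-CCW assumption (the function name and sample addresses are hypothetical):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Does cpa fall within a chain of 'len' CCWs starting at ccw_head,
 * allowing for the "one past the end" position reported on success? */
static bool sketch_cpa_in_range(uint32_t cpa, uint32_t ccw_head, int len)
{
    return ccw_head <= cpa && cpa < ccw_head + (uint32_t)(len + 1) * 8;
}

int main(void)
{
    uint32_t head = 0x2000;    /* hypothetical host chain address */
    int len = 4;               /* four CCWs in the chain */

    printf("%d\n", sketch_cpa_in_range(head + 4 * 8, head, len)); /* 1: just past the end */
    printf("%d\n", sketch_cpa_in_range(head + 5 * 8, head, len)); /* 0: beyond that */
    return 0;
}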
diff --git a/drivers/s390/cio/vfio_ccw_cp.h b/drivers/s390/cio/vfio_ccw_cp.h
index 3c20cd208da5..7cdc38049033 100644
--- a/drivers/s390/cio/vfio_ccw_cp.h
+++ b/drivers/s390/cio/vfio_ccw_cp.h
@@ -16,6 +16,12 @@
16 16
17#include "orb.h" 17#include "orb.h"
18 18
19/*
20 * Max length for ccw chain.
21 * XXX: Limit to 256, need to check more?
22 */
23#define CCWCHAIN_LEN_MAX 256
24
19/** 25/**
20 * struct channel_program - manage information for channel program 26 * struct channel_program - manage information for channel program
21 * @ccwchain_list: list head of ccwchains 27 * @ccwchain_list: list head of ccwchains
@@ -32,6 +38,7 @@ struct channel_program {
32 union orb orb; 38 union orb orb;
33 struct device *mdev; 39 struct device *mdev;
34 bool initialized; 40 bool initialized;
41 struct ccw1 *guest_cp;
35}; 42};
36 43
37extern int cp_init(struct channel_program *cp, struct device *mdev, 44extern int cp_init(struct channel_program *cp, struct device *mdev,
diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c
index 9125f7f4e64c..2b90a5ecaeb9 100644
--- a/drivers/s390/cio/vfio_ccw_drv.c
+++ b/drivers/s390/cio/vfio_ccw_drv.c
@@ -95,11 +95,11 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work)
95 memcpy(private->io_region->irb_area, irb, sizeof(*irb)); 95 memcpy(private->io_region->irb_area, irb, sizeof(*irb));
96 mutex_unlock(&private->io_mutex); 96 mutex_unlock(&private->io_mutex);
97 97
98 if (private->io_trigger)
99 eventfd_signal(private->io_trigger, 1);
100
101 if (private->mdev && is_final) 98 if (private->mdev && is_final)
102 private->state = VFIO_CCW_STATE_IDLE; 99 private->state = VFIO_CCW_STATE_IDLE;
100
101 if (private->io_trigger)
102 eventfd_signal(private->io_trigger, 1);
103} 103}
104 104
105/* 105/*
@@ -129,6 +129,11 @@ static int vfio_ccw_sch_probe(struct subchannel *sch)
129 if (!private) 129 if (!private)
130 return -ENOMEM; 130 return -ENOMEM;
131 131
132 private->cp.guest_cp = kcalloc(CCWCHAIN_LEN_MAX, sizeof(struct ccw1),
133 GFP_KERNEL);
134 if (!private->cp.guest_cp)
135 goto out_free;
136
132 private->io_region = kmem_cache_zalloc(vfio_ccw_io_region, 137 private->io_region = kmem_cache_zalloc(vfio_ccw_io_region,
133 GFP_KERNEL | GFP_DMA); 138 GFP_KERNEL | GFP_DMA);
134 if (!private->io_region) 139 if (!private->io_region)
@@ -169,6 +174,7 @@ out_free:
169 kmem_cache_free(vfio_ccw_cmd_region, private->cmd_region); 174 kmem_cache_free(vfio_ccw_cmd_region, private->cmd_region);
170 if (private->io_region) 175 if (private->io_region)
171 kmem_cache_free(vfio_ccw_io_region, private->io_region); 176 kmem_cache_free(vfio_ccw_io_region, private->io_region);
177 kfree(private->cp.guest_cp);
172 kfree(private); 178 kfree(private);
173 return ret; 179 return ret;
174} 180}
@@ -185,6 +191,7 @@ static int vfio_ccw_sch_remove(struct subchannel *sch)
185 191
186 kmem_cache_free(vfio_ccw_cmd_region, private->cmd_region); 192 kmem_cache_free(vfio_ccw_cmd_region, private->cmd_region);
187 kmem_cache_free(vfio_ccw_io_region, private->io_region); 193 kmem_cache_free(vfio_ccw_io_region, private->io_region);
194 kfree(private->cp.guest_cp);
188 kfree(private); 195 kfree(private);
189 196
190 return 0; 197 return 0;
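
The guest_cp buffer allocated in vfio_ccw_sch_probe() above holds CCWCHAIN_LEN_MAX format-1 CCWs, which is what backs the "copy 2K of possible CCWs" step in ccwchain_handle_ccw(). A trivial check of that sizing, assuming the architected 8-byte CCW:

#include <stdio.h>

#define CCWCHAIN_LEN_MAX 256
#define CCW1_SIZE 8    /* a format-1 CCW is 8 bytes */

int main(void)
{
    printf("guest_cp buffer: %d bytes\n", CCWCHAIN_LEN_MAX * CCW1_SIZE); /* 2048 */
    return 0;
}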
diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c
index 45eb0c14b880..7f418d2d8cdf 100644
--- a/drivers/s390/crypto/pkey_api.c
+++ b/drivers/s390/crypto/pkey_api.c
@@ -690,7 +690,7 @@ int pkey_clr2protkey(u32 keytype,
690 */ 690 */
691 if (!cpacf_test_func(&pckmo_functions, fc)) { 691 if (!cpacf_test_func(&pckmo_functions, fc)) {
692 DEBUG_ERR("%s pckmo functions not available\n", __func__); 692 DEBUG_ERR("%s pckmo functions not available\n", __func__);
693 return -EOPNOTSUPP; 693 return -ENODEV;
694 } 694 }
695 695
696 /* prepare param block */ 696 /* prepare param block */
@@ -1695,15 +1695,15 @@ static int __init pkey_init(void)
1695 * are able to work with protected keys. 1695 * are able to work with protected keys.
1696 */ 1696 */
1697 if (!cpacf_query(CPACF_PCKMO, &pckmo_functions)) 1697 if (!cpacf_query(CPACF_PCKMO, &pckmo_functions))
1698 return -EOPNOTSUPP; 1698 return -ENODEV;
1699 1699
1700 /* check for kmc instructions available */ 1700 /* check for kmc instructions available */
1701 if (!cpacf_query(CPACF_KMC, &kmc_functions)) 1701 if (!cpacf_query(CPACF_KMC, &kmc_functions))
1702 return -EOPNOTSUPP; 1702 return -ENODEV;
1703 if (!cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_128) || 1703 if (!cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_128) ||
1704 !cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_192) || 1704 !cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_192) ||
1705 !cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_256)) 1705 !cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_256))
1706 return -EOPNOTSUPP; 1706 return -ENODEV;
1707 1707
1708 pkey_debug_init(); 1708 pkey_debug_init();
1709 1709
diff --git a/drivers/s390/crypto/vfio_ap_drv.c b/drivers/s390/crypto/vfio_ap_drv.c
index e9824c35c34f..003662aa8060 100644
--- a/drivers/s390/crypto/vfio_ap_drv.c
+++ b/drivers/s390/crypto/vfio_ap_drv.c
@@ -5,6 +5,7 @@
5 * Copyright IBM Corp. 2018 5 * Copyright IBM Corp. 2018
6 * 6 *
7 * Author(s): Tony Krowiak <akrowiak@linux.ibm.com> 7 * Author(s): Tony Krowiak <akrowiak@linux.ibm.com>
8 * Pierre Morel <pmorel@linux.ibm.com>
8 */ 9 */
9 10
10#include <linux/module.h> 11#include <linux/module.h>
@@ -40,14 +41,45 @@ static struct ap_device_id ap_queue_ids[] = {
40 41
41MODULE_DEVICE_TABLE(vfio_ap, ap_queue_ids); 42MODULE_DEVICE_TABLE(vfio_ap, ap_queue_ids);
42 43
44/**
45 * vfio_ap_queue_dev_probe:
46 *
47 * Allocate a vfio_ap_queue structure and associate it
48 * with the device as driver_data.
49 */
43static int vfio_ap_queue_dev_probe(struct ap_device *apdev) 50static int vfio_ap_queue_dev_probe(struct ap_device *apdev)
44{ 51{
52 struct vfio_ap_queue *q;
53
54 q = kzalloc(sizeof(*q), GFP_KERNEL);
55 if (!q)
56 return -ENOMEM;
57 dev_set_drvdata(&apdev->device, q);
58 q->apqn = to_ap_queue(&apdev->device)->qid;
59 q->saved_isc = VFIO_AP_ISC_INVALID;
45 return 0; 60 return 0;
46} 61}
47 62
63/**
64 * vfio_ap_queue_dev_remove:
65 *
66 * Takes the matrix lock to avoid actions on this device while removing it,
67 * then frees the associated vfio_ap_queue structure.
68 */
48static void vfio_ap_queue_dev_remove(struct ap_device *apdev) 69static void vfio_ap_queue_dev_remove(struct ap_device *apdev)
49{ 70{
50 /* Nothing to do yet */ 71 struct vfio_ap_queue *q;
72 int apid, apqi;
73
74 mutex_lock(&matrix_dev->lock);
75 q = dev_get_drvdata(&apdev->device);
76 dev_set_drvdata(&apdev->device, NULL);
77 apid = AP_QID_CARD(q->apqn);
78 apqi = AP_QID_QUEUE(q->apqn);
79 vfio_ap_mdev_reset_queue(apid, apqi, 1);
80 vfio_ap_irq_disable(q);
81 kfree(q);
82 mutex_unlock(&matrix_dev->lock);
51} 83}
52 84
53static void vfio_ap_matrix_dev_release(struct device *dev) 85static void vfio_ap_matrix_dev_release(struct device *dev)
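
vfio_ap_queue_dev_remove() above recovers the card (APID) and queue index (APQI) from the stored APQN before resetting the queue and disabling its interrupts. Assuming the usual AP packing, an APQN is a 16-bit value with the APID in the high byte and the APQI in the low byte, as in this sketch (the SKETCH_* macros are stand-ins for the AP_MKQID()/AP_QID_CARD()/AP_QID_QUEUE() helpers used above):

#include <stdio.h>

/* Assumed packing: APQN = (APID << 8) | APQI, each field 8 bits wide. */
#define SKETCH_MKQID(apid, apqi)  ((((apid) & 0xff) << 8) | ((apqi) & 0xff))
#define SKETCH_QID_CARD(qid)      (((qid) >> 8) & 0xff)
#define SKETCH_QID_QUEUE(qid)     ((qid) & 0xff)

int main(void)
{
    unsigned int apqn = SKETCH_MKQID(0x03, 0x2a);

    printf("apqn=%#06x apid=%#04x apqi=%#04x\n",
           apqn, (unsigned)SKETCH_QID_CARD(apqn), (unsigned)SKETCH_QID_QUEUE(apqn));
    return 0;
}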
diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
index 900b9cf20ca5..2c9fb1423a39 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -24,6 +24,296 @@
24#define VFIO_AP_MDEV_TYPE_HWVIRT "passthrough" 24#define VFIO_AP_MDEV_TYPE_HWVIRT "passthrough"
25#define VFIO_AP_MDEV_NAME_HWVIRT "VFIO AP Passthrough Device" 25#define VFIO_AP_MDEV_NAME_HWVIRT "VFIO AP Passthrough Device"
26 26
27static int vfio_ap_mdev_reset_queues(struct mdev_device *mdev);
28
29static int match_apqn(struct device *dev, void *data)
30{
31 struct vfio_ap_queue *q = dev_get_drvdata(dev);
32
33 return (q->apqn == *(int *)(data)) ? 1 : 0;
34}
35
36/**
37 * vfio_ap_get_queue: Retrieve a queue with a specific APQN from a list
38 * @matrix_mdev: the associated mediated matrix
39 * @apqn: The queue APQN
40 *
41 * Retrieve a queue with a specific APQN from the list of the
42 * devices of the vfio_ap_drv.
43 * Verify that the APID and the APQI are set in the matrix.
44 *
45 * Returns the pointer to the associated vfio_ap_queue
46 */
47static struct vfio_ap_queue *vfio_ap_get_queue(
48 struct ap_matrix_mdev *matrix_mdev,
49 int apqn)
50{
51 struct vfio_ap_queue *q;
52 struct device *dev;
53
54 if (!test_bit_inv(AP_QID_CARD(apqn), matrix_mdev->matrix.apm))
55 return NULL;
56 if (!test_bit_inv(AP_QID_QUEUE(apqn), matrix_mdev->matrix.aqm))
57 return NULL;
58
59 dev = driver_find_device(&matrix_dev->vfio_ap_drv->driver, NULL,
60 &apqn, match_apqn);
61 if (!dev)
62 return NULL;
63 q = dev_get_drvdata(dev);
64 q->matrix_mdev = matrix_mdev;
65 put_device(dev);
66
67 return q;
68}
69
70/**
71 * vfio_ap_wait_for_irqclear
72 * @apqn: The AP Queue number
73 *
74 * Checks the IRQ bit for the status of this APQN using ap_tapq.
75 * Returns if the ap_tapq function succeeded and the bit is clear.
76 * Returns if ap_tapq function failed with invalid, deconfigured or
77 * checkstopped AP.
78 * Otherwise retries up to 5 times after waiting 20ms.
79 *
80 */
81static void vfio_ap_wait_for_irqclear(int apqn)
82{
83 struct ap_queue_status status;
84 int retry = 5;
85
86 do {
87 status = ap_tapq(apqn, NULL);
88 switch (status.response_code) {
89 case AP_RESPONSE_NORMAL:
90 case AP_RESPONSE_RESET_IN_PROGRESS:
91 if (!status.irq_enabled)
92 return;
93 /* Fall through */
94 case AP_RESPONSE_BUSY:
95 msleep(20);
96 break;
97 case AP_RESPONSE_Q_NOT_AVAIL:
98 case AP_RESPONSE_DECONFIGURED:
99 case AP_RESPONSE_CHECKSTOPPED:
100 default:
101 WARN_ONCE(1, "%s: tapq rc %02x: %04x\n", __func__,
102 status.response_code, apqn);
103 return;
104 }
105 } while (--retry);
106
107 WARN_ONCE(1, "%s: tapq rc %02x: %04x could not clear IR bit\n",
108 __func__, status.response_code, apqn);
109}
110
111/**
112 * vfio_ap_free_aqic_resources
113 * @q: The vfio_ap_queue
114 *
115 * Unregisters the ISC in the GIB when the saved ISC is not invalid.
116 * Unpins the guest's page holding the NIB when it exists.
117 * Resets the saved_pfn and saved_isc to invalid values.
118 * Clears the pointer to the matrix mediated device.
119 *
120 */
121static void vfio_ap_free_aqic_resources(struct vfio_ap_queue *q)
122{
123 if (q->saved_isc != VFIO_AP_ISC_INVALID && q->matrix_mdev)
124 kvm_s390_gisc_unregister(q->matrix_mdev->kvm, q->saved_isc);
125 if (q->saved_pfn && q->matrix_mdev)
126 vfio_unpin_pages(mdev_dev(q->matrix_mdev->mdev),
127 &q->saved_pfn, 1);
128 q->saved_pfn = 0;
129 q->saved_isc = VFIO_AP_ISC_INVALID;
130 q->matrix_mdev = NULL;
131}
132
133/**
134 * vfio_ap_irq_disable
135 * @q: The vfio_ap_queue
136 *
137 * Uses ap_aqic to disable the interruption; in case of success, of a reset
138 * already in progress, or of the IRQ having already been disabled, calls
139 * vfio_ap_wait_for_irqclear() to check for the IRQ bit to be clear
140 * and calls vfio_ap_free_aqic_resources() to free the resources associated
141 * with the AP interrupt handling.
142 *
143 * In the case the AP is busy, or a reset is in progress,
144 * retries after 20ms, up to 5 times.
145 *
146 * Returns if ap_aqic function failed with invalid, deconfigured or
147 * checkstopped AP.
148 */
149struct ap_queue_status vfio_ap_irq_disable(struct vfio_ap_queue *q)
150{
151 struct ap_qirq_ctrl aqic_gisa = {};
152 struct ap_queue_status status;
153 int retries = 5;
154
155 do {
156 status = ap_aqic(q->apqn, aqic_gisa, NULL);
157 switch (status.response_code) {
158 case AP_RESPONSE_OTHERWISE_CHANGED:
159 case AP_RESPONSE_NORMAL:
160 vfio_ap_wait_for_irqclear(q->apqn);
161 goto end_free;
162 case AP_RESPONSE_RESET_IN_PROGRESS:
163 case AP_RESPONSE_BUSY:
164 msleep(20);
165 break;
166 case AP_RESPONSE_Q_NOT_AVAIL:
167 case AP_RESPONSE_DECONFIGURED:
168 case AP_RESPONSE_CHECKSTOPPED:
169 case AP_RESPONSE_INVALID_ADDRESS:
170 default:
171 /* All cases in default means AP not operational */
172 WARN_ONCE(1, "%s: ap_aqic status %d\n", __func__,
173 status.response_code);
174 goto end_free;
175 }
176 } while (retries--);
177
178 WARN_ONCE(1, "%s: ap_aqic status %d\n", __func__,
179 status.response_code);
180end_free:
181 vfio_ap_free_aqic_resources(q);
182 return status;
183}
184
185/**
186 * vfio_ap_setirq: Enable Interruption for an APQN
187 *
188 * @dev: the device associated with the ap_queue
189 * @q: the vfio_ap_queue holding AQIC parameters
190 *
191 * Pin the NIB saved in *q
192 * Register the guest ISC to GIB interface and retrieve the
193 * host ISC to issue the host side PQAP/AQIC
194 *
195 * Response.status may be set to AP_RESPONSE_INVALID_ADDRESS in case the
196 * vfio_pin_pages failed.
197 *
198 * Otherwise return the ap_queue_status returned by the ap_aqic(),
199 * all retry handling will be done by the guest.
200 */
201static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q,
202 int isc,
203 unsigned long nib)
204{
205 struct ap_qirq_ctrl aqic_gisa = {};
206 struct ap_queue_status status = {};
207 struct kvm_s390_gisa *gisa;
208 struct kvm *kvm;
209 unsigned long h_nib, g_pfn, h_pfn;
210 int ret;
211
212 g_pfn = nib >> PAGE_SHIFT;
213 ret = vfio_pin_pages(mdev_dev(q->matrix_mdev->mdev), &g_pfn, 1,
214 IOMMU_READ | IOMMU_WRITE, &h_pfn);
215 switch (ret) {
216 case 1:
217 break;
218 default:
219 status.response_code = AP_RESPONSE_INVALID_ADDRESS;
220 return status;
221 }
222
223 kvm = q->matrix_mdev->kvm;
224 gisa = kvm->arch.gisa_int.origin;
225
226 h_nib = (h_pfn << PAGE_SHIFT) | (nib & ~PAGE_MASK);
227 aqic_gisa.gisc = isc;
228 aqic_gisa.isc = kvm_s390_gisc_register(kvm, isc);
229 aqic_gisa.ir = 1;
230 aqic_gisa.gisa = (uint64_t)gisa >> 4;
231
232 status = ap_aqic(q->apqn, aqic_gisa, (void *)h_nib);
233 switch (status.response_code) {
234 case AP_RESPONSE_NORMAL:
235 /* See if we did clear older IRQ configuration */
236 vfio_ap_free_aqic_resources(q);
237 q->saved_pfn = g_pfn;
238 q->saved_isc = isc;
239 break;
240 case AP_RESPONSE_OTHERWISE_CHANGED:
241 /* We could not modify IRQ settings: clear new configuration */
242 vfio_unpin_pages(mdev_dev(q->matrix_mdev->mdev), &g_pfn, 1);
243 kvm_s390_gisc_unregister(kvm, isc);
244 break;
245 default:
246 pr_warn("%s: apqn %04x: response: %02x\n", __func__, q->apqn,
247 status.response_code);
248 vfio_ap_irq_disable(q);
249 break;
250 }
251
252 return status;
253}
254
255/**
256 * handle_pqap: PQAP instruction callback
257 *
258 * @vcpu: The vcpu on which we received the PQAP instruction
259 *
260 * Get the general register contents to initialize internal variables.
261 * REG[0]: APQN
262 * REG[1]: IR and ISC
263 * REG[2]: NIB
264 *
265 * Response.status may be set to one of the following Response Codes:
266 * - AP_RESPONSE_Q_NOT_AVAIL: if the queue is not available
267 * - AP_RESPONSE_DECONFIGURED: if the queue is not configured
268 * - AP_RESPONSE_NORMAL (0) : in case of success
269 * Check vfio_ap_setirq() and vfio_ap_clrirq() for other possible RC.
270 * We take the matrix_dev lock to ensure serialization on queues and
271 * mediated device access.
272 *
273 * Return 0 if we could handle the request inside KVM.
274 * otherwise, returns -EOPNOTSUPP to let QEMU handle the fault.
275 */
276static int handle_pqap(struct kvm_vcpu *vcpu)
277{
278 uint64_t status;
279 uint16_t apqn;
280 struct vfio_ap_queue *q;
281 struct ap_queue_status qstatus = {
282 .response_code = AP_RESPONSE_Q_NOT_AVAIL, };
283 struct ap_matrix_mdev *matrix_mdev;
284
285 /* If we do not use the AIV facility just go to userland */
286 if (!(vcpu->arch.sie_block->eca & ECA_AIV))
287 return -EOPNOTSUPP;
288
289 apqn = vcpu->run->s.regs.gprs[0] & 0xffff;
290 mutex_lock(&matrix_dev->lock);
291
292 if (!vcpu->kvm->arch.crypto.pqap_hook)
293 goto out_unlock;
294 matrix_mdev = container_of(vcpu->kvm->arch.crypto.pqap_hook,
295 struct ap_matrix_mdev, pqap_hook);
296
297 q = vfio_ap_get_queue(matrix_mdev, apqn);
298 if (!q)
299 goto out_unlock;
300
301 status = vcpu->run->s.regs.gprs[1];
302
303 /* If IR bit(16) is set we enable the interrupt */
304 if ((status >> (63 - 16)) & 0x01)
305 qstatus = vfio_ap_irq_enable(q, status & 0x07,
306 vcpu->run->s.regs.gprs[2]);
307 else
308 qstatus = vfio_ap_irq_disable(q);
309
310out_unlock:
311 memcpy(&vcpu->run->s.regs.gprs[1], &qstatus, sizeof(qstatus));
312 vcpu->run->s.regs.gprs[1] >>= 32;
313 mutex_unlock(&matrix_dev->lock);
314 return 0;
315}
316
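
handle_pqap() above pulls its parameters straight out of the guest's general registers: the APQN from the low 16 bits of GR0, the IR bit and guest ISC from GR1, and the NIB address from GR2. A standalone sketch of the same field extraction, reusing the shifts from the code above on made-up register contents:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    /* Hypothetical guest register contents at PQAP/AQIC time. */
    uint64_t gr0 = 0x000000000000032aULL;    /* low 16 bits: APQN */
    uint64_t gr1 = 0x0000800000000005ULL;    /* bit 16 (IBM numbering): IR, low 3 bits: ISC */
    uint64_t gr2 = 0x0000000000010850ULL;    /* NIB guest address */

    unsigned int apqn = gr0 & 0xffff;
    int ir = (gr1 >> (63 - 16)) & 0x01;      /* same shift handle_pqap() uses */
    int isc = gr1 & 0x07;

    printf("apqn=%#06x ir=%d isc=%d nib=%#llx\n",
           apqn, ir, isc, (unsigned long long)gr2);
    return 0;
}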
27static void vfio_ap_matrix_init(struct ap_config_info *info, 317static void vfio_ap_matrix_init(struct ap_config_info *info,
28 struct ap_matrix *matrix) 318 struct ap_matrix *matrix)
29{ 319{
@@ -45,8 +335,11 @@ static int vfio_ap_mdev_create(struct kobject *kobj, struct mdev_device *mdev)
45 return -ENOMEM; 335 return -ENOMEM;
46 } 336 }
47 337
338 matrix_mdev->mdev = mdev;
48 vfio_ap_matrix_init(&matrix_dev->info, &matrix_mdev->matrix); 339 vfio_ap_matrix_init(&matrix_dev->info, &matrix_mdev->matrix);
49 mdev_set_drvdata(mdev, matrix_mdev); 340 mdev_set_drvdata(mdev, matrix_mdev);
341 matrix_mdev->pqap_hook.hook = handle_pqap;
342 matrix_mdev->pqap_hook.owner = THIS_MODULE;
50 mutex_lock(&matrix_dev->lock); 343 mutex_lock(&matrix_dev->lock);
51 list_add(&matrix_mdev->node, &matrix_dev->mdev_list); 344 list_add(&matrix_mdev->node, &matrix_dev->mdev_list);
52 mutex_unlock(&matrix_dev->lock); 345 mutex_unlock(&matrix_dev->lock);
@@ -62,6 +355,7 @@ static int vfio_ap_mdev_remove(struct mdev_device *mdev)
62 return -EBUSY; 355 return -EBUSY;
63 356
64 mutex_lock(&matrix_dev->lock); 357 mutex_lock(&matrix_dev->lock);
358 vfio_ap_mdev_reset_queues(mdev);
65 list_del(&matrix_mdev->node); 359 list_del(&matrix_mdev->node);
66 mutex_unlock(&matrix_dev->lock); 360 mutex_unlock(&matrix_dev->lock);
67 361
@@ -754,11 +1048,42 @@ static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev,
754 } 1048 }
755 1049
756 matrix_mdev->kvm = kvm; 1050 matrix_mdev->kvm = kvm;
1051 kvm_get_kvm(kvm);
1052 kvm->arch.crypto.pqap_hook = &matrix_mdev->pqap_hook;
757 mutex_unlock(&matrix_dev->lock); 1053 mutex_unlock(&matrix_dev->lock);
758 1054
759 return 0; 1055 return 0;
760} 1056}
761 1057
1058/*
1059 * vfio_ap_mdev_iommu_notifier: IOMMU notifier callback
1060 *
1061 * @nb: The notifier block
1062 * @action: Action to be taken
1063 * @data: data associated with the request
1064 *
1065 * For an UNMAP request, unpin the guest IOVA (the NIB guest address we
1066 * pinned before). Other requests are ignored.
1067 *
1068 */
1069static int vfio_ap_mdev_iommu_notifier(struct notifier_block *nb,
1070 unsigned long action, void *data)
1071{
1072 struct ap_matrix_mdev *matrix_mdev;
1073
1074 matrix_mdev = container_of(nb, struct ap_matrix_mdev, iommu_notifier);
1075
1076 if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
1077 struct vfio_iommu_type1_dma_unmap *unmap = data;
1078 unsigned long g_pfn = unmap->iova >> PAGE_SHIFT;
1079
1080 vfio_unpin_pages(mdev_dev(matrix_mdev->mdev), &g_pfn, 1);
1081 return NOTIFY_OK;
1082 }
1083
1084 return NOTIFY_DONE;
1085}
1086
762static int vfio_ap_mdev_group_notifier(struct notifier_block *nb, 1087static int vfio_ap_mdev_group_notifier(struct notifier_block *nb,
763 unsigned long action, void *data) 1088 unsigned long action, void *data)
764{ 1089{
@@ -790,15 +1115,36 @@ static int vfio_ap_mdev_group_notifier(struct notifier_block *nb,
790 return NOTIFY_OK; 1115 return NOTIFY_OK;
791} 1116}
792 1117
793static int vfio_ap_mdev_reset_queue(unsigned int apid, unsigned int apqi, 1118static void vfio_ap_irq_disable_apqn(int apqn)
794 unsigned int retry) 1119{
1120 struct device *dev;
1121 struct vfio_ap_queue *q;
1122
1123 dev = driver_find_device(&matrix_dev->vfio_ap_drv->driver, NULL,
1124 &apqn, match_apqn);
1125 if (dev) {
1126 q = dev_get_drvdata(dev);
1127 vfio_ap_irq_disable(q);
1128 put_device(dev);
1129 }
1130}
1131
1132int vfio_ap_mdev_reset_queue(unsigned int apid, unsigned int apqi,
1133 unsigned int retry)
795{ 1134{
796 struct ap_queue_status status; 1135 struct ap_queue_status status;
1136 int retry2 = 2;
1137 int apqn = AP_MKQID(apid, apqi);
797 1138
798 do { 1139 do {
799 status = ap_zapq(AP_MKQID(apid, apqi)); 1140 status = ap_zapq(apqn);
800 switch (status.response_code) { 1141 switch (status.response_code) {
801 case AP_RESPONSE_NORMAL: 1142 case AP_RESPONSE_NORMAL:
1143 while (!status.queue_empty && retry2--) {
1144 msleep(20);
1145 status = ap_tapq(apqn, NULL);
1146 }
1147 WARN_ON_ONCE(retry <= 0);
802 return 0; 1148 return 0;
803 case AP_RESPONSE_RESET_IN_PROGRESS: 1149 case AP_RESPONSE_RESET_IN_PROGRESS:
804 case AP_RESPONSE_BUSY: 1150 case AP_RESPONSE_BUSY:
@@ -832,6 +1178,7 @@ static int vfio_ap_mdev_reset_queues(struct mdev_device *mdev)
832 */ 1178 */
833 if (ret) 1179 if (ret)
834 rc = ret; 1180 rc = ret;
1181 vfio_ap_irq_disable_apqn(AP_MKQID(apid, apqi));
835 } 1182 }
836 } 1183 }
837 1184
@@ -858,20 +1205,37 @@ static int vfio_ap_mdev_open(struct mdev_device *mdev)
858 return ret; 1205 return ret;
859 } 1206 }
860 1207
861 return 0; 1208 matrix_mdev->iommu_notifier.notifier_call = vfio_ap_mdev_iommu_notifier;
1209 events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
1210 ret = vfio_register_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
1211 &events, &matrix_mdev->iommu_notifier);
1212 if (!ret)
1213 return ret;
1214
1215 vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY,
1216 &matrix_mdev->group_notifier);
1217 module_put(THIS_MODULE);
1218 return ret;
862} 1219}
863 1220
864static void vfio_ap_mdev_release(struct mdev_device *mdev) 1221static void vfio_ap_mdev_release(struct mdev_device *mdev)
865{ 1222{
866 struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev); 1223 struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
867 1224
868 if (matrix_mdev->kvm) 1225 mutex_lock(&matrix_dev->lock);
1226 if (matrix_mdev->kvm) {
869 kvm_arch_crypto_clear_masks(matrix_mdev->kvm); 1227 kvm_arch_crypto_clear_masks(matrix_mdev->kvm);
1228 matrix_mdev->kvm->arch.crypto.pqap_hook = NULL;
1229 vfio_ap_mdev_reset_queues(mdev);
1230 kvm_put_kvm(matrix_mdev->kvm);
1231 matrix_mdev->kvm = NULL;
1232 }
1233 mutex_unlock(&matrix_dev->lock);
870 1234
871 vfio_ap_mdev_reset_queues(mdev); 1235 vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
1236 &matrix_mdev->iommu_notifier);
872 vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY, 1237 vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY,
873 &matrix_mdev->group_notifier); 1238 &matrix_mdev->group_notifier);
874 matrix_mdev->kvm = NULL;
875 module_put(THIS_MODULE); 1239 module_put(THIS_MODULE);
876} 1240}
877 1241
@@ -900,6 +1264,7 @@ static ssize_t vfio_ap_mdev_ioctl(struct mdev_device *mdev,
900{ 1264{
901 int ret; 1265 int ret;
902 1266
1267 mutex_lock(&matrix_dev->lock);
903 switch (cmd) { 1268 switch (cmd) {
904 case VFIO_DEVICE_GET_INFO: 1269 case VFIO_DEVICE_GET_INFO:
905 ret = vfio_ap_mdev_get_device_info(arg); 1270 ret = vfio_ap_mdev_get_device_info(arg);
@@ -911,6 +1276,7 @@ static ssize_t vfio_ap_mdev_ioctl(struct mdev_device *mdev,
911 ret = -EOPNOTSUPP; 1276 ret = -EOPNOTSUPP;
912 break; 1277 break;
913 } 1278 }
1279 mutex_unlock(&matrix_dev->lock);
914 1280
915 return ret; 1281 return ret;
916} 1282}
diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h
index 76b7f98e47e9..f46dde56b464 100644
--- a/drivers/s390/crypto/vfio_ap_private.h
+++ b/drivers/s390/crypto/vfio_ap_private.h
@@ -4,6 +4,7 @@
4 * 4 *
5 * Author(s): Tony Krowiak <akrowiak@linux.ibm.com> 5 * Author(s): Tony Krowiak <akrowiak@linux.ibm.com>
6 * Halil Pasic <pasic@linux.ibm.com> 6 * Halil Pasic <pasic@linux.ibm.com>
7 * Pierre Morel <pmorel@linux.ibm.com>
7 * 8 *
8 * Copyright IBM Corp. 2018 9 * Copyright IBM Corp. 2018
9 */ 10 */
@@ -16,6 +17,7 @@
16#include <linux/mdev.h> 17#include <linux/mdev.h>
17#include <linux/delay.h> 18#include <linux/delay.h>
18#include <linux/mutex.h> 19#include <linux/mutex.h>
20#include <linux/kvm_host.h>
19 21
20#include "ap_bus.h" 22#include "ap_bus.h"
21 23
@@ -80,10 +82,23 @@ struct ap_matrix_mdev {
80 struct list_head node; 82 struct list_head node;
81 struct ap_matrix matrix; 83 struct ap_matrix matrix;
82 struct notifier_block group_notifier; 84 struct notifier_block group_notifier;
85 struct notifier_block iommu_notifier;
83 struct kvm *kvm; 86 struct kvm *kvm;
87 struct kvm_s390_module_hook pqap_hook;
88 struct mdev_device *mdev;
84}; 89};
85 90
86extern int vfio_ap_mdev_register(void); 91extern int vfio_ap_mdev_register(void);
87extern void vfio_ap_mdev_unregister(void); 92extern void vfio_ap_mdev_unregister(void);
93int vfio_ap_mdev_reset_queue(unsigned int apid, unsigned int apqi,
94 unsigned int retry);
88 95
96struct vfio_ap_queue {
97 struct ap_matrix_mdev *matrix_mdev;
98 unsigned long saved_pfn;
99 int apqn;
100#define VFIO_AP_ISC_INVALID 0xff
101 unsigned char saved_isc;
102};
103struct ap_queue_status vfio_ap_irq_disable(struct vfio_ap_queue *q);
89#endif /* _VFIO_AP_PRIVATE_H_ */ 104#endif /* _VFIO_AP_PRIVATE_H_ */
diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c
index 0cbcc238ef98..12fe9deb265e 100644
--- a/drivers/s390/crypto/zcrypt_msgtype6.c
+++ b/drivers/s390/crypto/zcrypt_msgtype6.c
@@ -567,6 +567,10 @@ static int xcrb_msg_to_type6_ep11cprb_msgx(struct ap_message *ap_msg,
567 payload_hdr = (struct pld_hdr *)((&(msg->pld_lenfmt))+lfmt); 567 payload_hdr = (struct pld_hdr *)((&(msg->pld_lenfmt))+lfmt);
568 *fcode = payload_hdr->func_val & 0xFFFF; 568 *fcode = payload_hdr->func_val & 0xFFFF;
569 569
570 /* enable special processing based on the CPRB's flags special bit */
571 if (msg->cprbx.flags & 0x20)
572 ap_msg->special = 1;
573
570 return 0; 574 return 0;
571} 575}
572 576
diff --git a/drivers/s390/net/Kconfig b/drivers/s390/net/Kconfig
index 7c5a25ddf832..ced896d1534a 100644
--- a/drivers/s390/net/Kconfig
+++ b/drivers/s390/net/Kconfig
@@ -7,10 +7,10 @@ config LCS
7 prompt "Lan Channel Station Interface" 7 prompt "Lan Channel Station Interface"
8 depends on CCW && NETDEVICES && (ETHERNET || FDDI) 8 depends on CCW && NETDEVICES && (ETHERNET || FDDI)
9 help 9 help
10 Select this option if you want to use LCS networking on IBM System z. 10 Select this option if you want to use LCS networking on IBM System z.
11 This device driver supports FDDI (IEEE 802.7) and Ethernet. 11 This device driver supports FDDI (IEEE 802.7) and Ethernet.
12 To compile as a module, choose M. The module name is lcs. 12 To compile as a module, choose M. The module name is lcs.
13 If you do not know what it is, it's safe to choose Y. 13 If you do not know what it is, it's safe to choose Y.
14 14
15config CTCM 15config CTCM
16 def_tristate m 16 def_tristate m
diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c
index 6a3076881321..1a55e5942d36 100644
--- a/drivers/s390/virtio/virtio_ccw.c
+++ b/drivers/s390/virtio/virtio_ccw.c
@@ -46,9 +46,15 @@ struct vq_config_block {
46#define VIRTIO_CCW_CONFIG_SIZE 0x100 46#define VIRTIO_CCW_CONFIG_SIZE 0x100
47/* same as PCI config space size, should be enough for all drivers */ 47/* same as PCI config space size, should be enough for all drivers */
48 48
49struct vcdev_dma_area {
50 unsigned long indicators;
51 unsigned long indicators2;
52 struct vq_config_block config_block;
53 __u8 status;
54};
55
49struct virtio_ccw_device { 56struct virtio_ccw_device {
50 struct virtio_device vdev; 57 struct virtio_device vdev;
51 __u8 *status;
52 __u8 config[VIRTIO_CCW_CONFIG_SIZE]; 58 __u8 config[VIRTIO_CCW_CONFIG_SIZE];
53 struct ccw_device *cdev; 59 struct ccw_device *cdev;
54 __u32 curr_io; 60 __u32 curr_io;
@@ -58,17 +64,24 @@ struct virtio_ccw_device {
58 spinlock_t lock; 64 spinlock_t lock;
59 struct mutex io_lock; /* Serializes I/O requests */ 65 struct mutex io_lock; /* Serializes I/O requests */
60 struct list_head virtqueues; 66 struct list_head virtqueues;
61 unsigned long indicators;
62 unsigned long indicators2;
63 struct vq_config_block *config_block;
64 bool is_thinint; 67 bool is_thinint;
65 bool going_away; 68 bool going_away;
66 bool device_lost; 69 bool device_lost;
67 unsigned int config_ready; 70 unsigned int config_ready;
68 void *airq_info; 71 void *airq_info;
69 u64 dma_mask; 72 struct vcdev_dma_area *dma_area;
70}; 73};
71 74
75static inline unsigned long *indicators(struct virtio_ccw_device *vcdev)
76{
77 return &vcdev->dma_area->indicators;
78}
79
80static inline unsigned long *indicators2(struct virtio_ccw_device *vcdev)
81{
82 return &vcdev->dma_area->indicators2;
83}
84
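
The refactoring above gathers the formerly scattered DMA-able members (indicators, indicators2, config_block, status) into one vcdev_dma_area allocated per device, with small accessors so callers never reach into the area directly. A user-space sketch of that pattern with stand-in types (calloc() stands in for ccw_device_dma_zalloc(), and the config block layout here is not the real one):

#include <stdio.h>
#include <stdlib.h>

/* Stand-ins for the kernel types, just to show the grouping. */
struct sketch_config_block { unsigned short index, num; };

struct sketch_dma_area {
    unsigned long indicators;
    unsigned long indicators2;
    struct sketch_config_block config_block;
    unsigned char status;
};

struct sketch_dev {
    struct sketch_dma_area *dma_area;    /* one device-addressable chunk */
};

/* Accessor mirroring indicators(): callers never touch dma_area directly. */
static unsigned long *sketch_indicators(struct sketch_dev *dev)
{
    return &dev->dma_area->indicators;
}

int main(void)
{
    struct sketch_dev dev;

    /* calloc() stands in for ccw_device_dma_zalloc() in this sketch. */
    dev.dma_area = calloc(1, sizeof(*dev.dma_area));
    if (!dev.dma_area)
        return 1;

    *sketch_indicators(&dev) = 0x1;
    printf("indicators = %#lx (area size %zu bytes)\n",
           *sketch_indicators(&dev), sizeof(*dev.dma_area));

    free(dev.dma_area);
    return 0;
}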
72struct vq_info_block_legacy { 85struct vq_info_block_legacy {
73 __u64 queue; 86 __u64 queue;
74 __u32 align; 87 __u32 align;
@@ -127,11 +140,17 @@ static int virtio_ccw_use_airq = 1;
127 140
128struct airq_info { 141struct airq_info {
129 rwlock_t lock; 142 rwlock_t lock;
130 u8 summary_indicator; 143 u8 summary_indicator_idx;
131 struct airq_struct airq; 144 struct airq_struct airq;
132 struct airq_iv *aiv; 145 struct airq_iv *aiv;
133}; 146};
134static struct airq_info *airq_areas[MAX_AIRQ_AREAS]; 147static struct airq_info *airq_areas[MAX_AIRQ_AREAS];
148static u8 *summary_indicators;
149
150static inline u8 *get_summary_indicator(struct airq_info *info)
151{
152 return summary_indicators + info->summary_indicator_idx;
153}
135 154
136#define CCW_CMD_SET_VQ 0x13 155#define CCW_CMD_SET_VQ 0x13
137#define CCW_CMD_VDEV_RESET 0x33 156#define CCW_CMD_VDEV_RESET 0x33
@@ -196,7 +215,7 @@ static void virtio_airq_handler(struct airq_struct *airq, bool floating)
196 break; 215 break;
197 vring_interrupt(0, (void *)airq_iv_get_ptr(info->aiv, ai)); 216 vring_interrupt(0, (void *)airq_iv_get_ptr(info->aiv, ai));
198 } 217 }
199 info->summary_indicator = 0; 218 *(get_summary_indicator(info)) = 0;
200 smp_wmb(); 219 smp_wmb();
201 /* Walk through indicators field, summary indicator not active. */ 220 /* Walk through indicators field, summary indicator not active. */
202 for (ai = 0;;) { 221 for (ai = 0;;) {
@@ -208,7 +227,7 @@ static void virtio_airq_handler(struct airq_struct *airq, bool floating)
208 read_unlock(&info->lock); 227 read_unlock(&info->lock);
209} 228}
210 229
211static struct airq_info *new_airq_info(void) 230static struct airq_info *new_airq_info(int index)
212{ 231{
213 struct airq_info *info; 232 struct airq_info *info;
214 int rc; 233 int rc;
@@ -217,13 +236,15 @@ static struct airq_info *new_airq_info(void)
217 if (!info) 236 if (!info)
218 return NULL; 237 return NULL;
219 rwlock_init(&info->lock); 238 rwlock_init(&info->lock);
220 info->aiv = airq_iv_create(VIRTIO_IV_BITS, AIRQ_IV_ALLOC | AIRQ_IV_PTR); 239 info->aiv = airq_iv_create(VIRTIO_IV_BITS, AIRQ_IV_ALLOC | AIRQ_IV_PTR
240 | AIRQ_IV_CACHELINE);
221 if (!info->aiv) { 241 if (!info->aiv) {
222 kfree(info); 242 kfree(info);
223 return NULL; 243 return NULL;
224 } 244 }
225 info->airq.handler = virtio_airq_handler; 245 info->airq.handler = virtio_airq_handler;
226 info->airq.lsi_ptr = &info->summary_indicator; 246 info->summary_indicator_idx = index;
247 info->airq.lsi_ptr = get_summary_indicator(info);
227 info->airq.lsi_mask = 0xff; 248 info->airq.lsi_mask = 0xff;
228 info->airq.isc = VIRTIO_AIRQ_ISC; 249 info->airq.isc = VIRTIO_AIRQ_ISC;
229 rc = register_adapter_interrupt(&info->airq); 250 rc = register_adapter_interrupt(&info->airq);
@@ -245,7 +266,7 @@ static unsigned long get_airq_indicator(struct virtqueue *vqs[], int nvqs,
245 266
246 for (i = 0; i < MAX_AIRQ_AREAS && !indicator_addr; i++) { 267 for (i = 0; i < MAX_AIRQ_AREAS && !indicator_addr; i++) {
247 if (!airq_areas[i]) 268 if (!airq_areas[i])
248 airq_areas[i] = new_airq_info(); 269 airq_areas[i] = new_airq_info(i);
249 info = airq_areas[i]; 270 info = airq_areas[i];
250 if (!info) 271 if (!info)
251 return 0; 272 return 0;
@@ -326,29 +347,29 @@ static void virtio_ccw_drop_indicator(struct virtio_ccw_device *vcdev,
326 struct airq_info *airq_info = vcdev->airq_info; 347 struct airq_info *airq_info = vcdev->airq_info;
327 348
328 if (vcdev->is_thinint) { 349 if (vcdev->is_thinint) {
329 thinint_area = kzalloc(sizeof(*thinint_area), 350 thinint_area = ccw_device_dma_zalloc(vcdev->cdev,
330 GFP_DMA | GFP_KERNEL); 351 sizeof(*thinint_area));
331 if (!thinint_area) 352 if (!thinint_area)
332 return; 353 return;
333 thinint_area->summary_indicator = 354 thinint_area->summary_indicator =
334 (unsigned long) &airq_info->summary_indicator; 355 (unsigned long) get_summary_indicator(airq_info);
335 thinint_area->isc = VIRTIO_AIRQ_ISC; 356 thinint_area->isc = VIRTIO_AIRQ_ISC;
336 ccw->cmd_code = CCW_CMD_SET_IND_ADAPTER; 357 ccw->cmd_code = CCW_CMD_SET_IND_ADAPTER;
337 ccw->count = sizeof(*thinint_area); 358 ccw->count = sizeof(*thinint_area);
338 ccw->cda = (__u32)(unsigned long) thinint_area; 359 ccw->cda = (__u32)(unsigned long) thinint_area;
339 } else { 360 } else {
340 /* payload is the address of the indicators */ 361 /* payload is the address of the indicators */
341 indicatorp = kmalloc(sizeof(&vcdev->indicators), 362 indicatorp = ccw_device_dma_zalloc(vcdev->cdev,
342 GFP_DMA | GFP_KERNEL); 363 sizeof(indicators(vcdev)));
343 if (!indicatorp) 364 if (!indicatorp)
344 return; 365 return;
345 *indicatorp = 0; 366 *indicatorp = 0;
346 ccw->cmd_code = CCW_CMD_SET_IND; 367 ccw->cmd_code = CCW_CMD_SET_IND;
347 ccw->count = sizeof(&vcdev->indicators); 368 ccw->count = sizeof(indicators(vcdev));
348 ccw->cda = (__u32)(unsigned long) indicatorp; 369 ccw->cda = (__u32)(unsigned long) indicatorp;
349 } 370 }
350 /* Deregister indicators from host. */ 371 /* Deregister indicators from host. */
351 vcdev->indicators = 0; 372 *indicators(vcdev) = 0;
352 ccw->flags = 0; 373 ccw->flags = 0;
353 ret = ccw_io_helper(vcdev, ccw, 374 ret = ccw_io_helper(vcdev, ccw,
354 vcdev->is_thinint ? 375 vcdev->is_thinint ?
@@ -359,8 +380,8 @@ static void virtio_ccw_drop_indicator(struct virtio_ccw_device *vcdev,
359 "Failed to deregister indicators (%d)\n", ret); 380 "Failed to deregister indicators (%d)\n", ret);
360 else if (vcdev->is_thinint) 381 else if (vcdev->is_thinint)
361 virtio_ccw_drop_indicators(vcdev); 382 virtio_ccw_drop_indicators(vcdev);
362 kfree(indicatorp); 383 ccw_device_dma_free(vcdev->cdev, indicatorp, sizeof(indicators(vcdev)));
363 kfree(thinint_area); 384 ccw_device_dma_free(vcdev->cdev, thinint_area, sizeof(*thinint_area));
364} 385}
365 386
366static inline long __do_kvm_notify(struct subchannel_id schid, 387static inline long __do_kvm_notify(struct subchannel_id schid,
@@ -407,15 +428,15 @@ static int virtio_ccw_read_vq_conf(struct virtio_ccw_device *vcdev,
407{ 428{
408 int ret; 429 int ret;
409 430
410 vcdev->config_block->index = index; 431 vcdev->dma_area->config_block.index = index;
411 ccw->cmd_code = CCW_CMD_READ_VQ_CONF; 432 ccw->cmd_code = CCW_CMD_READ_VQ_CONF;
412 ccw->flags = 0; 433 ccw->flags = 0;
413 ccw->count = sizeof(struct vq_config_block); 434 ccw->count = sizeof(struct vq_config_block);
414 ccw->cda = (__u32)(unsigned long)(vcdev->config_block); 435 ccw->cda = (__u32)(unsigned long)(&vcdev->dma_area->config_block);
415 ret = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_READ_VQ_CONF); 436 ret = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_READ_VQ_CONF);
416 if (ret) 437 if (ret)
417 return ret; 438 return ret;
418 return vcdev->config_block->num ?: -ENOENT; 439 return vcdev->dma_area->config_block.num ?: -ENOENT;
419} 440}
420 441
421static void virtio_ccw_del_vq(struct virtqueue *vq, struct ccw1 *ccw) 442static void virtio_ccw_del_vq(struct virtqueue *vq, struct ccw1 *ccw)
@@ -460,7 +481,8 @@ static void virtio_ccw_del_vq(struct virtqueue *vq, struct ccw1 *ccw)
460 ret, index); 481 ret, index);
461 482
462 vring_del_virtqueue(vq); 483 vring_del_virtqueue(vq);
463 kfree(info->info_block); 484 ccw_device_dma_free(vcdev->cdev, info->info_block,
485 sizeof(*info->info_block));
464 kfree(info); 486 kfree(info);
465} 487}
466 488
@@ -470,7 +492,7 @@ static void virtio_ccw_del_vqs(struct virtio_device *vdev)
470 struct ccw1 *ccw; 492 struct ccw1 *ccw;
471 struct virtio_ccw_device *vcdev = to_vc_device(vdev); 493 struct virtio_ccw_device *vcdev = to_vc_device(vdev);
472 494
473 ccw = kzalloc(sizeof(*ccw), GFP_DMA | GFP_KERNEL); 495 ccw = ccw_device_dma_zalloc(vcdev->cdev, sizeof(*ccw));
474 if (!ccw) 496 if (!ccw)
475 return; 497 return;
476 498
@@ -479,7 +501,7 @@ static void virtio_ccw_del_vqs(struct virtio_device *vdev)
479 list_for_each_entry_safe(vq, n, &vdev->vqs, list) 501 list_for_each_entry_safe(vq, n, &vdev->vqs, list)
480 virtio_ccw_del_vq(vq, ccw); 502 virtio_ccw_del_vq(vq, ccw);
481 503
482 kfree(ccw); 504 ccw_device_dma_free(vcdev->cdev, ccw, sizeof(*ccw));
483} 505}
484 506
485static struct virtqueue *virtio_ccw_setup_vq(struct virtio_device *vdev, 507static struct virtqueue *virtio_ccw_setup_vq(struct virtio_device *vdev,
@@ -502,8 +524,8 @@ static struct virtqueue *virtio_ccw_setup_vq(struct virtio_device *vdev,
502 err = -ENOMEM; 524 err = -ENOMEM;
503 goto out_err; 525 goto out_err;
504 } 526 }
505 info->info_block = kzalloc(sizeof(*info->info_block), 527 info->info_block = ccw_device_dma_zalloc(vcdev->cdev,
506 GFP_DMA | GFP_KERNEL); 528 sizeof(*info->info_block));
507 if (!info->info_block) { 529 if (!info->info_block) {
508 dev_warn(&vcdev->cdev->dev, "no info block\n"); 530 dev_warn(&vcdev->cdev->dev, "no info block\n");
509 err = -ENOMEM; 531 err = -ENOMEM;
@@ -567,7 +589,8 @@ out_err:
567 if (vq) 589 if (vq)
568 vring_del_virtqueue(vq); 590 vring_del_virtqueue(vq);
569 if (info) { 591 if (info) {
570 kfree(info->info_block); 592 ccw_device_dma_free(vcdev->cdev, info->info_block,
593 sizeof(*info->info_block));
571 } 594 }
572 kfree(info); 595 kfree(info);
573 return ERR_PTR(err); 596 return ERR_PTR(err);
@@ -581,7 +604,8 @@ static int virtio_ccw_register_adapter_ind(struct virtio_ccw_device *vcdev,
581 struct virtio_thinint_area *thinint_area = NULL; 604 struct virtio_thinint_area *thinint_area = NULL;
582 struct airq_info *info; 605 struct airq_info *info;
583 606
584 thinint_area = kzalloc(sizeof(*thinint_area), GFP_DMA | GFP_KERNEL); 607 thinint_area = ccw_device_dma_zalloc(vcdev->cdev,
608 sizeof(*thinint_area));
585 if (!thinint_area) { 609 if (!thinint_area) {
586 ret = -ENOMEM; 610 ret = -ENOMEM;
587 goto out; 611 goto out;
@@ -596,7 +620,7 @@ static int virtio_ccw_register_adapter_ind(struct virtio_ccw_device *vcdev,
596 } 620 }
597 info = vcdev->airq_info; 621 info = vcdev->airq_info;
598 thinint_area->summary_indicator = 622 thinint_area->summary_indicator =
599 (unsigned long) &info->summary_indicator; 623 (unsigned long) get_summary_indicator(info);
600 thinint_area->isc = VIRTIO_AIRQ_ISC; 624 thinint_area->isc = VIRTIO_AIRQ_ISC;
601 ccw->cmd_code = CCW_CMD_SET_IND_ADAPTER; 625 ccw->cmd_code = CCW_CMD_SET_IND_ADAPTER;
602 ccw->flags = CCW_FLAG_SLI; 626 ccw->flags = CCW_FLAG_SLI;
@@ -617,7 +641,7 @@ static int virtio_ccw_register_adapter_ind(struct virtio_ccw_device *vcdev,
617 virtio_ccw_drop_indicators(vcdev); 641 virtio_ccw_drop_indicators(vcdev);
618 } 642 }
619out: 643out:
620 kfree(thinint_area); 644 ccw_device_dma_free(vcdev->cdev, thinint_area, sizeof(*thinint_area));
621 return ret; 645 return ret;
622} 646}
623 647
@@ -633,7 +657,7 @@ static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs,
633 int ret, i, queue_idx = 0; 657 int ret, i, queue_idx = 0;
634 struct ccw1 *ccw; 658 struct ccw1 *ccw;
635 659
636 ccw = kzalloc(sizeof(*ccw), GFP_DMA | GFP_KERNEL); 660 ccw = ccw_device_dma_zalloc(vcdev->cdev, sizeof(*ccw));
637 if (!ccw) 661 if (!ccw)
638 return -ENOMEM; 662 return -ENOMEM;
639 663
@@ -657,10 +681,11 @@ static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs,
657 * We need a data area under 2G to communicate. Our payload is 681 * We need a data area under 2G to communicate. Our payload is
658 * the address of the indicators. 682 * the address of the indicators.
659 */ 683 */
660 indicatorp = kmalloc(sizeof(&vcdev->indicators), GFP_DMA | GFP_KERNEL); 684 indicatorp = ccw_device_dma_zalloc(vcdev->cdev,
685 sizeof(indicators(vcdev)));
661 if (!indicatorp) 686 if (!indicatorp)
662 goto out; 687 goto out;
663 *indicatorp = (unsigned long) &vcdev->indicators; 688 *indicatorp = (unsigned long) indicators(vcdev);
664 if (vcdev->is_thinint) { 689 if (vcdev->is_thinint) {
665 ret = virtio_ccw_register_adapter_ind(vcdev, vqs, nvqs, ccw); 690 ret = virtio_ccw_register_adapter_ind(vcdev, vqs, nvqs, ccw);
666 if (ret) 691 if (ret)
@@ -669,32 +694,36 @@ static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs,
669 } 694 }
670 if (!vcdev->is_thinint) { 695 if (!vcdev->is_thinint) {
671 /* Register queue indicators with host. */ 696 /* Register queue indicators with host. */
672 vcdev->indicators = 0; 697 *indicators(vcdev) = 0;
673 ccw->cmd_code = CCW_CMD_SET_IND; 698 ccw->cmd_code = CCW_CMD_SET_IND;
674 ccw->flags = 0; 699 ccw->flags = 0;
675 ccw->count = sizeof(&vcdev->indicators); 700 ccw->count = sizeof(indicators(vcdev));
676 ccw->cda = (__u32)(unsigned long) indicatorp; 701 ccw->cda = (__u32)(unsigned long) indicatorp;
677 ret = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_SET_IND); 702 ret = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_SET_IND);
678 if (ret) 703 if (ret)
679 goto out; 704 goto out;
680 } 705 }
681 /* Register indicators2 with host for config changes */ 706 /* Register indicators2 with host for config changes */
682 *indicatorp = (unsigned long) &vcdev->indicators2; 707 *indicatorp = (unsigned long) indicators2(vcdev);
683 vcdev->indicators2 = 0; 708 *indicators2(vcdev) = 0;
684 ccw->cmd_code = CCW_CMD_SET_CONF_IND; 709 ccw->cmd_code = CCW_CMD_SET_CONF_IND;
685 ccw->flags = 0; 710 ccw->flags = 0;
686 ccw->count = sizeof(&vcdev->indicators2); 711 ccw->count = sizeof(indicators2(vcdev));
687 ccw->cda = (__u32)(unsigned long) indicatorp; 712 ccw->cda = (__u32)(unsigned long) indicatorp;
688 ret = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_SET_CONF_IND); 713 ret = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_SET_CONF_IND);
689 if (ret) 714 if (ret)
690 goto out; 715 goto out;
691 716
692 kfree(indicatorp); 717 if (indicatorp)
693 kfree(ccw); 718 ccw_device_dma_free(vcdev->cdev, indicatorp,
719 sizeof(indicators(vcdev)));
720 ccw_device_dma_free(vcdev->cdev, ccw, sizeof(*ccw));
694 return 0; 721 return 0;
695out: 722out:
696 kfree(indicatorp); 723 if (indicatorp)
697 kfree(ccw); 724 ccw_device_dma_free(vcdev->cdev, indicatorp,
725 sizeof(indicators(vcdev)));
726 ccw_device_dma_free(vcdev->cdev, ccw, sizeof(*ccw));
698 virtio_ccw_del_vqs(vdev); 727 virtio_ccw_del_vqs(vdev);
699 return ret; 728 return ret;
700} 729}
@@ -704,12 +733,12 @@ static void virtio_ccw_reset(struct virtio_device *vdev)
704 struct virtio_ccw_device *vcdev = to_vc_device(vdev); 733 struct virtio_ccw_device *vcdev = to_vc_device(vdev);
705 struct ccw1 *ccw; 734 struct ccw1 *ccw;
706 735
707 ccw = kzalloc(sizeof(*ccw), GFP_DMA | GFP_KERNEL); 736 ccw = ccw_device_dma_zalloc(vcdev->cdev, sizeof(*ccw));
708 if (!ccw) 737 if (!ccw)
709 return; 738 return;
710 739
711 /* Zero status bits. */ 740 /* Zero status bits. */
712 *vcdev->status = 0; 741 vcdev->dma_area->status = 0;
713 742
714 /* Send a reset ccw on device. */ 743 /* Send a reset ccw on device. */
715 ccw->cmd_code = CCW_CMD_VDEV_RESET; 744 ccw->cmd_code = CCW_CMD_VDEV_RESET;
@@ -717,7 +746,7 @@ static void virtio_ccw_reset(struct virtio_device *vdev)
717 ccw->count = 0; 746 ccw->count = 0;
718 ccw->cda = 0; 747 ccw->cda = 0;
719 ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_RESET); 748 ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_RESET);
720 kfree(ccw); 749 ccw_device_dma_free(vcdev->cdev, ccw, sizeof(*ccw));
721} 750}
722 751
723static u64 virtio_ccw_get_features(struct virtio_device *vdev) 752static u64 virtio_ccw_get_features(struct virtio_device *vdev)
@@ -728,11 +757,11 @@ static u64 virtio_ccw_get_features(struct virtio_device *vdev)
728 u64 rc; 757 u64 rc;
729 struct ccw1 *ccw; 758 struct ccw1 *ccw;
730 759
731 ccw = kzalloc(sizeof(*ccw), GFP_DMA | GFP_KERNEL); 760 ccw = ccw_device_dma_zalloc(vcdev->cdev, sizeof(*ccw));
732 if (!ccw) 761 if (!ccw)
733 return 0; 762 return 0;
734 763
735 features = kzalloc(sizeof(*features), GFP_DMA | GFP_KERNEL); 764 features = ccw_device_dma_zalloc(vcdev->cdev, sizeof(*features));
736 if (!features) { 765 if (!features) {
737 rc = 0; 766 rc = 0;
738 goto out_free; 767 goto out_free;
@@ -765,8 +794,8 @@ static u64 virtio_ccw_get_features(struct virtio_device *vdev)
765 rc |= (u64)le32_to_cpu(features->features) << 32; 794 rc |= (u64)le32_to_cpu(features->features) << 32;
766 795
767out_free: 796out_free:
768 kfree(features); 797 ccw_device_dma_free(vcdev->cdev, features, sizeof(*features));
769 kfree(ccw); 798 ccw_device_dma_free(vcdev->cdev, ccw, sizeof(*ccw));
770 return rc; 799 return rc;
771} 800}
772 801
@@ -791,11 +820,11 @@ static int virtio_ccw_finalize_features(struct virtio_device *vdev)
791 return -EINVAL; 820 return -EINVAL;
792 } 821 }
793 822
794 ccw = kzalloc(sizeof(*ccw), GFP_DMA | GFP_KERNEL); 823 ccw = ccw_device_dma_zalloc(vcdev->cdev, sizeof(*ccw));
795 if (!ccw) 824 if (!ccw)
796 return -ENOMEM; 825 return -ENOMEM;
797 826
798 features = kzalloc(sizeof(*features), GFP_DMA | GFP_KERNEL); 827 features = ccw_device_dma_zalloc(vcdev->cdev, sizeof(*features));
799 if (!features) { 828 if (!features) {
800 ret = -ENOMEM; 829 ret = -ENOMEM;
801 goto out_free; 830 goto out_free;
@@ -830,8 +859,8 @@ static int virtio_ccw_finalize_features(struct virtio_device *vdev)
830 ret = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_WRITE_FEAT); 859 ret = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_WRITE_FEAT);
831 860
832out_free: 861out_free:
833 kfree(features); 862 ccw_device_dma_free(vcdev->cdev, features, sizeof(*features));
834 kfree(ccw); 863 ccw_device_dma_free(vcdev->cdev, ccw, sizeof(*ccw));
835 864
836 return ret; 865 return ret;
837} 866}
@@ -845,11 +874,12 @@ static void virtio_ccw_get_config(struct virtio_device *vdev,
845 void *config_area; 874 void *config_area;
846 unsigned long flags; 875 unsigned long flags;
847 876
848 ccw = kzalloc(sizeof(*ccw), GFP_DMA | GFP_KERNEL); 877 ccw = ccw_device_dma_zalloc(vcdev->cdev, sizeof(*ccw));
849 if (!ccw) 878 if (!ccw)
850 return; 879 return;
851 880
852 config_area = kzalloc(VIRTIO_CCW_CONFIG_SIZE, GFP_DMA | GFP_KERNEL); 881 config_area = ccw_device_dma_zalloc(vcdev->cdev,
882 VIRTIO_CCW_CONFIG_SIZE);
853 if (!config_area) 883 if (!config_area)
854 goto out_free; 884 goto out_free;
855 885
@@ -871,8 +901,8 @@ static void virtio_ccw_get_config(struct virtio_device *vdev,
871 memcpy(buf, config_area + offset, len); 901 memcpy(buf, config_area + offset, len);
872 902
873out_free: 903out_free:
874 kfree(config_area); 904 ccw_device_dma_free(vcdev->cdev, config_area, VIRTIO_CCW_CONFIG_SIZE);
875 kfree(ccw); 905 ccw_device_dma_free(vcdev->cdev, ccw, sizeof(*ccw));
876} 906}
877 907
878static void virtio_ccw_set_config(struct virtio_device *vdev, 908static void virtio_ccw_set_config(struct virtio_device *vdev,
@@ -884,11 +914,12 @@ static void virtio_ccw_set_config(struct virtio_device *vdev,
884 void *config_area; 914 void *config_area;
885 unsigned long flags; 915 unsigned long flags;
886 916
887 ccw = kzalloc(sizeof(*ccw), GFP_DMA | GFP_KERNEL); 917 ccw = ccw_device_dma_zalloc(vcdev->cdev, sizeof(*ccw));
888 if (!ccw) 918 if (!ccw)
889 return; 919 return;
890 920
891 config_area = kzalloc(VIRTIO_CCW_CONFIG_SIZE, GFP_DMA | GFP_KERNEL); 921 config_area = ccw_device_dma_zalloc(vcdev->cdev,
922 VIRTIO_CCW_CONFIG_SIZE);
892 if (!config_area) 923 if (!config_area)
893 goto out_free; 924 goto out_free;
894 925
@@ -907,61 +938,61 @@ static void virtio_ccw_set_config(struct virtio_device *vdev,
907 ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_WRITE_CONFIG); 938 ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_WRITE_CONFIG);
908 939
909out_free: 940out_free:
910 kfree(config_area); 941 ccw_device_dma_free(vcdev->cdev, config_area, VIRTIO_CCW_CONFIG_SIZE);
911 kfree(ccw); 942 ccw_device_dma_free(vcdev->cdev, ccw, sizeof(*ccw));
912} 943}
913 944
914static u8 virtio_ccw_get_status(struct virtio_device *vdev) 945static u8 virtio_ccw_get_status(struct virtio_device *vdev)
915{ 946{
916 struct virtio_ccw_device *vcdev = to_vc_device(vdev); 947 struct virtio_ccw_device *vcdev = to_vc_device(vdev);
917 u8 old_status = *vcdev->status; 948 u8 old_status = vcdev->dma_area->status;
918 struct ccw1 *ccw; 949 struct ccw1 *ccw;
919 950
920 if (vcdev->revision < 1) 951 if (vcdev->revision < 1)
921 return *vcdev->status; 952 return vcdev->dma_area->status;
922 953
923 ccw = kzalloc(sizeof(*ccw), GFP_DMA | GFP_KERNEL); 954 ccw = ccw_device_dma_zalloc(vcdev->cdev, sizeof(*ccw));
924 if (!ccw) 955 if (!ccw)
925 return old_status; 956 return old_status;
926 957
927 ccw->cmd_code = CCW_CMD_READ_STATUS; 958 ccw->cmd_code = CCW_CMD_READ_STATUS;
928 ccw->flags = 0; 959 ccw->flags = 0;
929 ccw->count = sizeof(*vcdev->status); 960 ccw->count = sizeof(vcdev->dma_area->status);
930 ccw->cda = (__u32)(unsigned long)vcdev->status; 961 ccw->cda = (__u32)(unsigned long)&vcdev->dma_area->status;
931 ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_READ_STATUS); 962 ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_READ_STATUS);
932/* 963/*
933 * If the channel program failed (should only happen if the device 964 * If the channel program failed (should only happen if the device
934 * was hotunplugged, and then we clean up via the machine check 965 * was hotunplugged, and then we clean up via the machine check
935 * handler anyway), vcdev->status was not overwritten and we just 966 * handler anyway), vcdev->dma_area->status was not overwritten and we just
936 * return the old status, which is fine. 967 * return the old status, which is fine.
937*/ 968*/
938 kfree(ccw); 969 ccw_device_dma_free(vcdev->cdev, ccw, sizeof(*ccw));
939 970
940 return *vcdev->status; 971 return vcdev->dma_area->status;
941} 972}
942 973
943static void virtio_ccw_set_status(struct virtio_device *vdev, u8 status) 974static void virtio_ccw_set_status(struct virtio_device *vdev, u8 status)
944{ 975{
945 struct virtio_ccw_device *vcdev = to_vc_device(vdev); 976 struct virtio_ccw_device *vcdev = to_vc_device(vdev);
946 u8 old_status = *vcdev->status; 977 u8 old_status = vcdev->dma_area->status;
947 struct ccw1 *ccw; 978 struct ccw1 *ccw;
948 int ret; 979 int ret;
949 980
950 ccw = kzalloc(sizeof(*ccw), GFP_DMA | GFP_KERNEL); 981 ccw = ccw_device_dma_zalloc(vcdev->cdev, sizeof(*ccw));
951 if (!ccw) 982 if (!ccw)
952 return; 983 return;
953 984
954 /* Write the status to the host. */ 985 /* Write the status to the host. */
955 *vcdev->status = status; 986 vcdev->dma_area->status = status;
956 ccw->cmd_code = CCW_CMD_WRITE_STATUS; 987 ccw->cmd_code = CCW_CMD_WRITE_STATUS;
957 ccw->flags = 0; 988 ccw->flags = 0;
958 ccw->count = sizeof(status); 989 ccw->count = sizeof(status);
959 ccw->cda = (__u32)(unsigned long)vcdev->status; 990 ccw->cda = (__u32)(unsigned long)&vcdev->dma_area->status;
960 ret = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_WRITE_STATUS); 991 ret = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_WRITE_STATUS);
961 /* Write failed? We assume status is unchanged. */ 992 /* Write failed? We assume status is unchanged. */
962 if (ret) 993 if (ret)
963 *vcdev->status = old_status; 994 vcdev->dma_area->status = old_status;
964 kfree(ccw); 995 ccw_device_dma_free(vcdev->cdev, ccw, sizeof(*ccw));
965} 996}
966 997
967static const char *virtio_ccw_bus_name(struct virtio_device *vdev) 998static const char *virtio_ccw_bus_name(struct virtio_device *vdev)
@@ -994,8 +1025,8 @@ static void virtio_ccw_release_dev(struct device *_d)
994 struct virtio_device *dev = dev_to_virtio(_d); 1025 struct virtio_device *dev = dev_to_virtio(_d);
995 struct virtio_ccw_device *vcdev = to_vc_device(dev); 1026 struct virtio_ccw_device *vcdev = to_vc_device(dev);
996 1027
997 kfree(vcdev->status); 1028 ccw_device_dma_free(vcdev->cdev, vcdev->dma_area,
998 kfree(vcdev->config_block); 1029 sizeof(*vcdev->dma_area));
999 kfree(vcdev); 1030 kfree(vcdev);
1000} 1031}
1001 1032
@@ -1093,17 +1124,17 @@ static void virtio_ccw_int_handler(struct ccw_device *cdev,
1093 vcdev->err = -EIO; 1124 vcdev->err = -EIO;
1094 } 1125 }
1095 virtio_ccw_check_activity(vcdev, activity); 1126 virtio_ccw_check_activity(vcdev, activity);
1096 for_each_set_bit(i, &vcdev->indicators, 1127 for_each_set_bit(i, indicators(vcdev),
1097 sizeof(vcdev->indicators) * BITS_PER_BYTE) { 1128 sizeof(*indicators(vcdev)) * BITS_PER_BYTE) {
1098 /* The bit clear must happen before the vring kick. */ 1129 /* The bit clear must happen before the vring kick. */
1099 clear_bit(i, &vcdev->indicators); 1130 clear_bit(i, indicators(vcdev));
1100 barrier(); 1131 barrier();
1101 vq = virtio_ccw_vq_by_ind(vcdev, i); 1132 vq = virtio_ccw_vq_by_ind(vcdev, i);
1102 vring_interrupt(0, vq); 1133 vring_interrupt(0, vq);
1103 } 1134 }
1104 if (test_bit(0, &vcdev->indicators2)) { 1135 if (test_bit(0, indicators2(vcdev))) {
1105 virtio_config_changed(&vcdev->vdev); 1136 virtio_config_changed(&vcdev->vdev);
1106 clear_bit(0, &vcdev->indicators2); 1137 clear_bit(0, indicators2(vcdev));
1107 } 1138 }
1108} 1139}
1109 1140
@@ -1203,12 +1234,12 @@ static int virtio_ccw_set_transport_rev(struct virtio_ccw_device *vcdev)
1203 struct ccw1 *ccw; 1234 struct ccw1 *ccw;
1204 int ret; 1235 int ret;
1205 1236
1206 ccw = kzalloc(sizeof(*ccw), GFP_DMA | GFP_KERNEL); 1237 ccw = ccw_device_dma_zalloc(vcdev->cdev, sizeof(*ccw));
1207 if (!ccw) 1238 if (!ccw)
1208 return -ENOMEM; 1239 return -ENOMEM;
1209 rev = kzalloc(sizeof(*rev), GFP_DMA | GFP_KERNEL); 1240 rev = ccw_device_dma_zalloc(vcdev->cdev, sizeof(*rev));
1210 if (!rev) { 1241 if (!rev) {
1211 kfree(ccw); 1242 ccw_device_dma_free(vcdev->cdev, ccw, sizeof(*ccw));
1212 return -ENOMEM; 1243 return -ENOMEM;
1213 } 1244 }
1214 1245
@@ -1238,8 +1269,8 @@ static int virtio_ccw_set_transport_rev(struct virtio_ccw_device *vcdev)
1238 } 1269 }
1239 } while (ret == -EOPNOTSUPP); 1270 } while (ret == -EOPNOTSUPP);
1240 1271
1241 kfree(ccw); 1272 ccw_device_dma_free(vcdev->cdev, ccw, sizeof(*ccw));
1242 kfree(rev); 1273 ccw_device_dma_free(vcdev->cdev, rev, sizeof(*rev));
1243 return ret; 1274 return ret;
1244} 1275}
1245 1276
@@ -1255,24 +1286,11 @@ static int virtio_ccw_online(struct ccw_device *cdev)
1255 ret = -ENOMEM; 1286 ret = -ENOMEM;
1256 goto out_free; 1287 goto out_free;
1257 } 1288 }
1258
1259 vcdev->vdev.dev.parent = &cdev->dev; 1289 vcdev->vdev.dev.parent = &cdev->dev;
1260 cdev->dev.dma_mask = &vcdev->dma_mask; 1290 vcdev->cdev = cdev;
1261 /* we are fine with common virtio infrastructure using 64 bit DMA */ 1291 vcdev->dma_area = ccw_device_dma_zalloc(vcdev->cdev,
1262 ret = dma_set_mask_and_coherent(&cdev->dev, DMA_BIT_MASK(64)); 1292 sizeof(*vcdev->dma_area));
1263 if (ret) { 1293 if (!vcdev->dma_area) {
1264 dev_warn(&cdev->dev, "Failed to enable 64-bit DMA.\n");
1265 goto out_free;
1266 }
1267
1268 vcdev->config_block = kzalloc(sizeof(*vcdev->config_block),
1269 GFP_DMA | GFP_KERNEL);
1270 if (!vcdev->config_block) {
1271 ret = -ENOMEM;
1272 goto out_free;
1273 }
1274 vcdev->status = kzalloc(sizeof(*vcdev->status), GFP_DMA | GFP_KERNEL);
1275 if (!vcdev->status) {
1276 ret = -ENOMEM; 1294 ret = -ENOMEM;
1277 goto out_free; 1295 goto out_free;
1278 } 1296 }
@@ -1281,7 +1299,6 @@ static int virtio_ccw_online(struct ccw_device *cdev)
1281 1299
1282 vcdev->vdev.dev.release = virtio_ccw_release_dev; 1300 vcdev->vdev.dev.release = virtio_ccw_release_dev;
1283 vcdev->vdev.config = &virtio_ccw_config_ops; 1301 vcdev->vdev.config = &virtio_ccw_config_ops;
1284 vcdev->cdev = cdev;
1285 init_waitqueue_head(&vcdev->wait_q); 1302 init_waitqueue_head(&vcdev->wait_q);
1286 INIT_LIST_HEAD(&vcdev->virtqueues); 1303 INIT_LIST_HEAD(&vcdev->virtqueues);
1287 spin_lock_init(&vcdev->lock); 1304 spin_lock_init(&vcdev->lock);
@@ -1312,8 +1329,8 @@ out_put:
1312 return ret; 1329 return ret;
1313out_free: 1330out_free:
1314 if (vcdev) { 1331 if (vcdev) {
1315 kfree(vcdev->status); 1332 ccw_device_dma_free(vcdev->cdev, vcdev->dma_area,
1316 kfree(vcdev->config_block); 1333 sizeof(*vcdev->dma_area));
1317 } 1334 }
1318 kfree(vcdev); 1335 kfree(vcdev);
1319 return ret; 1336 return ret;
@@ -1483,8 +1500,17 @@ static void __init no_auto_parse(void)
1483 1500
1484static int __init virtio_ccw_init(void) 1501static int __init virtio_ccw_init(void)
1485{ 1502{
1503 int rc;
1504
1486 /* parse no_auto string before we do anything further */ 1505 /* parse no_auto string before we do anything further */
1487 no_auto_parse(); 1506 no_auto_parse();
1488 return ccw_driver_register(&virtio_ccw_driver); 1507
1508 summary_indicators = cio_dma_zalloc(MAX_AIRQ_AREAS);
1509 if (!summary_indicators)
1510 return -ENOMEM;
1511 rc = ccw_driver_register(&virtio_ccw_driver);
1512 if (rc)
1513 cio_dma_free(summary_indicators, MAX_AIRQ_AREAS);
1514 return rc;
1489} 1515}
1490device_initcall(virtio_ccw_init); 1516device_initcall(virtio_ccw_init);
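The virtio_ccw hunks above replace the driver's scattered kzalloc(..., GFP_DMA | GFP_KERNEL) buffers (status, config_block, the indicator words) with one per-device area obtained from ccw_device_dma_zalloc() and released with ccw_device_dma_free(). Below is a minimal userspace sketch of that consolidation pattern only; the struct layout and helper names are illustrative stand-ins, not the s390 cio API.

#include <stdio.h>
#include <stdlib.h>

/* Illustrative stand-ins for the driver's per-device DMA state. */
struct vq_config_block { unsigned short index, num; };

struct vcdev_dma_area {
	struct vq_config_block config_block;
	unsigned char status;
	unsigned long indicators;
	unsigned long indicators2;
};

/* Stand-in for ccw_device_dma_zalloc(): one zeroed area per device. */
static struct vcdev_dma_area *dma_area_alloc(void)
{
	return calloc(1, sizeof(struct vcdev_dma_area));
}

/* Stand-in for ccw_device_dma_free(). */
static void dma_area_free(struct vcdev_dma_area *area)
{
	free(area);
}

int main(void)
{
	struct vcdev_dma_area *dma_area = dma_area_alloc();

	if (!dma_area)
		return 1;

	/* Fields that used to be separate GFP_DMA allocations are now
	 * plain members of the one shared area. */
	dma_area->status = 0;
	dma_area->config_block.index = 3;
	printf("status=%d index=%d\n",
	       dma_area->status, dma_area->config_block.index);

	dma_area_free(dma_area);
	return 0;
}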
diff --git a/drivers/scsi/vmw_pvscsi.c b/drivers/scsi/vmw_pvscsi.c
index ecee4b3ff073..377b07b2feeb 100644
--- a/drivers/scsi/vmw_pvscsi.c
+++ b/drivers/scsi/vmw_pvscsi.c
@@ -763,6 +763,7 @@ static int pvscsi_queue_lck(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd
763 struct pvscsi_adapter *adapter = shost_priv(host); 763 struct pvscsi_adapter *adapter = shost_priv(host);
764 struct pvscsi_ctx *ctx; 764 struct pvscsi_ctx *ctx;
765 unsigned long flags; 765 unsigned long flags;
766 unsigned char op;
766 767
767 spin_lock_irqsave(&adapter->hw_lock, flags); 768 spin_lock_irqsave(&adapter->hw_lock, flags);
768 769
@@ -775,13 +776,14 @@ static int pvscsi_queue_lck(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd
775 } 776 }
776 777
777 cmd->scsi_done = done; 778 cmd->scsi_done = done;
779 op = cmd->cmnd[0];
778 780
779 dev_dbg(&cmd->device->sdev_gendev, 781 dev_dbg(&cmd->device->sdev_gendev,
780 "queued cmd %p, ctx %p, op=%x\n", cmd, ctx, cmd->cmnd[0]); 782 "queued cmd %p, ctx %p, op=%x\n", cmd, ctx, op);
781 783
782 spin_unlock_irqrestore(&adapter->hw_lock, flags); 784 spin_unlock_irqrestore(&adapter->hw_lock, flags);
783 785
784 pvscsi_kick_io(adapter, cmd->cmnd[0]); 786 pvscsi_kick_io(adapter, op);
785 787
786 return 0; 788 return 0;
787} 789}
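The vmw_pvscsi change caches cmd->cmnd[0] into a local op while adapter->hw_lock is still held, so the value logged and passed to pvscsi_kick_io() after the unlock cannot be read from a command that has meanwhile completed and been recycled. A minimal sketch of that copy-under-lock pattern, using a plain pthread mutex instead of the SCSI host lock (names are illustrative):

#include <pthread.h>
#include <stdio.h>

struct cmd { unsigned char cmnd[16]; };

static pthread_mutex_t hw_lock = PTHREAD_MUTEX_INITIALIZER;

static void kick_io(unsigned char op)
{
	printf("kick for op 0x%x\n", op);
}

static void queue_cmd(struct cmd *cmd)
{
	unsigned char op;

	pthread_mutex_lock(&hw_lock);
	/* Copy the opcode while the command is still guaranteed valid. */
	op = cmd->cmnd[0];
	pthread_mutex_unlock(&hw_lock);

	/* After the unlock, *cmd may already have completed and been
	 * reused; only the cached copy is safe to use. */
	kick_io(op);
}

int main(void)
{
	struct cmd c = { .cmnd = { 0x2a } }; /* WRITE(10), as an example */

	queue_cmd(&c);
	return 0;
}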
diff --git a/drivers/soc/Makefile b/drivers/soc/Makefile
index 524ecdc2a9bb..2ec355003524 100644
--- a/drivers/soc/Makefile
+++ b/drivers/soc/Makefile
@@ -22,7 +22,7 @@ obj-$(CONFIG_ARCH_ROCKCHIP) += rockchip/
22obj-$(CONFIG_SOC_SAMSUNG) += samsung/ 22obj-$(CONFIG_SOC_SAMSUNG) += samsung/
23obj-y += sunxi/ 23obj-y += sunxi/
24obj-$(CONFIG_ARCH_TEGRA) += tegra/ 24obj-$(CONFIG_ARCH_TEGRA) += tegra/
25obj-$(CONFIG_SOC_TI) += ti/ 25obj-y += ti/
26obj-$(CONFIG_ARCH_U8500) += ux500/ 26obj-$(CONFIG_ARCH_U8500) += ux500/
27obj-$(CONFIG_PLAT_VERSATILE) += versatile/ 27obj-$(CONFIG_PLAT_VERSATILE) += versatile/
28obj-y += xilinx/ 28obj-y += xilinx/
diff --git a/drivers/soc/ti/Kconfig b/drivers/soc/ti/Kconfig
index ea0859f7b185..d7d50d48d05d 100644
--- a/drivers/soc/ti/Kconfig
+++ b/drivers/soc/ti/Kconfig
@@ -75,10 +75,10 @@ config TI_SCI_PM_DOMAINS
75 called ti_sci_pm_domains. Note this is needed early in boot before 75 called ti_sci_pm_domains. Note this is needed early in boot before
76 rootfs may be available. 76 rootfs may be available.
77 77
78endif # SOC_TI
79
78config TI_SCI_INTA_MSI_DOMAIN 80config TI_SCI_INTA_MSI_DOMAIN
79 bool 81 bool
80 select GENERIC_MSI_IRQ_DOMAIN 82 select GENERIC_MSI_IRQ_DOMAIN
81 help 83 help
82 Driver to enable Interrupt Aggregator specific MSI Domain. 84 Driver to enable Interrupt Aggregator specific MSI Domain.
83
84endif # SOC_TI
diff --git a/drivers/target/iscsi/iscsi_target_auth.c b/drivers/target/iscsi/iscsi_target_auth.c
index b6e4862cc242..51ddca2033e0 100644
--- a/drivers/target/iscsi/iscsi_target_auth.c
+++ b/drivers/target/iscsi/iscsi_target_auth.c
@@ -81,6 +81,12 @@ out:
81 return CHAP_DIGEST_UNKNOWN; 81 return CHAP_DIGEST_UNKNOWN;
82} 82}
83 83
84static void chap_close(struct iscsi_conn *conn)
85{
86 kfree(conn->auth_protocol);
87 conn->auth_protocol = NULL;
88}
89
84static struct iscsi_chap *chap_server_open( 90static struct iscsi_chap *chap_server_open(
85 struct iscsi_conn *conn, 91 struct iscsi_conn *conn,
86 struct iscsi_node_auth *auth, 92 struct iscsi_node_auth *auth,
@@ -118,7 +124,7 @@ static struct iscsi_chap *chap_server_open(
118 case CHAP_DIGEST_UNKNOWN: 124 case CHAP_DIGEST_UNKNOWN:
119 default: 125 default:
120 pr_err("Unsupported CHAP_A value\n"); 126 pr_err("Unsupported CHAP_A value\n");
121 kfree(conn->auth_protocol); 127 chap_close(conn);
122 return NULL; 128 return NULL;
123 } 129 }
124 130
@@ -133,19 +139,13 @@ static struct iscsi_chap *chap_server_open(
133 * Generate Challenge. 139 * Generate Challenge.
134 */ 140 */
135 if (chap_gen_challenge(conn, 1, aic_str, aic_len) < 0) { 141 if (chap_gen_challenge(conn, 1, aic_str, aic_len) < 0) {
136 kfree(conn->auth_protocol); 142 chap_close(conn);
137 return NULL; 143 return NULL;
138 } 144 }
139 145
140 return chap; 146 return chap;
141} 147}
142 148
143static void chap_close(struct iscsi_conn *conn)
144{
145 kfree(conn->auth_protocol);
146 conn->auth_protocol = NULL;
147}
148
149static int chap_server_compute_md5( 149static int chap_server_compute_md5(
150 struct iscsi_conn *conn, 150 struct iscsi_conn *conn,
151 struct iscsi_node_auth *auth, 151 struct iscsi_node_auth *auth,
diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
index f4a075303e9a..6949ea8bc387 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -502,7 +502,7 @@ iblock_execute_write_same(struct se_cmd *cmd)
502 502
503 /* Always in 512 byte units for Linux/Block */ 503 /* Always in 512 byte units for Linux/Block */
504 block_lba += sg->length >> SECTOR_SHIFT; 504 block_lba += sg->length >> SECTOR_SHIFT;
505 sectors -= 1; 505 sectors -= sg->length >> SECTOR_SHIFT;
506 } 506 }
507 507
508 iblock_submit_bios(&list); 508 iblock_submit_bios(&list);
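The target_core_iblock fix above makes the WRITE SAME loop subtract the number of 512-byte sectors covered by each segment instead of subtracting one per iteration, so the loop ends after exactly the requested range. A small standalone calculation showing the difference in loop counts (values are illustrative):

#include <stdio.h>

#define SECTOR_SHIFT 9 /* 512-byte sectors */

int main(void)
{
	unsigned int sg_length = 4096;      /* one 4 KiB segment per pass */
	unsigned long long sectors = 1024;  /* 512 KiB WRITE SAME request */
	unsigned int passes = 0;

	while (sectors) {
		/* The old code did "sectors -= 1" here and looped eight
		 * times too long for 4 KiB segments; the fix subtracts the
		 * sectors this segment actually covered. */
		sectors -= sg_length >> SECTOR_SHIFT;
		passes++;
	}

	printf("completed in %u passes\n", passes); /* 128, not 1024 */
	return 0;
}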
diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c
index e38f104db174..fde8d4073e74 100644
--- a/drivers/tty/tty_ldisc.c
+++ b/drivers/tty/tty_ldisc.c
@@ -487,7 +487,7 @@ static int tty_ldisc_open(struct tty_struct *tty, struct tty_ldisc *ld)
487 487
488static void tty_ldisc_close(struct tty_struct *tty, struct tty_ldisc *ld) 488static void tty_ldisc_close(struct tty_struct *tty, struct tty_ldisc *ld)
489{ 489{
490 lockdep_assert_held_exclusive(&tty->ldisc_sem); 490 lockdep_assert_held_write(&tty->ldisc_sem);
491 WARN_ON(!test_bit(TTY_LDISC_OPEN, &tty->flags)); 491 WARN_ON(!test_bit(TTY_LDISC_OPEN, &tty->flags));
492 clear_bit(TTY_LDISC_OPEN, &tty->flags); 492 clear_bit(TTY_LDISC_OPEN, &tty->flags);
493 if (ld->ops->close) 493 if (ld->ops->close)
@@ -509,7 +509,7 @@ static int tty_ldisc_failto(struct tty_struct *tty, int ld)
509 struct tty_ldisc *disc = tty_ldisc_get(tty, ld); 509 struct tty_ldisc *disc = tty_ldisc_get(tty, ld);
510 int r; 510 int r;
511 511
512 lockdep_assert_held_exclusive(&tty->ldisc_sem); 512 lockdep_assert_held_write(&tty->ldisc_sem);
513 if (IS_ERR(disc)) 513 if (IS_ERR(disc))
514 return PTR_ERR(disc); 514 return PTR_ERR(disc);
515 tty->ldisc = disc; 515 tty->ldisc = disc;
@@ -633,7 +633,7 @@ EXPORT_SYMBOL_GPL(tty_set_ldisc);
633 */ 633 */
634static void tty_ldisc_kill(struct tty_struct *tty) 634static void tty_ldisc_kill(struct tty_struct *tty)
635{ 635{
636 lockdep_assert_held_exclusive(&tty->ldisc_sem); 636 lockdep_assert_held_write(&tty->ldisc_sem);
637 if (!tty->ldisc) 637 if (!tty->ldisc)
638 return; 638 return;
639 /* 639 /*
@@ -681,7 +681,7 @@ int tty_ldisc_reinit(struct tty_struct *tty, int disc)
681 struct tty_ldisc *ld; 681 struct tty_ldisc *ld;
682 int retval; 682 int retval;
683 683
684 lockdep_assert_held_exclusive(&tty->ldisc_sem); 684 lockdep_assert_held_write(&tty->ldisc_sem);
685 ld = tty_ldisc_get(tty, disc); 685 ld = tty_ldisc_get(tty, disc);
686 if (IS_ERR(ld)) { 686 if (IS_ERR(ld)) {
687 BUG_ON(disc == N_TTY); 687 BUG_ON(disc == N_TTY);
diff --git a/fs/Kconfig b/fs/Kconfig
index f1046cf6ad85..bfb1c6095c7a 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -11,7 +11,6 @@ config DCACHE_WORD_ACCESS
11 11
12config VALIDATE_FS_PARSER 12config VALIDATE_FS_PARSER
13 bool "Validate filesystem parameter description" 13 bool "Validate filesystem parameter description"
14 default y
15 help 14 help
16 Enable this to perform validation of the parameter description for a 15 Enable this to perform validation of the parameter description for a
17 filesystem when it is registered. 16 filesystem when it is registered.
diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index d441bef72163..915010464572 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -275,9 +275,9 @@ static void afs_break_one_callback(struct afs_server *server,
275 struct afs_super_info *as = AFS_FS_S(cbi->sb); 275 struct afs_super_info *as = AFS_FS_S(cbi->sb);
276 struct afs_volume *volume = as->volume; 276 struct afs_volume *volume = as->volume;
277 277
278 write_lock(&volume->cb_break_lock); 278 write_lock(&volume->cb_v_break_lock);
279 volume->cb_v_break++; 279 volume->cb_v_break++;
280 write_unlock(&volume->cb_break_lock); 280 write_unlock(&volume->cb_v_break_lock);
281 } else { 281 } else {
282 data.volume = NULL; 282 data.volume = NULL;
283 data.fid = *fid; 283 data.fid = *fid;
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index b42d9d09669c..18a50d4febcf 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -56,6 +56,16 @@ static noinline void dump_vnode(struct afs_vnode *vnode, struct afs_vnode *paren
56} 56}
57 57
58/* 58/*
59 * Set the file size and block count. Estimate the number of 512 bytes blocks
60 * used, rounded up to nearest 1K for consistency with other AFS clients.
61 */
62static void afs_set_i_size(struct afs_vnode *vnode, u64 size)
63{
64 i_size_write(&vnode->vfs_inode, size);
65 vnode->vfs_inode.i_blocks = ((size + 1023) >> 10) << 1;
66}
67
68/*
59 * Initialise an inode from the vnode status. 69 * Initialise an inode from the vnode status.
60 */ 70 */
61static int afs_inode_init_from_status(struct afs_vnode *vnode, struct key *key, 71static int afs_inode_init_from_status(struct afs_vnode *vnode, struct key *key,
@@ -124,12 +134,7 @@ static int afs_inode_init_from_status(struct afs_vnode *vnode, struct key *key,
124 return afs_protocol_error(NULL, -EBADMSG, afs_eproto_file_type); 134 return afs_protocol_error(NULL, -EBADMSG, afs_eproto_file_type);
125 } 135 }
126 136
127 /* 137 afs_set_i_size(vnode, status->size);
128 * Estimate 512 bytes blocks used, rounded up to nearest 1K
129 * for consistency with other AFS clients.
130 */
131 inode->i_blocks = ((i_size_read(inode) + 1023) >> 10) << 1;
132 i_size_write(&vnode->vfs_inode, status->size);
133 138
134 vnode->invalid_before = status->data_version; 139 vnode->invalid_before = status->data_version;
135 inode_set_iversion_raw(&vnode->vfs_inode, status->data_version); 140 inode_set_iversion_raw(&vnode->vfs_inode, status->data_version);
@@ -207,11 +212,13 @@ static void afs_apply_status(struct afs_fs_cursor *fc,
207 212
208 if (expected_version && 213 if (expected_version &&
209 *expected_version != status->data_version) { 214 *expected_version != status->data_version) {
210 kdebug("vnode modified %llx on {%llx:%llu} [exp %llx] %s", 215 if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags))
211 (unsigned long long) status->data_version, 216 pr_warn("kAFS: vnode modified {%llx:%llu} %llx->%llx %s\n",
212 vnode->fid.vid, vnode->fid.vnode, 217 vnode->fid.vid, vnode->fid.vnode,
213 (unsigned long long) *expected_version, 218 (unsigned long long)*expected_version,
214 fc->type ? fc->type->name : "???"); 219 (unsigned long long)status->data_version,
220 fc->type ? fc->type->name : "???");
221
215 vnode->invalid_before = status->data_version; 222 vnode->invalid_before = status->data_version;
216 if (vnode->status.type == AFS_FTYPE_DIR) { 223 if (vnode->status.type == AFS_FTYPE_DIR) {
217 if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags)) 224 if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
@@ -230,7 +237,7 @@ static void afs_apply_status(struct afs_fs_cursor *fc,
230 237
231 if (data_changed) { 238 if (data_changed) {
232 inode_set_iversion_raw(&vnode->vfs_inode, status->data_version); 239 inode_set_iversion_raw(&vnode->vfs_inode, status->data_version);
233 i_size_write(&vnode->vfs_inode, status->size); 240 afs_set_i_size(vnode, status->size);
234 } 241 }
235} 242}
236 243
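The new afs_set_i_size() helper above derives i_blocks (in 512-byte units) by rounding the byte size up to the next 1 KiB, matching other AFS clients. A standalone check of that expression:

#include <stdio.h>

/* Same expression as afs_set_i_size(): round the byte size up to 1 KiB,
 * then express it in 512-byte blocks. */
static unsigned long long afs_blocks(unsigned long long size)
{
	return ((size + 1023) >> 10) << 1;
}

int main(void)
{
	/* 0 bytes -> 0 blocks; 1 byte -> 1 KiB -> 2 blocks;
	 * 1025 bytes -> 2 KiB -> 4 blocks. */
	printf("%llu %llu %llu\n",
	       afs_blocks(0), afs_blocks(1), afs_blocks(1025));
	return 0;
}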
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 8a67bf741880..7ee63526c6a2 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -109,10 +109,8 @@ struct afs_call {
109 struct rxrpc_call *rxcall; /* RxRPC call handle */ 109 struct rxrpc_call *rxcall; /* RxRPC call handle */
110 struct key *key; /* security for this call */ 110 struct key *key; /* security for this call */
111 struct afs_net *net; /* The network namespace */ 111 struct afs_net *net; /* The network namespace */
112 union { 112 struct afs_server *server; /* The fileserver record if fs op (pins ref) */
113 struct afs_server *server; 113 struct afs_vlserver *vlserver; /* The vlserver record if vl op */
114 struct afs_vlserver *vlserver;
115 };
116 struct afs_cb_interest *cbi; /* Callback interest for server used */ 114 struct afs_cb_interest *cbi; /* Callback interest for server used */
117 struct afs_vnode *lvnode; /* vnode being locked */ 115 struct afs_vnode *lvnode; /* vnode being locked */
118 void *request; /* request data (first part) */ 116 void *request; /* request data (first part) */
@@ -616,7 +614,7 @@ struct afs_volume {
616 unsigned int servers_seq; /* Incremented each time ->servers changes */ 614 unsigned int servers_seq; /* Incremented each time ->servers changes */
617 615
618 unsigned cb_v_break; /* Break-everything counter. */ 616 unsigned cb_v_break; /* Break-everything counter. */
619 rwlock_t cb_break_lock; 617 rwlock_t cb_v_break_lock;
620 618
621 afs_voltype_t type; /* type of volume */ 619 afs_voltype_t type; /* type of volume */
622 short error; 620 short error;
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index 08fdb3951c49..1a414300b654 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -43,6 +43,7 @@ static struct afs_volume *afs_alloc_volume(struct afs_fs_context *params,
43 atomic_set(&volume->usage, 1); 43 atomic_set(&volume->usage, 1);
44 INIT_LIST_HEAD(&volume->proc_link); 44 INIT_LIST_HEAD(&volume->proc_link);
45 rwlock_init(&volume->servers_lock); 45 rwlock_init(&volume->servers_lock);
46 rwlock_init(&volume->cb_v_break_lock);
46 memcpy(volume->name, vldb->name, vldb->name_len + 1); 47 memcpy(volume->name, vldb->name, vldb->name_len + 1);
47 48
48 slist = afs_alloc_server_list(params->cell, params->key, vldb, type_mask); 49 slist = afs_alloc_server_list(params->cell, params->key, vldb, type_mask);
diff --git a/fs/aio.c b/fs/aio.c
index 3490d1fa0e16..c1e581dd32f5 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -2095,6 +2095,7 @@ SYSCALL_DEFINE6(io_pgetevents,
2095 struct __aio_sigset ksig = { NULL, }; 2095 struct __aio_sigset ksig = { NULL, };
2096 sigset_t ksigmask, sigsaved; 2096 sigset_t ksigmask, sigsaved;
2097 struct timespec64 ts; 2097 struct timespec64 ts;
2098 bool interrupted;
2098 int ret; 2099 int ret;
2099 2100
2100 if (timeout && unlikely(get_timespec64(&ts, timeout))) 2101 if (timeout && unlikely(get_timespec64(&ts, timeout)))
@@ -2108,8 +2109,10 @@ SYSCALL_DEFINE6(io_pgetevents,
2108 return ret; 2109 return ret;
2109 2110
2110 ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL); 2111 ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL);
2111 restore_user_sigmask(ksig.sigmask, &sigsaved); 2112
2112 if (signal_pending(current) && !ret) 2113 interrupted = signal_pending(current);
2114 restore_user_sigmask(ksig.sigmask, &sigsaved, interrupted);
2115 if (interrupted && !ret)
2113 ret = -ERESTARTNOHAND; 2116 ret = -ERESTARTNOHAND;
2114 2117
2115 return ret; 2118 return ret;
@@ -2128,6 +2131,7 @@ SYSCALL_DEFINE6(io_pgetevents_time32,
2128 struct __aio_sigset ksig = { NULL, }; 2131 struct __aio_sigset ksig = { NULL, };
2129 sigset_t ksigmask, sigsaved; 2132 sigset_t ksigmask, sigsaved;
2130 struct timespec64 ts; 2133 struct timespec64 ts;
2134 bool interrupted;
2131 int ret; 2135 int ret;
2132 2136
2133 if (timeout && unlikely(get_old_timespec32(&ts, timeout))) 2137 if (timeout && unlikely(get_old_timespec32(&ts, timeout)))
@@ -2142,8 +2146,10 @@ SYSCALL_DEFINE6(io_pgetevents_time32,
2142 return ret; 2146 return ret;
2143 2147
2144 ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL); 2148 ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL);
2145 restore_user_sigmask(ksig.sigmask, &sigsaved); 2149
2146 if (signal_pending(current) && !ret) 2150 interrupted = signal_pending(current);
2151 restore_user_sigmask(ksig.sigmask, &sigsaved, interrupted);
2152 if (interrupted && !ret)
2147 ret = -ERESTARTNOHAND; 2153 ret = -ERESTARTNOHAND;
2148 2154
2149 return ret; 2155 return ret;
@@ -2193,6 +2199,7 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents,
2193 struct __compat_aio_sigset ksig = { NULL, }; 2199 struct __compat_aio_sigset ksig = { NULL, };
2194 sigset_t ksigmask, sigsaved; 2200 sigset_t ksigmask, sigsaved;
2195 struct timespec64 t; 2201 struct timespec64 t;
2202 bool interrupted;
2196 int ret; 2203 int ret;
2197 2204
2198 if (timeout && get_old_timespec32(&t, timeout)) 2205 if (timeout && get_old_timespec32(&t, timeout))
@@ -2206,8 +2213,10 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents,
2206 return ret; 2213 return ret;
2207 2214
2208 ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL); 2215 ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
2209 restore_user_sigmask(ksig.sigmask, &sigsaved); 2216
2210 if (signal_pending(current) && !ret) 2217 interrupted = signal_pending(current);
2218 restore_user_sigmask(ksig.sigmask, &sigsaved, interrupted);
2219 if (interrupted && !ret)
2211 ret = -ERESTARTNOHAND; 2220 ret = -ERESTARTNOHAND;
2212 2221
2213 return ret; 2222 return ret;
@@ -2226,6 +2235,7 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents_time64,
2226 struct __compat_aio_sigset ksig = { NULL, }; 2235 struct __compat_aio_sigset ksig = { NULL, };
2227 sigset_t ksigmask, sigsaved; 2236 sigset_t ksigmask, sigsaved;
2228 struct timespec64 t; 2237 struct timespec64 t;
2238 bool interrupted;
2229 int ret; 2239 int ret;
2230 2240
2231 if (timeout && get_timespec64(&t, timeout)) 2241 if (timeout && get_timespec64(&t, timeout))
@@ -2239,8 +2249,10 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents_time64,
2239 return ret; 2249 return ret;
2240 2250
2241 ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL); 2251 ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
2242 restore_user_sigmask(ksig.sigmask, &sigsaved); 2252
2243 if (signal_pending(current) && !ret) 2253 interrupted = signal_pending(current);
2254 restore_user_sigmask(ksig.sigmask, &sigsaved, interrupted);
2255 if (interrupted && !ret)
2244 ret = -ERESTARTNOHAND; 2256 ret = -ERESTARTNOHAND;
2245 2257
2246 return ret; 2258 return ret;
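Each io_pgetevents variant above now samples signal_pending(current) into a local interrupted flag, hands that flag to restore_user_sigmask(), and only then decides whether to return -ERESTARTNOHAND, so the restart decision and the sigmask restoration both work from the same snapshot of the pending state. A minimal sketch of that ordering (illustrative stand-ins, not the kernel signal API):

#include <stdbool.h>
#include <stdio.h>

#define ERESTARTNOHAND 514

static bool fake_signal_pending = true; /* pretend a signal arrived */

static void restore_sigmask(bool interrupted)
{
	/* In the kernel this either lets the signal be delivered or only
	 * restores the saved mask, depending on "interrupted"; here we
	 * just report the flag. */
	printf("restore sigmask, interrupted=%d\n", interrupted);
}

int main(void)
{
	int ret = 0; /* pretend do_io_getevents() returned no events */
	bool interrupted;

	/* Sample the pending state first, restore the mask using that
	 * snapshot, then decide on the restart return code. */
	interrupted = fake_signal_pending;
	restore_sigmask(interrupted);
	if (interrupted && !ret)
		ret = -ERESTARTNOHAND;

	printf("ret=%d\n", ret);
	return 0;
}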
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 82a48e830018..e4b59e76afb0 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -856,9 +856,14 @@ err:
856 856
857static int load_flat_shared_library(int id, struct lib_info *libs) 857static int load_flat_shared_library(int id, struct lib_info *libs)
858{ 858{
859 /*
860 * This is a fake bprm struct; only the members "buf", "file" and
861 * "filename" are actually used.
862 */
859 struct linux_binprm bprm; 863 struct linux_binprm bprm;
860 int res; 864 int res;
861 char buf[16]; 865 char buf[16];
866 loff_t pos = 0;
862 867
863 memset(&bprm, 0, sizeof(bprm)); 868 memset(&bprm, 0, sizeof(bprm));
864 869
@@ -872,25 +877,11 @@ static int load_flat_shared_library(int id, struct lib_info *libs)
872 if (IS_ERR(bprm.file)) 877 if (IS_ERR(bprm.file))
873 return res; 878 return res;
874 879
875 bprm.cred = prepare_exec_creds(); 880 res = kernel_read(bprm.file, bprm.buf, BINPRM_BUF_SIZE, &pos);
876 res = -ENOMEM;
877 if (!bprm.cred)
878 goto out;
879
880 /* We don't really care about recalculating credentials at this point
881 * as we're past the point of no return and are dealing with shared
882 * libraries.
883 */
884 bprm.called_set_creds = 1;
885 881
886 res = prepare_binprm(&bprm); 882 if (res >= 0)
887
888 if (!res)
889 res = load_flat_file(&bprm, libs, id, NULL); 883 res = load_flat_file(&bprm, libs, id, NULL);
890 884
891 abort_creds(bprm.cred);
892
893out:
894 allow_write_access(bprm.file); 885 allow_write_access(bprm.file);
895 fput(bprm.file); 886 fput(bprm.file);
896 887
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 6af2d0d4a87a..c8a9b89b922d 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2121,9 +2121,10 @@ retry:
2121 if (inode && ceph_snap(inode) == CEPH_SNAPDIR) { 2121 if (inode && ceph_snap(inode) == CEPH_SNAPDIR) {
2122 dout("build_path path+%d: %p SNAPDIR\n", 2122 dout("build_path path+%d: %p SNAPDIR\n",
2123 pos, temp); 2123 pos, temp);
2124 } else if (stop_on_nosnap && inode && 2124 } else if (stop_on_nosnap && inode && dentry != temp &&
2125 ceph_snap(inode) == CEPH_NOSNAP) { 2125 ceph_snap(inode) == CEPH_NOSNAP) {
2126 spin_unlock(&temp->d_lock); 2126 spin_unlock(&temp->d_lock);
2127 pos++; /* get rid of any prepended '/' */
2127 break; 2128 break;
2128 } else { 2129 } else {
2129 pos -= temp->d_name.len; 2130 pos -= temp->d_name.len;
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 3fdc6a41b304..9fd56b0acd7e 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -2372,6 +2372,41 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses,
2372 kfree(dfs_rsp); 2372 kfree(dfs_rsp);
2373 return rc; 2373 return rc;
2374} 2374}
2375
2376static int
2377parse_reparse_symlink(struct reparse_symlink_data_buffer *symlink_buf,
2378 u32 plen, char **target_path,
2379 struct cifs_sb_info *cifs_sb)
2380{
2381 unsigned int sub_len;
2382 unsigned int sub_offset;
2383
2384 /* We only handle Symbolic Link : MS-FSCC 2.1.2.4 */
2385 if (le32_to_cpu(symlink_buf->ReparseTag) != IO_REPARSE_TAG_SYMLINK) {
2386 cifs_dbg(VFS, "srv returned invalid symlink buffer\n");
2387 return -EIO;
2388 }
2389
2390 sub_offset = le16_to_cpu(symlink_buf->SubstituteNameOffset);
2391 sub_len = le16_to_cpu(symlink_buf->SubstituteNameLength);
2392 if (sub_offset + 20 > plen ||
2393 sub_offset + sub_len + 20 > plen) {
2394 cifs_dbg(VFS, "srv returned malformed symlink buffer\n");
2395 return -EIO;
2396 }
2397
2398 *target_path = cifs_strndup_from_utf16(
2399 symlink_buf->PathBuffer + sub_offset,
2400 sub_len, true, cifs_sb->local_nls);
2401 if (!(*target_path))
2402 return -ENOMEM;
2403
2404 convert_delimiter(*target_path, '/');
2405 cifs_dbg(FYI, "%s: target path: %s\n", __func__, *target_path);
2406
2407 return 0;
2408}
2409
2375#define SMB2_SYMLINK_STRUCT_SIZE \ 2410#define SMB2_SYMLINK_STRUCT_SIZE \
2376 (sizeof(struct smb2_err_rsp) - 1 + sizeof(struct smb2_symlink_err_rsp)) 2411 (sizeof(struct smb2_err_rsp) - 1 + sizeof(struct smb2_symlink_err_rsp))
2377 2412
@@ -2401,11 +2436,13 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon,
2401 struct kvec close_iov[1]; 2436 struct kvec close_iov[1];
2402 struct smb2_create_rsp *create_rsp; 2437 struct smb2_create_rsp *create_rsp;
2403 struct smb2_ioctl_rsp *ioctl_rsp; 2438 struct smb2_ioctl_rsp *ioctl_rsp;
2404 char *ioctl_buf; 2439 struct reparse_data_buffer *reparse_buf;
2405 u32 plen; 2440 u32 plen;
2406 2441
2407 cifs_dbg(FYI, "%s: path: %s\n", __func__, full_path); 2442 cifs_dbg(FYI, "%s: path: %s\n", __func__, full_path);
2408 2443
2444 *target_path = NULL;
2445
2409 if (smb3_encryption_required(tcon)) 2446 if (smb3_encryption_required(tcon))
2410 flags |= CIFS_TRANSFORM_REQ; 2447 flags |= CIFS_TRANSFORM_REQ;
2411 2448
@@ -2483,17 +2520,36 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon,
2483 if ((rc == 0) && (is_reparse_point)) { 2520 if ((rc == 0) && (is_reparse_point)) {
2484 /* See MS-FSCC 2.3.23 */ 2521 /* See MS-FSCC 2.3.23 */
2485 2522
2486 ioctl_buf = (char *)ioctl_rsp + le32_to_cpu(ioctl_rsp->OutputOffset); 2523 reparse_buf = (struct reparse_data_buffer *)
2524 ((char *)ioctl_rsp +
2525 le32_to_cpu(ioctl_rsp->OutputOffset));
2487 plen = le32_to_cpu(ioctl_rsp->OutputCount); 2526 plen = le32_to_cpu(ioctl_rsp->OutputCount);
2488 2527
2489 if (plen + le32_to_cpu(ioctl_rsp->OutputOffset) > 2528 if (plen + le32_to_cpu(ioctl_rsp->OutputOffset) >
2490 rsp_iov[1].iov_len) { 2529 rsp_iov[1].iov_len) {
2491 cifs_dbg(VFS, "srv returned invalid ioctl length: %d\n", plen); 2530 cifs_dbg(VFS, "srv returned invalid ioctl len: %d\n",
2531 plen);
2532 rc = -EIO;
2533 goto querty_exit;
2534 }
2535
2536 if (plen < 8) {
2537 cifs_dbg(VFS, "reparse buffer is too small. Must be "
2538 "at least 8 bytes but was %d\n", plen);
2539 rc = -EIO;
2540 goto querty_exit;
2541 }
2542
2543 if (plen < le16_to_cpu(reparse_buf->ReparseDataLength) + 8) {
2544 cifs_dbg(VFS, "srv returned invalid reparse buf "
2545 "length: %d\n", plen);
2492 rc = -EIO; 2546 rc = -EIO;
2493 goto querty_exit; 2547 goto querty_exit;
2494 } 2548 }
2495 2549
2496 /* Do stuff with ioctl_buf/plen */ 2550 rc = parse_reparse_symlink(
2551 (struct reparse_symlink_data_buffer *)reparse_buf,
2552 plen, target_path, cifs_sb);
2497 goto querty_exit; 2553 goto querty_exit;
2498 } 2554 }
2499 2555
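parse_reparse_symlink() in the smb2ops.c hunk only copies the substitute name after checking that the 20-byte fixed header plus the name offset, and the offset plus name length, both fit inside the reparse payload plen. A tiny standalone version of that bounds check (values are illustrative, not SMB wire data):

#include <stdbool.h>
#include <stdio.h>

/* Fixed part of the symlink reparse buffer preceding PathBuffer: tag,
 * data length, reserved, two offset/length pairs and flags = 20 bytes. */
#define SYMLINK_FIXED_HDR 20

static bool name_fits(unsigned int sub_offset, unsigned int sub_len,
		      unsigned int plen)
{
	if (sub_offset + SYMLINK_FIXED_HDR > plen ||
	    sub_offset + sub_len + SYMLINK_FIXED_HDR > plen)
		return false;
	return true;
}

int main(void)
{
	printf("%d %d\n",
	       name_fits(0, 10, 64),   /* fits: 1 */
	       name_fits(60, 10, 64)); /* overruns the payload: 0 */
	return 0;
}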
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index c7d5813bebd8..858353d20c39 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -914,7 +914,19 @@ struct reparse_mount_point_data_buffer {
914 __u8 PathBuffer[0]; /* Variable Length */ 914 __u8 PathBuffer[0]; /* Variable Length */
915} __packed; 915} __packed;
916 916
917/* See MS-FSCC 2.1.2.4 and cifspdu.h for struct reparse_symlink_data */ 917#define SYMLINK_FLAG_RELATIVE 0x00000001
918
919struct reparse_symlink_data_buffer {
920 __le32 ReparseTag;
921 __le16 ReparseDataLength;
922 __u16 Reserved;
923 __le16 SubstituteNameOffset;
924 __le16 SubstituteNameLength;
925 __le16 PrintNameOffset;
926 __le16 PrintNameLength;
927 __le32 Flags;
928 __u8 PathBuffer[0]; /* Variable Length */
929} __packed;
918 930
919/* See MS-FSCC 2.1.2.6 and cifspdu.h for struct reparse_posix_data */ 931/* See MS-FSCC 2.1.2.6 and cifspdu.h for struct reparse_posix_data */
920 932
diff --git a/fs/dax.c b/fs/dax.c
index 2e48c7ebb973..fe5e33810cd4 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -720,12 +720,11 @@ static void *dax_insert_entry(struct xa_state *xas,
720 720
721 xas_reset(xas); 721 xas_reset(xas);
722 xas_lock_irq(xas); 722 xas_lock_irq(xas);
723 if (dax_entry_size(entry) != dax_entry_size(new_entry)) { 723 if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
724 void *old;
725
724 dax_disassociate_entry(entry, mapping, false); 726 dax_disassociate_entry(entry, mapping, false);
725 dax_associate_entry(new_entry, mapping, vmf->vma, vmf->address); 727 dax_associate_entry(new_entry, mapping, vmf->vma, vmf->address);
726 }
727
728 if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
729 /* 728 /*
730 * Only swap our new entry into the page cache if the current 729 * Only swap our new entry into the page cache if the current
731 * entry is a zero page or an empty entry. If a normal PTE or 730 * entry is a zero page or an empty entry. If a normal PTE or
@@ -734,7 +733,7 @@ static void *dax_insert_entry(struct xa_state *xas,
734 * existing entry is a PMD, we will just leave the PMD in the 733 * existing entry is a PMD, we will just leave the PMD in the
735 * tree and dirty it if necessary. 734 * tree and dirty it if necessary.
736 */ 735 */
737 void *old = dax_lock_entry(xas, new_entry); 736 old = dax_lock_entry(xas, new_entry);
738 WARN_ON_ONCE(old != xa_mk_value(xa_to_value(entry) | 737 WARN_ON_ONCE(old != xa_mk_value(xa_to_value(entry) |
739 DAX_LOCKED)); 738 DAX_LOCKED));
740 entry = new_entry; 739 entry = new_entry;
@@ -1188,7 +1187,7 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
1188 unsigned flags = 0; 1187 unsigned flags = 0;
1189 1188
1190 if (iov_iter_rw(iter) == WRITE) { 1189 if (iov_iter_rw(iter) == WRITE) {
1191 lockdep_assert_held_exclusive(&inode->i_rwsem); 1190 lockdep_assert_held_write(&inode->i_rwsem);
1192 flags |= IOMAP_WRITE; 1191 flags |= IOMAP_WRITE;
1193 } else { 1192 } else {
1194 lockdep_assert_held(&inode->i_rwsem); 1193 lockdep_assert_held(&inode->i_rwsem);
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index c6f513100cc9..4c74c768ae43 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -2325,7 +2325,7 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events,
2325 2325
2326 error = do_epoll_wait(epfd, events, maxevents, timeout); 2326 error = do_epoll_wait(epfd, events, maxevents, timeout);
2327 2327
2328 restore_user_sigmask(sigmask, &sigsaved); 2328 restore_user_sigmask(sigmask, &sigsaved, error == -EINTR);
2329 2329
2330 return error; 2330 return error;
2331} 2331}
@@ -2350,7 +2350,7 @@ COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd,
2350 2350
2351 err = do_epoll_wait(epfd, events, maxevents, timeout); 2351 err = do_epoll_wait(epfd, events, maxevents, timeout);
2352 2352
2353 restore_user_sigmask(sigmask, &sigsaved); 2353 restore_user_sigmask(sigmask, &sigsaved, err == -EINTR);
2354 2354
2355 return err; 2355 return err;
2356} 2356}
diff --git a/fs/inode.c b/fs/inode.c
index df6542ec3b88..2bf21e2c90fc 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -362,7 +362,7 @@ EXPORT_SYMBOL(inc_nlink);
362 362
363static void __address_space_init_once(struct address_space *mapping) 363static void __address_space_init_once(struct address_space *mapping)
364{ 364{
365 xa_init_flags(&mapping->i_pages, XA_FLAGS_LOCK_IRQ); 365 xa_init_flags(&mapping->i_pages, XA_FLAGS_LOCK_IRQ | XA_FLAGS_ACCOUNT);
366 init_rwsem(&mapping->i_mmap_rwsem); 366 init_rwsem(&mapping->i_mmap_rwsem);
367 INIT_LIST_HEAD(&mapping->private_list); 367 INIT_LIST_HEAD(&mapping->private_list);
368 spin_lock_init(&mapping->private_lock); 368 spin_lock_init(&mapping->private_lock);
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 86a2bd721900..4ef62a45045d 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -579,6 +579,7 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
579 state->cur_req++; 579 state->cur_req++;
580 } 580 }
581 581
582 req->file = NULL;
582 req->ctx = ctx; 583 req->ctx = ctx;
583 req->flags = 0; 584 req->flags = 0;
584 /* one is dropped after submission, the other at completion */ 585 /* one is dropped after submission, the other at completion */
@@ -1801,10 +1802,8 @@ static int io_req_set_file(struct io_ring_ctx *ctx, const struct sqe_submit *s,
1801 req->sequence = ctx->cached_sq_head - 1; 1802 req->sequence = ctx->cached_sq_head - 1;
1802 } 1803 }
1803 1804
1804 if (!io_op_needs_file(s->sqe)) { 1805 if (!io_op_needs_file(s->sqe))
1805 req->file = NULL;
1806 return 0; 1806 return 0;
1807 }
1808 1807
1809 if (flags & IOSQE_FIXED_FILE) { 1808 if (flags & IOSQE_FIXED_FILE) {
1810 if (unlikely(!ctx->user_files || 1809 if (unlikely(!ctx->user_files ||
@@ -2201,11 +2200,12 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
2201 } 2200 }
2202 2201
2203 ret = wait_event_interruptible(ctx->wait, io_cqring_events(ring) >= min_events); 2202 ret = wait_event_interruptible(ctx->wait, io_cqring_events(ring) >= min_events);
2204 if (ret == -ERESTARTSYS)
2205 ret = -EINTR;
2206 2203
2207 if (sig) 2204 if (sig)
2208 restore_user_sigmask(sig, &sigsaved); 2205 restore_user_sigmask(sig, &sigsaved, ret == -ERESTARTSYS);
2206
2207 if (ret == -ERESTARTSYS)
2208 ret = -EINTR;
2209 2209
2210 return READ_ONCE(ring->r.head) == READ_ONCE(ring->r.tail) ? ret : 0; 2210 return READ_ONCE(ring->r.head) == READ_ONCE(ring->r.tail) ? ret : 0;
2211} 2211}
diff --git a/fs/namespace.c b/fs/namespace.c
index 7660c2749c96..6fbc9126367a 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2596,11 +2596,12 @@ static int do_move_mount(struct path *old_path, struct path *new_path)
2596 if (!check_mnt(p)) 2596 if (!check_mnt(p))
2597 goto out; 2597 goto out;
2598 2598
2599 /* The thing moved should be either ours or completely unattached. */ 2599 /* The thing moved must be mounted... */
2600 if (attached && !check_mnt(old)) 2600 if (!is_mounted(&old->mnt))
2601 goto out; 2601 goto out;
2602 2602
2603 if (!attached && !(ns && is_anon_ns(ns))) 2603 /* ... and either ours or the root of anon namespace */
2604 if (!(attached ? check_mnt(old) : is_anon_ns(ns)))
2604 goto out; 2605 goto out;
2605 2606
2606 if (old->mnt.mnt_flags & MNT_LOCKED) 2607 if (old->mnt.mnt_flags & MNT_LOCKED)
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index a809989807d6..19f856f45689 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -18,7 +18,7 @@
18 18
19#define NFSDBG_FACILITY NFSDBG_PNFS_LD 19#define NFSDBG_FACILITY NFSDBG_PNFS_LD
20 20
21static unsigned int dataserver_timeo = NFS_DEF_TCP_RETRANS; 21static unsigned int dataserver_timeo = NFS_DEF_TCP_TIMEO;
22static unsigned int dataserver_retrans; 22static unsigned int dataserver_retrans;
23 23
24static bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg); 24static bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 618e66078ee5..1a0cdeb3b875 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1563,7 +1563,7 @@ static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca)
1563 * Never use more than a third of the remaining memory, 1563 * Never use more than a third of the remaining memory,
1564 * unless it's the only way to give this client a slot: 1564 * unless it's the only way to give this client a slot:
1565 */ 1565 */
1566 avail = clamp_t(int, avail, slotsize, total_avail/3); 1566 avail = clamp_t(unsigned long, avail, slotsize, total_avail/3);
1567 num = min_t(int, num, avail / slotsize); 1567 num = min_t(int, num, avail / slotsize);
1568 nfsd_drc_mem_used += num * slotsize; 1568 nfsd_drc_mem_used += num * slotsize;
1569 spin_unlock(&nfsd_drc_lock); 1569 spin_unlock(&nfsd_drc_lock);
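The nfsd4_get_drc_mem() change clamps avail as unsigned long rather than int, so on large machines total_avail/3 is no longer forced through a 32-bit intermediate before the comparison. A small demonstration of why the cast type matters (illustrative sizes; assumes a typical LP64 system where int is 32 bits, and a clamp macro written in the same spirit as the kernel's clamp_t):

#include <stdio.h>

/* Same idea as the kernel's clamp_t(): clamp after casting to "type". */
#define clamp_t(type, val, lo, hi) \
	((type)(val) < (type)(lo) ? (type)(lo) : \
	 (type)(val) > (type)(hi) ? (type)(hi) : (type)(val))

int main(void)
{
	unsigned long slotsize = 2048;
	unsigned long avail = 4UL << 30;        /* 4 GiB reported free */
	unsigned long total_avail = 12UL << 30; /* 12 GiB total */

	/* Cast to int, the 4 GiB values wrap to 0 and the clamp collapses
	 * to the lower bound (slotsize); as unsigned long the intended
	 * one-third bound survives. */
	printf("int:   %d\n", clamp_t(int, avail, slotsize, total_avail / 3));
	printf("ulong: %lu\n",
	       clamp_t(unsigned long, avail, slotsize, total_avail / 3));
	return 0;
}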
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 84908556ea58..46dcb6f0eccf 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -462,7 +462,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
462 * a program is not able to use ptrace(2) in that case. It is 462 * a program is not able to use ptrace(2) in that case. It is
463 * safe because the task has stopped executing permanently. 463 * safe because the task has stopped executing permanently.
464 */ 464 */
465 if (permitted && (task->flags & PF_DUMPCORE)) { 465 if (permitted && (task->flags & (PF_EXITING|PF_DUMPCORE))) {
466 if (try_get_task_stack(task)) { 466 if (try_get_task_stack(task)) {
467 eip = KSTK_EIP(task); 467 eip = KSTK_EIP(task);
468 esp = KSTK_ESP(task); 468 esp = KSTK_ESP(task);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 9c8ca6cd3ce4..255f6754c70d 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -3077,8 +3077,7 @@ static const struct file_operations proc_tgid_base_operations = {
3077 3077
3078struct pid *tgid_pidfd_to_pid(const struct file *file) 3078struct pid *tgid_pidfd_to_pid(const struct file *file)
3079{ 3079{
3080 if (!d_is_dir(file->f_path.dentry) || 3080 if (file->f_op != &proc_tgid_base_operations)
3081 (file->f_op != &proc_tgid_base_operations))
3082 return ERR_PTR(-EBADF); 3081 return ERR_PTR(-EBADF);
3083 3082
3084 return proc_pid(file_inode(file)); 3083 return proc_pid(file_inode(file));
diff --git a/fs/select.c b/fs/select.c
index 6cbc9ff56ba0..a4d8f6e8b63c 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -758,10 +758,9 @@ static long do_pselect(int n, fd_set __user *inp, fd_set __user *outp,
758 return ret; 758 return ret;
759 759
760 ret = core_sys_select(n, inp, outp, exp, to); 760 ret = core_sys_select(n, inp, outp, exp, to);
761 restore_user_sigmask(sigmask, &sigsaved, ret == -ERESTARTNOHAND);
761 ret = poll_select_copy_remaining(&end_time, tsp, type, ret); 762 ret = poll_select_copy_remaining(&end_time, tsp, type, ret);
762 763
763 restore_user_sigmask(sigmask, &sigsaved);
764
765 return ret; 764 return ret;
766} 765}
767 766
@@ -1106,8 +1105,7 @@ SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, unsigned int, nfds,
1106 1105
1107 ret = do_sys_poll(ufds, nfds, to); 1106 ret = do_sys_poll(ufds, nfds, to);
1108 1107
1109 restore_user_sigmask(sigmask, &sigsaved); 1108 restore_user_sigmask(sigmask, &sigsaved, ret == -EINTR);
1110
1111 /* We can restart this syscall, usually */ 1109 /* We can restart this syscall, usually */
1112 if (ret == -EINTR) 1110 if (ret == -EINTR)
1113 ret = -ERESTARTNOHAND; 1111 ret = -ERESTARTNOHAND;
@@ -1142,8 +1140,7 @@ SYSCALL_DEFINE5(ppoll_time32, struct pollfd __user *, ufds, unsigned int, nfds,
1142 1140
1143 ret = do_sys_poll(ufds, nfds, to); 1141 ret = do_sys_poll(ufds, nfds, to);
1144 1142
1145 restore_user_sigmask(sigmask, &sigsaved); 1143 restore_user_sigmask(sigmask, &sigsaved, ret == -EINTR);
1146
1147 /* We can restart this syscall, usually */ 1144 /* We can restart this syscall, usually */
1148 if (ret == -EINTR) 1145 if (ret == -EINTR)
1149 ret = -ERESTARTNOHAND; 1146 ret = -ERESTARTNOHAND;
@@ -1350,10 +1347,9 @@ static long do_compat_pselect(int n, compat_ulong_t __user *inp,
1350 return ret; 1347 return ret;
1351 1348
1352 ret = compat_core_sys_select(n, inp, outp, exp, to); 1349 ret = compat_core_sys_select(n, inp, outp, exp, to);
1350 restore_user_sigmask(sigmask, &sigsaved, ret == -ERESTARTNOHAND);
1353 ret = poll_select_copy_remaining(&end_time, tsp, type, ret); 1351 ret = poll_select_copy_remaining(&end_time, tsp, type, ret);
1354 1352
1355 restore_user_sigmask(sigmask, &sigsaved);
1356
1357 return ret; 1353 return ret;
1358} 1354}
1359 1355
@@ -1425,8 +1421,7 @@ COMPAT_SYSCALL_DEFINE5(ppoll_time32, struct pollfd __user *, ufds,
1425 1421
1426 ret = do_sys_poll(ufds, nfds, to); 1422 ret = do_sys_poll(ufds, nfds, to);
1427 1423
1428 restore_user_sigmask(sigmask, &sigsaved); 1424 restore_user_sigmask(sigmask, &sigsaved, ret == -EINTR);
1429
1430 /* We can restart this syscall, usually */ 1425 /* We can restart this syscall, usually */
1431 if (ret == -EINTR) 1426 if (ret == -EINTR)
1432 ret = -ERESTARTNOHAND; 1427 ret = -ERESTARTNOHAND;
@@ -1461,8 +1456,7 @@ COMPAT_SYSCALL_DEFINE5(ppoll_time64, struct pollfd __user *, ufds,
1461 1456
1462 ret = do_sys_poll(ufds, nfds, to); 1457 ret = do_sys_poll(ufds, nfds, to);
1463 1458
1464 restore_user_sigmask(sigmask, &sigsaved); 1459 restore_user_sigmask(sigmask, &sigsaved, ret == -EINTR);
1465
1466 /* We can restart this syscall, usually */ 1460 /* We can restart this syscall, usually */
1467 if (ret == -EINTR) 1461 if (ret == -EINTR)
1468 ret = -ERESTARTNOHAND; 1462 ret = -ERESTARTNOHAND;
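
The fs/select.c hunks give restore_user_sigmask() a third argument saying whether the wait was actually interrupted, and in the pselect paths move the call ahead of poll_select_copy_remaining(), which can rewrite ret. A condensed, hypothetical sketch of the resulting ppoll() tail (demo_ppoll_tail() is a made-up name; the real code only builds inside fs/select.c):

	/*
	 * Condensed sketch of the new ppoll() tail from the hunks above; not a
	 * literal copy of fs/select.c.
	 */
	static long demo_ppoll_tail(struct pollfd __user *ufds, unsigned int nfds,
				    struct timespec64 *to,
				    const sigset_t __user *sigmask, sigset_t *sigsaved)
	{
		long ret = do_sys_poll(ufds, nfds, to);

		/* The new third argument reports whether the wait itself was interrupted. */
		restore_user_sigmask(sigmask, sigsaved, ret == -EINTR);

		if (ret == -EINTR)
			ret = -ERESTARTNOHAND;	/* ppoll() can usually be restarted */

		return ret;
	}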
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index ae0b8b5f69e6..ccbdbd62f0d8 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -40,6 +40,16 @@ enum userfaultfd_state {
40/* 40/*
41 * Start with fault_pending_wqh and fault_wqh so they're more likely 41 * Start with fault_pending_wqh and fault_wqh so they're more likely
42 * to be in the same cacheline. 42 * to be in the same cacheline.
43 *
44 * Locking order:
45 * fd_wqh.lock
46 * fault_pending_wqh.lock
47 * fault_wqh.lock
48 * event_wqh.lock
49 *
50 * To avoid deadlocks, IRQs must be disabled when taking any of the above locks,
51 * since fd_wqh.lock is taken by aio_poll() while it's holding a lock that's
52 * also taken in IRQ context.
43 */ 53 */
44struct userfaultfd_ctx { 54struct userfaultfd_ctx {
45 /* waitqueue head for the pending (i.e. not read) userfaults */ 55 /* waitqueue head for the pending (i.e. not read) userfaults */
@@ -458,7 +468,7 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
458 blocking_state = return_to_userland ? TASK_INTERRUPTIBLE : 468 blocking_state = return_to_userland ? TASK_INTERRUPTIBLE :
459 TASK_KILLABLE; 469 TASK_KILLABLE;
460 470
461 spin_lock(&ctx->fault_pending_wqh.lock); 471 spin_lock_irq(&ctx->fault_pending_wqh.lock);
462 /* 472 /*
463 * After the __add_wait_queue the uwq is visible to userland 473 * After the __add_wait_queue the uwq is visible to userland
464 * through poll/read(). 474 * through poll/read().
@@ -470,7 +480,7 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
470 * __add_wait_queue. 480 * __add_wait_queue.
471 */ 481 */
472 set_current_state(blocking_state); 482 set_current_state(blocking_state);
473 spin_unlock(&ctx->fault_pending_wqh.lock); 483 spin_unlock_irq(&ctx->fault_pending_wqh.lock);
474 484
475 if (!is_vm_hugetlb_page(vmf->vma)) 485 if (!is_vm_hugetlb_page(vmf->vma))
476 must_wait = userfaultfd_must_wait(ctx, vmf->address, vmf->flags, 486 must_wait = userfaultfd_must_wait(ctx, vmf->address, vmf->flags,
@@ -552,13 +562,13 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
552 * kernel stack can be released after the list_del_init. 562 * kernel stack can be released after the list_del_init.
553 */ 563 */
554 if (!list_empty_careful(&uwq.wq.entry)) { 564 if (!list_empty_careful(&uwq.wq.entry)) {
555 spin_lock(&ctx->fault_pending_wqh.lock); 565 spin_lock_irq(&ctx->fault_pending_wqh.lock);
556 /* 566 /*
557 * No need of list_del_init(), the uwq on the stack 567 * No need of list_del_init(), the uwq on the stack
558 * will be freed shortly anyway. 568 * will be freed shortly anyway.
559 */ 569 */
560 list_del(&uwq.wq.entry); 570 list_del(&uwq.wq.entry);
561 spin_unlock(&ctx->fault_pending_wqh.lock); 571 spin_unlock_irq(&ctx->fault_pending_wqh.lock);
562 } 572 }
563 573
564 /* 574 /*
@@ -583,7 +593,7 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
583 init_waitqueue_entry(&ewq->wq, current); 593 init_waitqueue_entry(&ewq->wq, current);
584 release_new_ctx = NULL; 594 release_new_ctx = NULL;
585 595
586 spin_lock(&ctx->event_wqh.lock); 596 spin_lock_irq(&ctx->event_wqh.lock);
587 /* 597 /*
588 * After the __add_wait_queue the uwq is visible to userland 598 * After the __add_wait_queue the uwq is visible to userland
589 * through poll/read(). 599 * through poll/read().
@@ -613,15 +623,15 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
613 break; 623 break;
614 } 624 }
615 625
616 spin_unlock(&ctx->event_wqh.lock); 626 spin_unlock_irq(&ctx->event_wqh.lock);
617 627
618 wake_up_poll(&ctx->fd_wqh, EPOLLIN); 628 wake_up_poll(&ctx->fd_wqh, EPOLLIN);
619 schedule(); 629 schedule();
620 630
621 spin_lock(&ctx->event_wqh.lock); 631 spin_lock_irq(&ctx->event_wqh.lock);
622 } 632 }
623 __set_current_state(TASK_RUNNING); 633 __set_current_state(TASK_RUNNING);
624 spin_unlock(&ctx->event_wqh.lock); 634 spin_unlock_irq(&ctx->event_wqh.lock);
625 635
626 if (release_new_ctx) { 636 if (release_new_ctx) {
627 struct vm_area_struct *vma; 637 struct vm_area_struct *vma;
@@ -918,10 +928,10 @@ wakeup:
918 * the last page faults that may have been already waiting on 928 * the last page faults that may have been already waiting on
919 * the fault_*wqh. 929 * the fault_*wqh.
920 */ 930 */
921 spin_lock(&ctx->fault_pending_wqh.lock); 931 spin_lock_irq(&ctx->fault_pending_wqh.lock);
922 __wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL, &range); 932 __wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL, &range);
923 __wake_up(&ctx->fault_wqh, TASK_NORMAL, 1, &range); 933 __wake_up(&ctx->fault_wqh, TASK_NORMAL, 1, &range);
924 spin_unlock(&ctx->fault_pending_wqh.lock); 934 spin_unlock_irq(&ctx->fault_pending_wqh.lock);
925 935
926 /* Flush pending events that may still wait on event_wqh */ 936 /* Flush pending events that may still wait on event_wqh */
927 wake_up_all(&ctx->event_wqh); 937 wake_up_all(&ctx->event_wqh);
@@ -1134,7 +1144,7 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
1134 1144
1135 if (!ret && msg->event == UFFD_EVENT_FORK) { 1145 if (!ret && msg->event == UFFD_EVENT_FORK) {
1136 ret = resolve_userfault_fork(ctx, fork_nctx, msg); 1146 ret = resolve_userfault_fork(ctx, fork_nctx, msg);
1137 spin_lock(&ctx->event_wqh.lock); 1147 spin_lock_irq(&ctx->event_wqh.lock);
1138 if (!list_empty(&fork_event)) { 1148 if (!list_empty(&fork_event)) {
1139 /* 1149 /*
1140 * The fork thread didn't abort, so we can 1150 * The fork thread didn't abort, so we can
@@ -1180,7 +1190,7 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
1180 if (ret) 1190 if (ret)
1181 userfaultfd_ctx_put(fork_nctx); 1191 userfaultfd_ctx_put(fork_nctx);
1182 } 1192 }
1183 spin_unlock(&ctx->event_wqh.lock); 1193 spin_unlock_irq(&ctx->event_wqh.lock);
1184 } 1194 }
1185 1195
1186 return ret; 1196 return ret;
@@ -1219,14 +1229,14 @@ static ssize_t userfaultfd_read(struct file *file, char __user *buf,
1219static void __wake_userfault(struct userfaultfd_ctx *ctx, 1229static void __wake_userfault(struct userfaultfd_ctx *ctx,
1220 struct userfaultfd_wake_range *range) 1230 struct userfaultfd_wake_range *range)
1221{ 1231{
1222 spin_lock(&ctx->fault_pending_wqh.lock); 1232 spin_lock_irq(&ctx->fault_pending_wqh.lock);
1223 /* wake all in the range and autoremove */ 1233 /* wake all in the range and autoremove */
1224 if (waitqueue_active(&ctx->fault_pending_wqh)) 1234 if (waitqueue_active(&ctx->fault_pending_wqh))
1225 __wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL, 1235 __wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL,
1226 range); 1236 range);
1227 if (waitqueue_active(&ctx->fault_wqh)) 1237 if (waitqueue_active(&ctx->fault_wqh))
1228 __wake_up(&ctx->fault_wqh, TASK_NORMAL, 1, range); 1238 __wake_up(&ctx->fault_wqh, TASK_NORMAL, 1, range);
1229 spin_unlock(&ctx->fault_pending_wqh.lock); 1239 spin_unlock_irq(&ctx->fault_pending_wqh.lock);
1230} 1240}
1231 1241
1232static __always_inline void wake_userfault(struct userfaultfd_ctx *ctx, 1242static __always_inline void wake_userfault(struct userfaultfd_ctx *ctx,
@@ -1881,7 +1891,7 @@ static void userfaultfd_show_fdinfo(struct seq_file *m, struct file *f)
1881 wait_queue_entry_t *wq; 1891 wait_queue_entry_t *wq;
1882 unsigned long pending = 0, total = 0; 1892 unsigned long pending = 0, total = 0;
1883 1893
1884 spin_lock(&ctx->fault_pending_wqh.lock); 1894 spin_lock_irq(&ctx->fault_pending_wqh.lock);
1885 list_for_each_entry(wq, &ctx->fault_pending_wqh.head, entry) { 1895 list_for_each_entry(wq, &ctx->fault_pending_wqh.head, entry) {
1886 pending++; 1896 pending++;
1887 total++; 1897 total++;
@@ -1889,7 +1899,7 @@ static void userfaultfd_show_fdinfo(struct seq_file *m, struct file *f)
1889 list_for_each_entry(wq, &ctx->fault_wqh.head, entry) { 1899 list_for_each_entry(wq, &ctx->fault_wqh.head, entry) {
1890 total++; 1900 total++;
1891 } 1901 }
1892 spin_unlock(&ctx->fault_pending_wqh.lock); 1902 spin_unlock_irq(&ctx->fault_pending_wqh.lock);
1893 1903
1894 /* 1904 /*
1895 * If more protocols will be added, there will be all shown 1905 * If more protocols will be added, there will be all shown
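
The userfaultfd hunks convert every waitqueue lock in the context to the irq-disabling variants, following the new locking-order comment: aio_poll() can take fd_wqh.lock while holding a lock that is also taken in IRQ context, so none of these locks may be held with interrupts enabled. A minimal illustration of that general rule, with made-up names (demo_wqh, demo_wake):

	#include <linux/sched.h>
	#include <linux/spinlock.h>
	#include <linux/wait.h>

	static DECLARE_WAIT_QUEUE_HEAD(demo_wqh);

	/*
	 * If demo_wqh.lock can also be taken from (or under) IRQ context, every
	 * process-context acquisition must disable interrupts; otherwise an
	 * interrupt arriving while the lock is held can deadlock on it.
	 */
	static void demo_wake(void)
	{
		spin_lock_irq(&demo_wqh.lock);
		__wake_up_locked(&demo_wqh, TASK_NORMAL, 1);
		spin_unlock_irq(&demo_wqh.lock);
	}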
diff --git a/include/asm-generic/atomic64.h b/include/asm-generic/atomic64.h
index d7a15096fb3b..370f01d4450f 100644
--- a/include/asm-generic/atomic64.h
+++ b/include/asm-generic/atomic64.h
@@ -10,24 +10,24 @@
10#include <linux/types.h> 10#include <linux/types.h>
11 11
12typedef struct { 12typedef struct {
13 long long counter; 13 s64 counter;
14} atomic64_t; 14} atomic64_t;
15 15
16#define ATOMIC64_INIT(i) { (i) } 16#define ATOMIC64_INIT(i) { (i) }
17 17
18extern long long atomic64_read(const atomic64_t *v); 18extern s64 atomic64_read(const atomic64_t *v);
19extern void atomic64_set(atomic64_t *v, long long i); 19extern void atomic64_set(atomic64_t *v, s64 i);
20 20
21#define atomic64_set_release(v, i) atomic64_set((v), (i)) 21#define atomic64_set_release(v, i) atomic64_set((v), (i))
22 22
23#define ATOMIC64_OP(op) \ 23#define ATOMIC64_OP(op) \
24extern void atomic64_##op(long long a, atomic64_t *v); 24extern void atomic64_##op(s64 a, atomic64_t *v);
25 25
26#define ATOMIC64_OP_RETURN(op) \ 26#define ATOMIC64_OP_RETURN(op) \
27extern long long atomic64_##op##_return(long long a, atomic64_t *v); 27extern s64 atomic64_##op##_return(s64 a, atomic64_t *v);
28 28
29#define ATOMIC64_FETCH_OP(op) \ 29#define ATOMIC64_FETCH_OP(op) \
30extern long long atomic64_fetch_##op(long long a, atomic64_t *v); 30extern s64 atomic64_fetch_##op(s64 a, atomic64_t *v);
31 31
32#define ATOMIC64_OPS(op) ATOMIC64_OP(op) ATOMIC64_OP_RETURN(op) ATOMIC64_FETCH_OP(op) 32#define ATOMIC64_OPS(op) ATOMIC64_OP(op) ATOMIC64_OP_RETURN(op) ATOMIC64_FETCH_OP(op)
33 33
@@ -46,11 +46,11 @@ ATOMIC64_OPS(xor)
46#undef ATOMIC64_OP_RETURN 46#undef ATOMIC64_OP_RETURN
47#undef ATOMIC64_OP 47#undef ATOMIC64_OP
48 48
49extern long long atomic64_dec_if_positive(atomic64_t *v); 49extern s64 atomic64_dec_if_positive(atomic64_t *v);
50#define atomic64_dec_if_positive atomic64_dec_if_positive 50#define atomic64_dec_if_positive atomic64_dec_if_positive
51extern long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n); 51extern s64 atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n);
52extern long long atomic64_xchg(atomic64_t *v, long long new); 52extern s64 atomic64_xchg(atomic64_t *v, s64 new);
53extern long long atomic64_fetch_add_unless(atomic64_t *v, long long a, long long u); 53extern s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u);
54#define atomic64_fetch_add_unless atomic64_fetch_add_unless 54#define atomic64_fetch_add_unless atomic64_fetch_add_unless
55 55
56#endif /* _ASM_GENERIC_ATOMIC64_H */ 56#endif /* _ASM_GENERIC_ATOMIC64_H */
diff --git a/include/asm-generic/vdso/vsyscall.h b/include/asm-generic/vdso/vsyscall.h
new file mode 100644
index 000000000000..e94b19782c92
--- /dev/null
+++ b/include/asm-generic/vdso/vsyscall.h
@@ -0,0 +1,50 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2#ifndef __ASM_GENERIC_VSYSCALL_H
3#define __ASM_GENERIC_VSYSCALL_H
4
5#ifndef __ASSEMBLY__
6
7#ifndef __arch_get_k_vdso_data
8static __always_inline struct vdso_data *__arch_get_k_vdso_data(void)
9{
10 return NULL;
11}
12#endif /* __arch_get_k_vdso_data */
13
14#ifndef __arch_update_vdso_data
15static __always_inline int __arch_update_vdso_data(void)
16{
17 return 0;
18}
19#endif /* __arch_update_vdso_data */
20
21#ifndef __arch_get_clock_mode
22static __always_inline int __arch_get_clock_mode(struct timekeeper *tk)
23{
24 return 0;
25}
26#endif /* __arch_get_clock_mode */
27
28#ifndef __arch_use_vsyscall
29static __always_inline int __arch_use_vsyscall(struct vdso_data *vdata)
30{
31 return 1;
32}
33#endif /* __arch_use_vsyscall */
34
35#ifndef __arch_update_vsyscall
36static __always_inline void __arch_update_vsyscall(struct vdso_data *vdata,
37 struct timekeeper *tk)
38{
39}
40#endif /* __arch_update_vsyscall */
41
42#ifndef __arch_sync_vdso_data
43static __always_inline void __arch_sync_vdso_data(struct vdso_data *vdata)
44{
45}
46#endif /* __arch_sync_vdso_data */
47
48#endif /* !__ASSEMBLY__ */
49
50#endif /* __ASM_GENERIC_VSYSCALL_H */
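
The new asm-generic vsyscall header supplies fallbacks that an architecture overrides by defining both the function and a same-named macro before the generic header is pulled in. A hypothetical arch-side sketch (the vdso_data symbol and the header-guard name are made up):

	/* Hypothetical arch/xxx/include/asm/vdso/vsyscall.h */
	#ifndef __XXX_ASM_VDSO_VSYSCALL_H
	#define __XXX_ASM_VDSO_VSYSCALL_H

	#ifndef __ASSEMBLY__

	struct vdso_data;
	extern struct vdso_data *vdso_data;

	/* Defining the macro as well suppresses the generic fallback below. */
	static __always_inline struct vdso_data *__arch_get_k_vdso_data(void)
	{
		return vdso_data;
	}
	#define __arch_get_k_vdso_data __arch_get_k_vdso_data

	/* Every hook not overridden above falls back to the generic version. */
	#include <asm-generic/vdso/vsyscall.h>

	#endif /* !__ASSEMBLY__ */
	#endif /* __XXX_ASM_VDSO_VSYSCALL_H */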
diff --git a/include/clocksource/hyperv_timer.h b/include/clocksource/hyperv_timer.h
new file mode 100644
index 000000000000..a821deb8ecb2
--- /dev/null
+++ b/include/clocksource/hyperv_timer.h
@@ -0,0 +1,107 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2
3/*
4 * Definitions for the clocksource provided by the Hyper-V
5 * hypervisor to guest VMs, as described in the Hyper-V Top
6 * Level Functional Spec (TLFS).
7 *
8 * Copyright (C) 2019, Microsoft, Inc.
9 *
10 * Author: Michael Kelley <mikelley@microsoft.com>
11 */
12
13#ifndef __CLKSOURCE_HYPERV_TIMER_H
14#define __CLKSOURCE_HYPERV_TIMER_H
15
16#include <linux/clocksource.h>
17#include <linux/math64.h>
18#include <asm/mshyperv.h>
19
20#define HV_MAX_MAX_DELTA_TICKS 0xffffffff
21#define HV_MIN_DELTA_TICKS 1
22
23/* Routines called by the VMbus driver */
24extern int hv_stimer_alloc(int sint);
25extern void hv_stimer_free(void);
26extern void hv_stimer_init(unsigned int cpu);
27extern void hv_stimer_cleanup(unsigned int cpu);
28extern void hv_stimer_global_cleanup(void);
29extern void hv_stimer0_isr(void);
30
31#if IS_ENABLED(CONFIG_HYPERV)
32extern struct clocksource *hyperv_cs;
33extern void hv_init_clocksource(void);
34#endif /* CONFIG_HYPERV */
35
36#ifdef CONFIG_HYPERV_TSCPAGE
37extern struct ms_hyperv_tsc_page *hv_get_tsc_page(void);
38
39static inline notrace u64
40hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg, u64 *cur_tsc)
41{
42 u64 scale, offset;
43 u32 sequence;
44
45 /*
46 * The protocol for reading Hyper-V TSC page is specified in Hypervisor
47 * Top-Level Functional Specification ver. 3.0 and above. To get the
48 * reference time we must do the following:
49 * - READ ReferenceTscSequence
50 * A special '0' value indicates the time source is unreliable and we
51 * need to use something else. The currently published specification
52 * versions (up to 4.0b) contain a mistake and wrongly claim '-1'
53 * instead of '0' as the special value, see commit c35b82ef0294.
54 * - ReferenceTime =
55 * ((RDTSC() * ReferenceTscScale) >> 64) + ReferenceTscOffset
56 * - READ ReferenceTscSequence again. In case its value has changed
57 * since our first reading we need to discard ReferenceTime and repeat
58 * the whole sequence as the hypervisor was updating the page in
59 * between.
60 */
61 do {
62 sequence = READ_ONCE(tsc_pg->tsc_sequence);
63 if (!sequence)
64 return U64_MAX;
65 /*
66 * Make sure we read sequence before we read other values from
67 * TSC page.
68 */
69 smp_rmb();
70
71 scale = READ_ONCE(tsc_pg->tsc_scale);
72 offset = READ_ONCE(tsc_pg->tsc_offset);
73 *cur_tsc = hv_get_raw_timer();
74
75 /*
76 * Make sure we read sequence after we read all other values
77 * from TSC page.
78 */
79 smp_rmb();
80
81 } while (READ_ONCE(tsc_pg->tsc_sequence) != sequence);
82
83 return mul_u64_u64_shr(*cur_tsc, scale, 64) + offset;
84}
85
86static inline notrace u64
87hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg)
88{
89 u64 cur_tsc;
90
91 return hv_read_tsc_page_tsc(tsc_pg, &cur_tsc);
92}
93
94#else /* CONFIG_HYPERV_TSC_PAGE */
95static inline struct ms_hyperv_tsc_page *hv_get_tsc_page(void)
96{
97 return NULL;
98}
99
100static inline u64 hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg,
101 u64 *cur_tsc)
102{
103 return U64_MAX;
104}
105#endif /* CONFIG_HYPERV_TSCPAGE */
106
107#endif
diff --git a/include/clocksource/timer-davinci.h b/include/clocksource/timer-davinci.h
new file mode 100644
index 000000000000..1dcc1333fbc8
--- /dev/null
+++ b/include/clocksource/timer-davinci.h
@@ -0,0 +1,44 @@
1/* SPDX-License-Identifier: GPL-2.0-only */
2/*
3 * TI DaVinci clocksource driver
4 *
5 * Copyright (C) 2019 Texas Instruments
6 * Author: Bartosz Golaszewski <bgolaszewski@baylibre.com>
7 */
8
9#ifndef __TIMER_DAVINCI_H__
10#define __TIMER_DAVINCI_H__
11
12#include <linux/clk.h>
13#include <linux/ioport.h>
14
15enum {
16 DAVINCI_TIMER_CLOCKEVENT_IRQ,
17 DAVINCI_TIMER_CLOCKSOURCE_IRQ,
18 DAVINCI_TIMER_NUM_IRQS,
19};
20
21/**
22 * struct davinci_timer_cfg - davinci clocksource driver configuration struct
23 * @reg: register range resource
24 * @irq: clockevent and clocksource interrupt resources
25 * @cmp_off: if set - it specifies the compare register used for clockevent
26 *
27 * Note: if the compare register is specified, the driver will use the bottom
28 * clock half for both clocksource and clockevent and the compare register
29 * to generate event irqs. The user must supply the correct compare register
30 * interrupt number.
31 *
32 * This is only used by da830 the DSP of which uses the top half. The timer
33 * driver still configures the top half to run in free-run mode.
34 */
35struct davinci_timer_cfg {
36 struct resource reg;
37 struct resource irq[DAVINCI_TIMER_NUM_IRQS];
38 unsigned int cmp_off;
39};
40
41int __init davinci_timer_register(struct clk *clk,
42 const struct davinci_timer_cfg *data);
43
44#endif /* __TIMER_DAVINCI_H__ */
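
timer-davinci.h above describes the configuration handed to davinci_timer_register(). A hypothetical board-code sketch (the register base, size and IRQ numbers are invented, not taken from a real DaVinci SoC):

	#include <linux/clk.h>
	#include <linux/init.h>
	#include <linux/ioport.h>
	#include <clocksource/timer-davinci.h>

	static const struct davinci_timer_cfg demo_timer_cfg = {
		.reg = DEFINE_RES_MEM(0x01c20000, 0x1000),
		.irq = {
			[DAVINCI_TIMER_CLOCKEVENT_IRQ]  = DEFINE_RES_IRQ(21),
			[DAVINCI_TIMER_CLOCKSOURCE_IRQ] = DEFINE_RES_IRQ(22),
		},
		/* .cmp_off left at 0: no compare register, use both timer halves */
	};

	static int __init demo_timer_init(struct clk *clk)
	{
		return davinci_timer_register(clk, &demo_timer_cfg);
	}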
diff --git a/include/dt-bindings/clock/g12a-clkc.h b/include/dt-bindings/clock/g12a-clkc.h
index 82c9e0c020b2..e10470ed7c4f 100644
--- a/include/dt-bindings/clock/g12a-clkc.h
+++ b/include/dt-bindings/clock/g12a-clkc.h
@@ -130,7 +130,7 @@
130#define CLKID_MALI_1_SEL 172 130#define CLKID_MALI_1_SEL 172
131#define CLKID_MALI_1 174 131#define CLKID_MALI_1 174
132#define CLKID_MALI 175 132#define CLKID_MALI 175
133#define CLKID_MPLL_5OM 177 133#define CLKID_MPLL_50M 177
134#define CLKID_CPU_CLK 187 134#define CLKID_CPU_CLK 187
135#define CLKID_PCIE_PLL 201 135#define CLKID_PCIE_PLL 201
136#define CLKID_VDEC_1 204 136#define CLKID_VDEC_1 204
diff --git a/include/dt-bindings/clock/sifive-fu540-prci.h b/include/dt-bindings/clock/sifive-fu540-prci.h
index 6a0b70a37d78..3b21d0522c91 100644
--- a/include/dt-bindings/clock/sifive-fu540-prci.h
+++ b/include/dt-bindings/clock/sifive-fu540-prci.h
@@ -1,4 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */ 1/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */
2/* 2/*
3 * Copyright (C) 2018-2019 SiFive, Inc. 3 * Copyright (C) 2018-2019 SiFive, Inc.
4 * Wesley Terpstra 4 * Wesley Terpstra
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index d315d86844e4..469be6844703 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -10,6 +10,7 @@
10 10
11#include <linux/errno.h> 11#include <linux/errno.h>
12#include <linux/ioport.h> /* for struct resource */ 12#include <linux/ioport.h> /* for struct resource */
13#include <linux/irqdomain.h>
13#include <linux/resource_ext.h> 14#include <linux/resource_ext.h>
14#include <linux/device.h> 15#include <linux/device.h>
15#include <linux/property.h> 16#include <linux/property.h>
@@ -314,6 +315,12 @@ int acpi_isa_irq_to_gsi (unsigned isa_irq, u32 *gsi);
314void acpi_set_irq_model(enum acpi_irq_model_id model, 315void acpi_set_irq_model(enum acpi_irq_model_id model,
315 struct fwnode_handle *fwnode); 316 struct fwnode_handle *fwnode);
316 317
318struct irq_domain *acpi_irq_create_hierarchy(unsigned int flags,
319 unsigned int size,
320 struct fwnode_handle *fwnode,
321 const struct irq_domain_ops *ops,
322 void *host_data);
323
317#ifdef CONFIG_X86_IO_APIC 324#ifdef CONFIG_X86_IO_APIC
318extern int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity); 325extern int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity);
319#else 326#else
@@ -1303,6 +1310,7 @@ static inline int lpit_read_residency_count_address(u64 *address)
1303#ifdef CONFIG_ACPI_PPTT 1310#ifdef CONFIG_ACPI_PPTT
1304int find_acpi_cpu_topology(unsigned int cpu, int level); 1311int find_acpi_cpu_topology(unsigned int cpu, int level);
1305int find_acpi_cpu_topology_package(unsigned int cpu); 1312int find_acpi_cpu_topology_package(unsigned int cpu);
1313int find_acpi_cpu_topology_hetero_id(unsigned int cpu);
1306int find_acpi_cpu_cache_topology(unsigned int cpu, int level); 1314int find_acpi_cpu_cache_topology(unsigned int cpu, int level);
1307#else 1315#else
1308static inline int find_acpi_cpu_topology(unsigned int cpu, int level) 1316static inline int find_acpi_cpu_topology(unsigned int cpu, int level)
@@ -1313,6 +1321,10 @@ static inline int find_acpi_cpu_topology_package(unsigned int cpu)
1313{ 1321{
1314 return -EINVAL; 1322 return -EINVAL;
1315} 1323}
1324static inline int find_acpi_cpu_topology_hetero_id(unsigned int cpu)
1325{
1326 return -EINVAL;
1327}
1316static inline int find_acpi_cpu_cache_topology(unsigned int cpu, int level) 1328static inline int find_acpi_cpu_cache_topology(unsigned int cpu, int level)
1317{ 1329{
1318 return -EINVAL; 1330 return -EINVAL;
diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h
index 70e19bc6cc9f..46b92cd61d0c 100644
--- a/include/linux/cacheinfo.h
+++ b/include/linux/cacheinfo.h
@@ -17,6 +17,8 @@ enum cache_type {
17 CACHE_TYPE_UNIFIED = BIT(2), 17 CACHE_TYPE_UNIFIED = BIT(2),
18}; 18};
19 19
20extern unsigned int coherency_max_size;
21
20/** 22/**
21 * struct cacheinfo - represent a cache leaf node 23 * struct cacheinfo - represent a cache leaf node
22 * @id: This cache's id. It is unique among caches with the same (type, level). 24 * @id: This cache's id. It is unique among caches with the same (type, level).
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 5c6062206760..87c211adf49e 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -116,10 +116,10 @@ enum cpuhp_state {
116 CPUHP_AP_PERF_ARM_ACPI_STARTING, 116 CPUHP_AP_PERF_ARM_ACPI_STARTING,
117 CPUHP_AP_PERF_ARM_STARTING, 117 CPUHP_AP_PERF_ARM_STARTING,
118 CPUHP_AP_ARM_L2X0_STARTING, 118 CPUHP_AP_ARM_L2X0_STARTING,
119 CPUHP_AP_EXYNOS4_MCT_TIMER_STARTING,
119 CPUHP_AP_ARM_ARCH_TIMER_STARTING, 120 CPUHP_AP_ARM_ARCH_TIMER_STARTING,
120 CPUHP_AP_ARM_GLOBAL_TIMER_STARTING, 121 CPUHP_AP_ARM_GLOBAL_TIMER_STARTING,
121 CPUHP_AP_JCORE_TIMER_STARTING, 122 CPUHP_AP_JCORE_TIMER_STARTING,
122 CPUHP_AP_EXYNOS4_MCT_TIMER_STARTING,
123 CPUHP_AP_ARM_TWD_STARTING, 123 CPUHP_AP_ARM_TWD_STARTING,
124 CPUHP_AP_QCOM_TIMER_STARTING, 124 CPUHP_AP_QCOM_TIMER_STARTING,
125 CPUHP_AP_TEGRA_TIMER_STARTING, 125 CPUHP_AP_TEGRA_TIMER_STARTING,
diff --git a/include/linux/device.h b/include/linux/device.h
index 848fc71c6ba6..4a295e324ac5 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -704,7 +704,8 @@ extern unsigned long devm_get_free_pages(struct device *dev,
704 gfp_t gfp_mask, unsigned int order); 704 gfp_t gfp_mask, unsigned int order);
705extern void devm_free_pages(struct device *dev, unsigned long addr); 705extern void devm_free_pages(struct device *dev, unsigned long addr);
706 706
707void __iomem *devm_ioremap_resource(struct device *dev, struct resource *res); 707void __iomem *devm_ioremap_resource(struct device *dev,
708 const struct resource *res);
708 709
709void __iomem *devm_of_iomap(struct device *dev, 710void __iomem *devm_of_iomap(struct device *dev,
710 struct device_node *node, int index, 711 struct device_node *node, int index,
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 2e8957eac4d4..4971100a8cab 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -12,8 +12,8 @@
12#ifndef _LINUX_HRTIMER_H 12#ifndef _LINUX_HRTIMER_H
13#define _LINUX_HRTIMER_H 13#define _LINUX_HRTIMER_H
14 14
15#include <linux/hrtimer_defs.h>
15#include <linux/rbtree.h> 16#include <linux/rbtree.h>
16#include <linux/ktime.h>
17#include <linux/init.h> 17#include <linux/init.h>
18#include <linux/list.h> 18#include <linux/list.h>
19#include <linux/percpu.h> 19#include <linux/percpu.h>
@@ -298,26 +298,12 @@ struct clock_event_device;
298 298
299extern void hrtimer_interrupt(struct clock_event_device *dev); 299extern void hrtimer_interrupt(struct clock_event_device *dev);
300 300
301/*
302 * The resolution of the clocks. The resolution value is returned in
303 * the clock_getres() system call to give application programmers an
304 * idea of the (in)accuracy of timers. Timer values are rounded up to
305 * this resolution values.
306 */
307# define HIGH_RES_NSEC 1
308# define KTIME_HIGH_RES (HIGH_RES_NSEC)
309# define MONOTONIC_RES_NSEC HIGH_RES_NSEC
310# define KTIME_MONOTONIC_RES KTIME_HIGH_RES
311
312extern void clock_was_set_delayed(void); 301extern void clock_was_set_delayed(void);
313 302
314extern unsigned int hrtimer_resolution; 303extern unsigned int hrtimer_resolution;
315 304
316#else 305#else
317 306
318# define MONOTONIC_RES_NSEC LOW_RES_NSEC
319# define KTIME_MONOTONIC_RES KTIME_LOW_RES
320
321#define hrtimer_resolution (unsigned int)LOW_RES_NSEC 307#define hrtimer_resolution (unsigned int)LOW_RES_NSEC
322 308
323static inline void clock_was_set_delayed(void) { } 309static inline void clock_was_set_delayed(void) { }
diff --git a/include/linux/hrtimer_defs.h b/include/linux/hrtimer_defs.h
new file mode 100644
index 000000000000..2d3e3c5fb946
--- /dev/null
+++ b/include/linux/hrtimer_defs.h
@@ -0,0 +1,27 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2#ifndef _LINUX_HRTIMER_DEFS_H
3#define _LINUX_HRTIMER_DEFS_H
4
5#include <linux/ktime.h>
6
7#ifdef CONFIG_HIGH_RES_TIMERS
8
9/*
10 * The resolution of the clocks. The resolution value is returned in
11 * the clock_getres() system call to give application programmers an
12 * idea of the (in)accuracy of timers. Timer values are rounded up to
13 * this resolution values.
14 */
15# define HIGH_RES_NSEC 1
16# define KTIME_HIGH_RES (HIGH_RES_NSEC)
17# define MONOTONIC_RES_NSEC HIGH_RES_NSEC
18# define KTIME_MONOTONIC_RES KTIME_HIGH_RES
19
20#else
21
22# define MONOTONIC_RES_NSEC LOW_RES_NSEC
23# define KTIME_MONOTONIC_RES KTIME_LOW_RES
24
25#endif
26
27#endif
diff --git a/include/linux/intel-ish-client-if.h b/include/linux/intel-ish-client-if.h
index 16255c2ca2f4..0d6b4bc191c5 100644
--- a/include/linux/intel-ish-client-if.h
+++ b/include/linux/intel-ish-client-if.h
@@ -103,6 +103,7 @@ void ishtp_put_device(struct ishtp_cl_device *cl_dev);
103void ishtp_get_device(struct ishtp_cl_device *cl_dev); 103void ishtp_get_device(struct ishtp_cl_device *cl_dev);
104void ishtp_set_drvdata(struct ishtp_cl_device *cl_device, void *data); 104void ishtp_set_drvdata(struct ishtp_cl_device *cl_device, void *data);
105void *ishtp_get_drvdata(struct ishtp_cl_device *cl_device); 105void *ishtp_get_drvdata(struct ishtp_cl_device *cl_device);
106struct ishtp_cl_device *ishtp_dev_to_cl_device(struct device *dev);
106int ishtp_register_event_cb(struct ishtp_cl_device *device, 107int ishtp_register_event_cb(struct ishtp_cl_device *device,
107 void (*read_cb)(struct ishtp_cl_device *)); 108 void (*read_cb)(struct ishtp_cl_device *));
108struct ishtp_fw_client *ishtp_fw_cl_get_client(struct ishtp_device *dev, 109struct ishtp_fw_client *ishtp_fw_cl_get_client(struct ishtp_device *dev,
diff --git a/include/linux/irqchip/arm-gic-common.h b/include/linux/irqchip/arm-gic-common.h
index 626283858563..b9850f5f1906 100644
--- a/include/linux/irqchip/arm-gic-common.h
+++ b/include/linux/irqchip/arm-gic-common.h
@@ -36,4 +36,9 @@ struct gic_kvm_info {
36 36
37const struct gic_kvm_info *gic_get_kvm_info(void); 37const struct gic_kvm_info *gic_get_kvm_info(void);
38 38
39struct irq_domain;
40struct fwnode_handle;
41int gicv2m_init(struct fwnode_handle *parent_handle,
42 struct irq_domain *parent);
43
39#endif /* __LINUX_IRQCHIP_ARM_GIC_COMMON_H */ 44#endif /* __LINUX_IRQCHIP_ARM_GIC_COMMON_H */
diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index 316087da1d09..5686711b0f40 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -157,9 +157,6 @@ int gic_of_init_child(struct device *dev, struct gic_chip_data **gic, int irq);
157 */ 157 */
158void gic_init(void __iomem *dist , void __iomem *cpu); 158void gic_init(void __iomem *dist , void __iomem *cpu);
159 159
160int gicv2m_init(struct fwnode_handle *parent_handle,
161 struct irq_domain *parent);
162
163void gic_send_sgi(unsigned int cpu_id, unsigned int irq); 160void gic_send_sgi(unsigned int cpu_id, unsigned int irq);
164int gic_get_cpu_id(unsigned int cpu); 161int gic_get_cpu_id(unsigned int cpu);
165void gic_migrate_target(unsigned int new_cpu_id); 162void gic_migrate_target(unsigned int new_cpu_id);
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 3e113a1fa0f1..3526c0aee954 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -215,6 +215,9 @@ extern void arch_jump_label_transform(struct jump_entry *entry,
215 enum jump_label_type type); 215 enum jump_label_type type);
216extern void arch_jump_label_transform_static(struct jump_entry *entry, 216extern void arch_jump_label_transform_static(struct jump_entry *entry,
217 enum jump_label_type type); 217 enum jump_label_type type);
218extern bool arch_jump_label_transform_queue(struct jump_entry *entry,
219 enum jump_label_type type);
220extern void arch_jump_label_transform_apply(void);
218extern int jump_label_text_reserved(void *start, void *end); 221extern int jump_label_text_reserved(void *start, void *end);
219extern void static_key_slow_inc(struct static_key *key); 222extern void static_key_slow_inc(struct static_key *key);
220extern void static_key_slow_dec(struct static_key *key); 223extern void static_key_slow_dec(struct static_key *key);
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 74b1ee9027f5..0c9bc231107f 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -93,7 +93,8 @@
93#define DIV_ROUND_DOWN_ULL(ll, d) \ 93#define DIV_ROUND_DOWN_ULL(ll, d) \
94 ({ unsigned long long _tmp = (ll); do_div(_tmp, d); _tmp; }) 94 ({ unsigned long long _tmp = (ll); do_div(_tmp, d); _tmp; })
95 95
96#define DIV_ROUND_UP_ULL(ll, d) DIV_ROUND_DOWN_ULL((ll) + (d) - 1, (d)) 96#define DIV_ROUND_UP_ULL(ll, d) \
97 DIV_ROUND_DOWN_ULL((unsigned long long)(ll) + (d) - 1, (d))
97 98
98#if BITS_PER_LONG == 32 99#if BITS_PER_LONG == 32
99# define DIV_ROUND_UP_SECTOR_T(ll,d) DIV_ROUND_UP_ULL(ll, d) 100# define DIV_ROUND_UP_SECTOR_T(ll,d) DIV_ROUND_UP_ULL(ll, d)
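
The kernel.h hunk casts the dividend to unsigned long long before the rounding addition: without the cast, a 32-bit dividend close to UINT_MAX wraps during (ll) + (d) - 1 and the result rounds down to zero. A userspace approximation that shows the difference (do_div() is replaced here by a plain 64-bit division):

	#include <stdio.h>

	#define DIV_ROUND_DOWN_ULL(ll, d) \
		({ unsigned long long _tmp = (ll); _tmp /= (d); _tmp; })

	#define DIV_ROUND_UP_ULL_OLD(ll, d) \
		DIV_ROUND_DOWN_ULL((ll) + (d) - 1, (d))
	#define DIV_ROUND_UP_ULL_NEW(ll, d) \
		DIV_ROUND_DOWN_ULL((unsigned long long)(ll) + (d) - 1, (d))

	int main(void)
	{
		unsigned int bytes = 0xfffffff0u;	/* near UINT_MAX */

		/* The 32-bit addition wraps to 47, so the old macro prints 0. */
		printf("old: %llu\n", DIV_ROUND_UP_ULL_OLD(bytes, 64u));
		/* The cast forces 64-bit arithmetic and prints 67108864. */
		printf("new: %llu\n", DIV_ROUND_UP_ULL_NEW(bytes, 64u));
		return 0;
	}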
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 6e2377e6c1d6..57baa27f238c 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -203,11 +203,17 @@ struct lock_list {
203 struct lock_list *parent; 203 struct lock_list *parent;
204}; 204};
205 205
206/* 206/**
207 * We record lock dependency chains, so that we can cache them: 207 * struct lock_chain - lock dependency chain record
208 *
209 * @irq_context: the same as irq_context in held_lock below
210 * @depth: the number of held locks in this chain
211 * @base: the index in chain_hlocks for this chain
212 * @entry: the collided lock chains in lock_chain hash list
213 * @chain_key: the hash key of this lock_chain
208 */ 214 */
209struct lock_chain { 215struct lock_chain {
210 /* see BUILD_BUG_ON()s in lookup_chain_cache() */ 216 /* see BUILD_BUG_ON()s in add_chain_cache() */
211 unsigned int irq_context : 2, 217 unsigned int irq_context : 2,
212 depth : 6, 218 depth : 6,
213 base : 24; 219 base : 24;
@@ -217,12 +223,8 @@ struct lock_chain {
217}; 223};
218 224
219#define MAX_LOCKDEP_KEYS_BITS 13 225#define MAX_LOCKDEP_KEYS_BITS 13
220/* 226#define MAX_LOCKDEP_KEYS (1UL << MAX_LOCKDEP_KEYS_BITS)
221 * Subtract one because we offset hlock->class_idx by 1 in order 227#define INITIAL_CHAIN_KEY -1
222 * to make 0 mean no class. This avoids overflowing the class_idx
223 * bitfield and hitting the BUG in hlock_class().
224 */
225#define MAX_LOCKDEP_KEYS ((1UL << MAX_LOCKDEP_KEYS_BITS) - 1)
226 228
227struct held_lock { 229struct held_lock {
228 /* 230 /*
@@ -247,6 +249,11 @@ struct held_lock {
247 u64 waittime_stamp; 249 u64 waittime_stamp;
248 u64 holdtime_stamp; 250 u64 holdtime_stamp;
249#endif 251#endif
252 /*
253 * class_idx is zero-indexed; it points to the element in
254 * lock_classes this held lock instance belongs to. class_idx is in
255 * the range from 0 to (MAX_LOCKDEP_KEYS-1) inclusive.
256 */
250 unsigned int class_idx:MAX_LOCKDEP_KEYS_BITS; 257 unsigned int class_idx:MAX_LOCKDEP_KEYS_BITS;
251 /* 258 /*
252 * The lock-stack is unified in that the lock chains of interrupt 259 * The lock-stack is unified in that the lock chains of interrupt
@@ -281,6 +288,8 @@ extern void lockdep_free_key_range(void *start, unsigned long size);
281extern asmlinkage void lockdep_sys_exit(void); 288extern asmlinkage void lockdep_sys_exit(void);
282extern void lockdep_set_selftest_task(struct task_struct *task); 289extern void lockdep_set_selftest_task(struct task_struct *task);
283 290
291extern void lockdep_init_task(struct task_struct *task);
292
284extern void lockdep_off(void); 293extern void lockdep_off(void);
285extern void lockdep_on(void); 294extern void lockdep_on(void);
286 295
@@ -385,7 +394,7 @@ extern void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie);
385 WARN_ON(debug_locks && !lockdep_is_held(l)); \ 394 WARN_ON(debug_locks && !lockdep_is_held(l)); \
386 } while (0) 395 } while (0)
387 396
388#define lockdep_assert_held_exclusive(l) do { \ 397#define lockdep_assert_held_write(l) do { \
389 WARN_ON(debug_locks && !lockdep_is_held_type(l, 0)); \ 398 WARN_ON(debug_locks && !lockdep_is_held_type(l, 0)); \
390 } while (0) 399 } while (0)
391 400
@@ -405,6 +414,10 @@ extern void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie);
405 414
406#else /* !CONFIG_LOCKDEP */ 415#else /* !CONFIG_LOCKDEP */
407 416
417static inline void lockdep_init_task(struct task_struct *task)
418{
419}
420
408static inline void lockdep_off(void) 421static inline void lockdep_off(void)
409{ 422{
410} 423}
@@ -466,7 +479,7 @@ struct lockdep_map { };
466#define lockdep_is_held_type(l, r) (1) 479#define lockdep_is_held_type(l, r) (1)
467 480
468#define lockdep_assert_held(l) do { (void)(l); } while (0) 481#define lockdep_assert_held(l) do { (void)(l); } while (0)
469#define lockdep_assert_held_exclusive(l) do { (void)(l); } while (0) 482#define lockdep_assert_held_write(l) do { (void)(l); } while (0)
470#define lockdep_assert_held_read(l) do { (void)(l); } while (0) 483#define lockdep_assert_held_read(l) do { (void)(l); } while (0)
471#define lockdep_assert_held_once(l) do { (void)(l); } while (0) 484#define lockdep_assert_held_once(l) do { (void)(l); } while (0)
472 485
@@ -497,7 +510,6 @@ enum xhlock_context_t {
497 { .name = (_name), .key = (void *)(_key), } 510 { .name = (_name), .key = (void *)(_key), }
498 511
499static inline void lockdep_invariant_state(bool force) {} 512static inline void lockdep_invariant_state(bool force) {}
500static inline void lockdep_init_task(struct task_struct *task) {}
501static inline void lockdep_free_task(struct task_struct *task) {} 513static inline void lockdep_free_task(struct task_struct *task) {}
502 514
503#ifdef CONFIG_LOCK_STAT 515#ifdef CONFIG_LOCK_STAT
@@ -632,11 +644,18 @@ do { \
632 "IRQs not disabled as expected\n"); \ 644 "IRQs not disabled as expected\n"); \
633 } while (0) 645 } while (0)
634 646
647#define lockdep_assert_in_irq() do { \
648 WARN_ONCE(debug_locks && !current->lockdep_recursion && \
649 !current->hardirq_context, \
650 "Not in hardirq as expected\n"); \
651 } while (0)
652
635#else 653#else
636# define might_lock(lock) do { } while (0) 654# define might_lock(lock) do { } while (0)
637# define might_lock_read(lock) do { } while (0) 655# define might_lock_read(lock) do { } while (0)
638# define lockdep_assert_irqs_enabled() do { } while (0) 656# define lockdep_assert_irqs_enabled() do { } while (0)
639# define lockdep_assert_irqs_disabled() do { } while (0) 657# define lockdep_assert_irqs_disabled() do { } while (0)
658# define lockdep_assert_in_irq() do { } while (0)
640#endif 659#endif
641 660
642#ifdef CONFIG_LOCKDEP 661#ifdef CONFIG_LOCKDEP
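
Among the lockdep changes above, lockdep_assert_held_exclusive() is renamed to lockdep_assert_held_write(). A small hypothetical caller (cfg_rwsem and cfg_update() are made-up names):

	#include <linux/lockdep.h>
	#include <linux/rwsem.h>

	static DECLARE_RWSEM(cfg_rwsem);

	static void cfg_update(void)
	{
		/* Same check as the old lockdep_assert_held_exclusive(). */
		lockdep_assert_held_write(&cfg_rwsem);
		/* ... modify data protected by cfg_rwsem held for write ... */
	}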
diff --git a/include/linux/module.h b/include/linux/module.h
index 188998d3dca9..1455812dd325 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -21,6 +21,7 @@
21#include <linux/rbtree_latch.h> 21#include <linux/rbtree_latch.h>
22#include <linux/error-injection.h> 22#include <linux/error-injection.h>
23#include <linux/tracepoint-defs.h> 23#include <linux/tracepoint-defs.h>
24#include <linux/srcu.h>
24 25
25#include <linux/percpu.h> 26#include <linux/percpu.h>
26#include <asm/module.h> 27#include <asm/module.h>
@@ -450,6 +451,10 @@ struct module {
450 unsigned int num_tracepoints; 451 unsigned int num_tracepoints;
451 tracepoint_ptr_t *tracepoints_ptrs; 452 tracepoint_ptr_t *tracepoints_ptrs;
452#endif 453#endif
454#ifdef CONFIG_TREE_SRCU
455 unsigned int num_srcu_structs;
456 struct srcu_struct **srcu_struct_ptrs;
457#endif
453#ifdef CONFIG_BPF_EVENTS 458#ifdef CONFIG_BPF_EVENTS
454 unsigned int num_bpf_raw_events; 459 unsigned int num_bpf_raw_events;
455 struct bpf_raw_event_map *bpf_raw_events; 460 struct bpf_raw_event_map *bpf_raw_events;
diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index b3d360b0ee3d..9f57cdfcc93d 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -373,6 +373,8 @@ struct flash_info;
373 * @flash_unlock: [FLASH-SPECIFIC] unlock a region of the SPI NOR 373 * @flash_unlock: [FLASH-SPECIFIC] unlock a region of the SPI NOR
374 * @flash_is_locked: [FLASH-SPECIFIC] check if a region of the SPI NOR is 374 * @flash_is_locked: [FLASH-SPECIFIC] check if a region of the SPI NOR is
375 * @quad_enable: [FLASH-SPECIFIC] enables SPI NOR quad mode 375 * @quad_enable: [FLASH-SPECIFIC] enables SPI NOR quad mode
376 * @clear_sr_bp: [FLASH-SPECIFIC] clears the Block Protection Bits from
377 * the SPI NOR Status Register.
376 * completely locked 378 * completely locked
377 * @priv: the private data 379 * @priv: the private data
378 */ 380 */
@@ -410,6 +412,7 @@ struct spi_nor {
410 int (*flash_unlock)(struct spi_nor *nor, loff_t ofs, uint64_t len); 412 int (*flash_unlock)(struct spi_nor *nor, loff_t ofs, uint64_t len);
411 int (*flash_is_locked)(struct spi_nor *nor, loff_t ofs, uint64_t len); 413 int (*flash_is_locked)(struct spi_nor *nor, loff_t ofs, uint64_t len);
412 int (*quad_enable)(struct spi_nor *nor); 414 int (*quad_enable)(struct spi_nor *nor);
415 int (*clear_sr_bp)(struct spi_nor *nor);
413 416
414 void *priv; 417 void *priv;
415}; 418};
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 9ec3544baee2..fe0b29bf2df7 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -333,19 +333,6 @@ static inline struct page *grab_cache_page_nowait(struct address_space *mapping,
333 mapping_gfp_mask(mapping)); 333 mapping_gfp_mask(mapping));
334} 334}
335 335
336static inline struct page *find_subpage(struct page *page, pgoff_t offset)
337{
338 unsigned long mask;
339
340 if (PageHuge(page))
341 return page;
342
343 VM_BUG_ON_PAGE(PageTail(page), page);
344
345 mask = (1UL << compound_order(page)) - 1;
346 return page + (offset & mask);
347}
348
349struct page *find_get_entry(struct address_space *mapping, pgoff_t offset); 336struct page *find_get_entry(struct address_space *mapping, pgoff_t offset);
350struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset); 337struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset);
351unsigned find_get_entries(struct address_space *mapping, pgoff_t start, 338unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
index 03cb4b6f842e..3998cdf9cd14 100644
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -17,14 +17,18 @@ struct percpu_rw_semaphore {
17 int readers_block; 17 int readers_block;
18}; 18};
19 19
20#define DEFINE_STATIC_PERCPU_RWSEM(name) \ 20#define __DEFINE_PERCPU_RWSEM(name, is_static) \
21static DEFINE_PER_CPU(unsigned int, __percpu_rwsem_rc_##name); \ 21static DEFINE_PER_CPU(unsigned int, __percpu_rwsem_rc_##name); \
22static struct percpu_rw_semaphore name = { \ 22is_static struct percpu_rw_semaphore name = { \
23 .rss = __RCU_SYNC_INITIALIZER(name.rss, RCU_SCHED_SYNC), \ 23 .rss = __RCU_SYNC_INITIALIZER(name.rss), \
24 .read_count = &__percpu_rwsem_rc_##name, \ 24 .read_count = &__percpu_rwsem_rc_##name, \
25 .rw_sem = __RWSEM_INITIALIZER(name.rw_sem), \ 25 .rw_sem = __RWSEM_INITIALIZER(name.rw_sem), \
26 .writer = __RCUWAIT_INITIALIZER(name.writer), \ 26 .writer = __RCUWAIT_INITIALIZER(name.writer), \
27} 27}
28#define DEFINE_PERCPU_RWSEM(name) \
29 __DEFINE_PERCPU_RWSEM(name, /* not static */)
30#define DEFINE_STATIC_PERCPU_RWSEM(name) \
31 __DEFINE_PERCPU_RWSEM(name, static)
28 32
29extern int __percpu_down_read(struct percpu_rw_semaphore *, int); 33extern int __percpu_down_read(struct percpu_rw_semaphore *, int);
30extern void __percpu_up_read(struct percpu_rw_semaphore *); 34extern void __percpu_up_read(struct percpu_rw_semaphore *);
@@ -117,7 +121,7 @@ static inline void percpu_rwsem_release(struct percpu_rw_semaphore *sem,
117 lock_release(&sem->rw_sem.dep_map, 1, ip); 121 lock_release(&sem->rw_sem.dep_map, 1, ip);
118#ifdef CONFIG_RWSEM_SPIN_ON_OWNER 122#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
119 if (!read) 123 if (!read)
120 sem->rw_sem.owner = RWSEM_OWNER_UNKNOWN; 124 atomic_long_set(&sem->rw_sem.owner, RWSEM_OWNER_UNKNOWN);
121#endif 125#endif
122} 126}
123 127
@@ -127,7 +131,7 @@ static inline void percpu_rwsem_acquire(struct percpu_rw_semaphore *sem,
127 lock_acquire(&sem->rw_sem.dep_map, 0, 1, read, 1, NULL, ip); 131 lock_acquire(&sem->rw_sem.dep_map, 0, 1, read, 1, NULL, ip);
128#ifdef CONFIG_RWSEM_SPIN_ON_OWNER 132#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
129 if (!read) 133 if (!read)
130 sem->rw_sem.owner = current; 134 atomic_long_set(&sem->rw_sem.owner, (long)current);
131#endif 135#endif
132} 136}
133 137
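
Besides dropping the RCU-flavor argument, the percpu-rwsem hunk adds DEFINE_PERCPU_RWSEM(), so a per-CPU rwsem can now be defined with external linkage as well. A hypothetical use (the names are made up):

	#include <linux/percpu-rwsem.h>

	DEFINE_PERCPU_RWSEM(demo_state_sem);	/* non-static, visible to other files */

	static void demo_read_side(void)
	{
		percpu_down_read(&demo_state_sem);
		/* ... fast-path read-side critical section ... */
		percpu_up_read(&demo_state_sem);
	}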
diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
index a9b0ee408fbd..71f525a35ac2 100644
--- a/include/linux/perf/arm_pmu.h
+++ b/include/linux/perf/arm_pmu.h
@@ -171,4 +171,6 @@ void armpmu_free_irq(int irq, int cpu);
171 171
172#endif /* CONFIG_ARM_PMU */ 172#endif /* CONFIG_ARM_PMU */
173 173
174#define ARMV8_SPE_PDEV_NAME "arm,spe-v1"
175
174#endif /* __ARM_PMU_H__ */ 176#endif /* __ARM_PMU_H__ */
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 0ab99c7b652d..2bca72f3028b 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -241,6 +241,7 @@ struct perf_event;
241#define PERF_PMU_CAP_NO_INTERRUPT 0x01 241#define PERF_PMU_CAP_NO_INTERRUPT 0x01
242#define PERF_PMU_CAP_NO_NMI 0x02 242#define PERF_PMU_CAP_NO_NMI 0x02
243#define PERF_PMU_CAP_AUX_NO_SG 0x04 243#define PERF_PMU_CAP_AUX_NO_SG 0x04
244#define PERF_PMU_CAP_EXTENDED_REGS 0x08
244#define PERF_PMU_CAP_EXCLUSIVE 0x10 245#define PERF_PMU_CAP_EXCLUSIVE 0x10
245#define PERF_PMU_CAP_ITRACE 0x20 246#define PERF_PMU_CAP_ITRACE 0x20
246#define PERF_PMU_CAP_HETEROGENEOUS_CPUS 0x40 247#define PERF_PMU_CAP_HETEROGENEOUS_CPUS 0x40
diff --git a/include/linux/perf_regs.h b/include/linux/perf_regs.h
index 476747456bca..2d12e97d5e7b 100644
--- a/include/linux/perf_regs.h
+++ b/include/linux/perf_regs.h
@@ -11,6 +11,11 @@ struct perf_regs {
11 11
12#ifdef CONFIG_HAVE_PERF_REGS 12#ifdef CONFIG_HAVE_PERF_REGS
13#include <asm/perf_regs.h> 13#include <asm/perf_regs.h>
14
15#ifndef PERF_REG_EXTENDED_MASK
16#define PERF_REG_EXTENDED_MASK 0
17#endif
18
14u64 perf_reg_value(struct pt_regs *regs, int idx); 19u64 perf_reg_value(struct pt_regs *regs, int idx);
15int perf_reg_validate(u64 mask); 20int perf_reg_validate(u64 mask);
16u64 perf_reg_abi(struct task_struct *task); 21u64 perf_reg_abi(struct task_struct *task);
@@ -18,6 +23,9 @@ void perf_get_regs_user(struct perf_regs *regs_user,
18 struct pt_regs *regs, 23 struct pt_regs *regs,
19 struct pt_regs *regs_user_copy); 24 struct pt_regs *regs_user_copy);
20#else 25#else
26
27#define PERF_REG_EXTENDED_MASK 0
28
21static inline u64 perf_reg_value(struct pt_regs *regs, int idx) 29static inline u64 perf_reg_value(struct pt_regs *regs, int idx)
22{ 30{
23 return 0; 31 return 0;
diff --git a/include/linux/pfn_t.h b/include/linux/pfn_t.h
index 7bb77850c65a..3c202a11a79e 100644
--- a/include/linux/pfn_t.h
+++ b/include/linux/pfn_t.h
@@ -68,7 +68,7 @@ static inline phys_addr_t pfn_t_to_phys(pfn_t pfn)
68 68
69static inline void *pfn_t_to_virt(pfn_t pfn) 69static inline void *pfn_t_to_virt(pfn_t pfn)
70{ 70{
71 if (pfn_t_has_page(pfn)) 71 if (pfn_t_has_page(pfn) && !is_device_private_page(pfn_t_to_page(pfn)))
72 return __va(pfn_t_to_phys(pfn)); 72 return __va(pfn_t_to_phys(pfn));
73 return NULL; 73 return NULL;
74} 74}
diff --git a/include/linux/processor.h b/include/linux/processor.h
index dbc952eec869..dc78bdc7079a 100644
--- a/include/linux/processor.h
+++ b/include/linux/processor.h
@@ -32,15 +32,6 @@
32#define spin_cpu_relax() cpu_relax() 32#define spin_cpu_relax() cpu_relax()
33#endif 33#endif
34 34
35/*
36 * spin_cpu_yield may be called to yield (undirected) to the hypervisor if
37 * necessary. This should be used if the wait is expected to take longer
38 * than context switch overhead, but we can't sleep or do a directed yield.
39 */
40#ifndef spin_cpu_yield
41#define spin_cpu_yield() cpu_relax_yield()
42#endif
43
44#ifndef spin_end 35#ifndef spin_end
45#define spin_end() 36#define spin_end()
46#endif 37#endif
diff --git a/include/linux/rcu_sync.h b/include/linux/rcu_sync.h
index 6fc53a1345b3..9b83865d24f9 100644
--- a/include/linux/rcu_sync.h
+++ b/include/linux/rcu_sync.h
@@ -13,62 +13,44 @@
13#include <linux/wait.h> 13#include <linux/wait.h>
14#include <linux/rcupdate.h> 14#include <linux/rcupdate.h>
15 15
16enum rcu_sync_type { RCU_SYNC, RCU_SCHED_SYNC, RCU_BH_SYNC };
17
18/* Structure to mediate between updaters and fastpath-using readers. */ 16/* Structure to mediate between updaters and fastpath-using readers. */
19struct rcu_sync { 17struct rcu_sync {
20 int gp_state; 18 int gp_state;
21 int gp_count; 19 int gp_count;
22 wait_queue_head_t gp_wait; 20 wait_queue_head_t gp_wait;
23 21
24 int cb_state;
25 struct rcu_head cb_head; 22 struct rcu_head cb_head;
26
27 enum rcu_sync_type gp_type;
28}; 23};
29 24
30extern void rcu_sync_lockdep_assert(struct rcu_sync *);
31
32/** 25/**
33 * rcu_sync_is_idle() - Are readers permitted to use their fastpaths? 26 * rcu_sync_is_idle() - Are readers permitted to use their fastpaths?
34 * @rsp: Pointer to rcu_sync structure to use for synchronization 27 * @rsp: Pointer to rcu_sync structure to use for synchronization
35 * 28 *
36 * Returns true if readers are permitted to use their fastpaths. 29 * Returns true if readers are permitted to use their fastpaths. Must be
37 * Must be invoked within an RCU read-side critical section whose 30 * invoked within some flavor of RCU read-side critical section.
38 * flavor matches that of the rcu_sync struture.
39 */ 31 */
40static inline bool rcu_sync_is_idle(struct rcu_sync *rsp) 32static inline bool rcu_sync_is_idle(struct rcu_sync *rsp)
41{ 33{
42#ifdef CONFIG_PROVE_RCU 34 RCU_LOCKDEP_WARN(!rcu_read_lock_held() &&
43 rcu_sync_lockdep_assert(rsp); 35 !rcu_read_lock_bh_held() &&
44#endif 36 !rcu_read_lock_sched_held(),
45 return !rsp->gp_state; /* GP_IDLE */ 37 "suspicious rcu_sync_is_idle() usage");
38 return !READ_ONCE(rsp->gp_state); /* GP_IDLE */
46} 39}
47 40
48extern void rcu_sync_init(struct rcu_sync *, enum rcu_sync_type); 41extern void rcu_sync_init(struct rcu_sync *);
49extern void rcu_sync_enter_start(struct rcu_sync *); 42extern void rcu_sync_enter_start(struct rcu_sync *);
50extern void rcu_sync_enter(struct rcu_sync *); 43extern void rcu_sync_enter(struct rcu_sync *);
51extern void rcu_sync_exit(struct rcu_sync *); 44extern void rcu_sync_exit(struct rcu_sync *);
52extern void rcu_sync_dtor(struct rcu_sync *); 45extern void rcu_sync_dtor(struct rcu_sync *);
53 46
54#define __RCU_SYNC_INITIALIZER(name, type) { \ 47#define __RCU_SYNC_INITIALIZER(name) { \
55 .gp_state = 0, \ 48 .gp_state = 0, \
56 .gp_count = 0, \ 49 .gp_count = 0, \
57 .gp_wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.gp_wait), \ 50 .gp_wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.gp_wait), \
58 .cb_state = 0, \
59 .gp_type = type, \
60 } 51 }
61 52
62#define __DEFINE_RCU_SYNC(name, type) \ 53#define DEFINE_RCU_SYNC(name) \
63 struct rcu_sync_struct name = __RCU_SYNC_INITIALIZER(name, type) 54 struct rcu_sync name = __RCU_SYNC_INITIALIZER(name)
64
65#define DEFINE_RCU_SYNC(name) \
66 __DEFINE_RCU_SYNC(name, RCU_SYNC)
67
68#define DEFINE_RCU_SCHED_SYNC(name) \
69 __DEFINE_RCU_SYNC(name, RCU_SCHED_SYNC)
70
71#define DEFINE_RCU_BH_SYNC(name) \
72 __DEFINE_RCU_SYNC(name, RCU_BH_SYNC)
73 55
74#endif /* _LINUX_RCU_SYNC_H_ */ 56#endif /* _LINUX_RCU_SYNC_H_ */
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index b25d20822e75..8f7167478c1d 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -365,16 +365,15 @@ static inline void rcu_preempt_sleep_check(void) { }
365 * other macros that it invokes. 365 * other macros that it invokes.
366 */ 366 */
367#define rcu_assign_pointer(p, v) \ 367#define rcu_assign_pointer(p, v) \
368({ \ 368do { \
369 uintptr_t _r_a_p__v = (uintptr_t)(v); \ 369 uintptr_t _r_a_p__v = (uintptr_t)(v); \
370 rcu_check_sparse(p, __rcu); \ 370 rcu_check_sparse(p, __rcu); \
371 \ 371 \
372 if (__builtin_constant_p(v) && (_r_a_p__v) == (uintptr_t)NULL) \ 372 if (__builtin_constant_p(v) && (_r_a_p__v) == (uintptr_t)NULL) \
373 WRITE_ONCE((p), (typeof(p))(_r_a_p__v)); \ 373 WRITE_ONCE((p), (typeof(p))(_r_a_p__v)); \
374 else \ 374 else \
375 smp_store_release(&p, RCU_INITIALIZER((typeof(p))_r_a_p__v)); \ 375 smp_store_release(&p, RCU_INITIALIZER((typeof(p))_r_a_p__v)); \
376 _r_a_p__v; \ 376} while (0)
377})
378 377
379/** 378/**
380 * rcu_swap_protected() - swap an RCU and a regular pointer 379 * rcu_swap_protected() - swap an RCU and a regular pointer
@@ -586,7 +585,7 @@ static inline void rcu_preempt_sleep_check(void) { }
586 * read-side critical sections may be preempted and they may also block, but 585 * read-side critical sections may be preempted and they may also block, but
587 * only when acquiring spinlocks that are subject to priority inheritance. 586 * only when acquiring spinlocks that are subject to priority inheritance.
588 */ 587 */
589static inline void rcu_read_lock(void) 588static __always_inline void rcu_read_lock(void)
590{ 589{
591 __rcu_read_lock(); 590 __rcu_read_lock();
592 __acquire(RCU); 591 __acquire(RCU);
@@ -803,7 +802,7 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
803/** 802/**
804 * kfree_rcu() - kfree an object after a grace period. 803 * kfree_rcu() - kfree an object after a grace period.
805 * @ptr: pointer to kfree 804 * @ptr: pointer to kfree
806 * @rcu_head: the name of the struct rcu_head within the type of @ptr. 805 * @rhf: the name of the struct rcu_head within the type of @ptr.
807 * 806 *
808 * Many rcu callbacks functions just call kfree() on the base structure. 807 * Many rcu callbacks functions just call kfree() on the base structure.
809 * These functions are trivial, but their size adds up, and furthermore 808 * These functions are trivial, but their size adds up, and furthermore
@@ -826,9 +825,13 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
826 * The BUILD_BUG_ON check must not involve any function calls, hence the 825 * The BUILD_BUG_ON check must not involve any function calls, hence the
827 * checks are done in macros here. 826 * checks are done in macros here.
828 */ 827 */
829#define kfree_rcu(ptr, rcu_head) \ 828#define kfree_rcu(ptr, rhf) \
830 __kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head)) 829do { \
831 830 typeof (ptr) ___p = (ptr); \
831 \
832 if (___p) \
833 __kfree_rcu(&((___p)->rhf), offsetof(typeof(*(ptr)), rhf)); \
834} while (0)
832 835
833/* 836/*
834 * Place this after a lock-acquisition primitive to guarantee that 837 * Place this after a lock-acquisition primitive to guarantee that
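
The rcupdate.h hunk turns kfree_rcu() into a statement that evaluates its pointer argument once and does nothing for a NULL pointer, so callers no longer need an explicit NULL check. A minimal sketch (struct foo and drop_foo() are made-up names):

	#include <linux/rcupdate.h>
	#include <linux/slab.h>

	struct foo {
		int data;
		struct rcu_head rcu;	/* named by the second kfree_rcu() argument */
	};

	static void drop_foo(struct foo *p)
	{
		kfree_rcu(p, rcu);	/* safe even if p == NULL after this change */
	}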
diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index 2ea18a3def04..e401358c4e7e 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -34,12 +34,13 @@
34 */ 34 */
35struct rw_semaphore { 35struct rw_semaphore {
36 atomic_long_t count; 36 atomic_long_t count;
37#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
38 /* 37 /*
39 * Write owner. Used as a speculative check to see 38 * Write owner or one of the read owners as well flags regarding
40 * if the owner is running on the cpu. 39 * the current state of the rwsem. Can be used as a speculative
40 * check to see if the write owner is running on the cpu.
41 */ 41 */
42 struct task_struct *owner; 42 atomic_long_t owner;
43#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
43 struct optimistic_spin_queue osq; /* spinner MCS lock */ 44 struct optimistic_spin_queue osq; /* spinner MCS lock */
44#endif 45#endif
45 raw_spinlock_t wait_lock; 46 raw_spinlock_t wait_lock;
@@ -50,10 +51,10 @@ struct rw_semaphore {
50}; 51};
51 52
52/* 53/*
53 * Setting bit 1 of the owner field but not bit 0 will indicate 54 * Setting all bits of the owner field except bit 0 will indicate
54 * that the rwsem is writer-owned with an unknown owner. 55 * that the rwsem is writer-owned with an unknown owner.
55 */ 56 */
56#define RWSEM_OWNER_UNKNOWN ((struct task_struct *)-2L) 57#define RWSEM_OWNER_UNKNOWN (-2L)
57 58
58/* In all implementations count != 0 means locked */ 59/* In all implementations count != 0 means locked */
59static inline int rwsem_is_locked(struct rw_semaphore *sem) 60static inline int rwsem_is_locked(struct rw_semaphore *sem)
@@ -73,13 +74,14 @@ static inline int rwsem_is_locked(struct rw_semaphore *sem)
73#endif 74#endif
74 75
75#ifdef CONFIG_RWSEM_SPIN_ON_OWNER 76#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
76#define __RWSEM_OPT_INIT(lockname) , .osq = OSQ_LOCK_UNLOCKED, .owner = NULL 77#define __RWSEM_OPT_INIT(lockname) , .osq = OSQ_LOCK_UNLOCKED
77#else 78#else
78#define __RWSEM_OPT_INIT(lockname) 79#define __RWSEM_OPT_INIT(lockname)
79#endif 80#endif
80 81
81#define __RWSEM_INITIALIZER(name) \ 82#define __RWSEM_INITIALIZER(name) \
82 { __RWSEM_INIT_COUNT(name), \ 83 { __RWSEM_INIT_COUNT(name), \
84 .owner = ATOMIC_LONG_INIT(0), \
83 .wait_list = LIST_HEAD_INIT((name).wait_list), \ 85 .wait_list = LIST_HEAD_INIT((name).wait_list), \
84 .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock) \ 86 .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock) \
85 __RWSEM_OPT_INIT(name) \ 87 __RWSEM_OPT_INIT(name) \
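
The owner field is now an atomic_long_t that packs the owning task pointer together with low-order state bits. A rough sketch of that packing, assuming a hypothetical flag name (the real flag definitions live in kernel/locking/rwsem.c):

	#include <linux/atomic.h>
	#include <linux/rwsem.h>

	#define MY_READER_OWNED	(1UL << 0)	/* hypothetical low flag bit */

	static inline void my_set_owner(struct rw_semaphore *sem,
					struct task_struct *owner,
					unsigned long flags)
	{
		/* task_struct pointers are word aligned, so the low bits are free */
		atomic_long_set(&sem->owner, (unsigned long)owner | flags);
	}

	static inline struct task_struct *my_get_owner(struct rw_semaphore *sem)
	{
		/* mask off the flag bits to recover the task pointer */
		return (struct task_struct *)
			(atomic_long_read(&sem->owner) & ~MY_READER_OWNED);
	}
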
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1113dd4706ae..459d95e4a574 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -618,7 +618,7 @@ union rcu_special {
618 u8 blocked; 618 u8 blocked;
619 u8 need_qs; 619 u8 need_qs;
620 u8 exp_hint; /* Hint for performance. */ 620 u8 exp_hint; /* Hint for performance. */
621 u8 pad; /* No garbage from compiler! */ 621 u8 deferred_qs;
622 } b; /* Bits. */ 622 } b; /* Bits. */
623 u32 s; /* Set of bits. */ 623 u32 s; /* Set of bits. */
624}; 624};
@@ -1579,10 +1579,6 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p, const struct cpuma
1579} 1579}
1580#endif 1580#endif
1581 1581
1582#ifndef cpu_relax_yield
1583#define cpu_relax_yield() cpu_relax()
1584#endif
1585
1586extern int yield_to(struct task_struct *p, bool preempt); 1582extern int yield_to(struct task_struct *p, bool preempt);
1587extern void set_user_nice(struct task_struct *p, long nice); 1583extern void set_user_nice(struct task_struct *p, long nice);
1588extern int task_prio(const struct task_struct *p); 1584extern int task_prio(const struct task_struct *p);
diff --git a/include/linux/sched/wake_q.h b/include/linux/sched/wake_q.h
index ad826d2a4557..26a2013ac39c 100644
--- a/include/linux/sched/wake_q.h
+++ b/include/linux/sched/wake_q.h
@@ -51,6 +51,11 @@ static inline void wake_q_init(struct wake_q_head *head)
51 head->lastp = &head->first; 51 head->lastp = &head->first;
52} 52}
53 53
54static inline bool wake_q_empty(struct wake_q_head *head)
55{
56 return head->first == WAKE_Q_TAIL;
57}
58
54extern void wake_q_add(struct wake_q_head *head, struct task_struct *task); 59extern void wake_q_add(struct wake_q_head *head, struct task_struct *task);
55extern void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task); 60extern void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task);
56extern void wake_up_q(struct wake_q_head *head); 61extern void wake_up_q(struct wake_q_head *head);
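
wake_q_empty() lets callers that batch wakeups skip the wake path entirely when nothing was queued. A small sketch using the existing wake_q API (function and variable names are illustrative):

	#include <linux/sched/wake_q.h>

	static void wake_two(struct task_struct *t1, struct task_struct *t2)
	{
		DEFINE_WAKE_Q(wake_q);

		if (t1)
			wake_q_add(&wake_q, t1);
		if (t2)
			wake_q_add(&wake_q, t2);

		if (!wake_q_empty(&wake_q))	/* new helper: nothing to do if empty */
			wake_up_q(&wake_q);
	}
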
diff --git a/include/linux/signal.h b/include/linux/signal.h
index 9702016734b1..78c2bb376954 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -276,7 +276,7 @@ extern int sigprocmask(int, sigset_t *, sigset_t *);
276extern int set_user_sigmask(const sigset_t __user *usigmask, sigset_t *set, 276extern int set_user_sigmask(const sigset_t __user *usigmask, sigset_t *set,
277 sigset_t *oldset, size_t sigsetsize); 277 sigset_t *oldset, size_t sigsetsize);
278extern void restore_user_sigmask(const void __user *usigmask, 278extern void restore_user_sigmask(const void __user *usigmask,
279 sigset_t *sigsaved); 279 sigset_t *sigsaved, bool interrupted);
280extern void set_current_blocked(sigset_t *); 280extern void set_current_blocked(sigset_t *);
281extern void __set_current_blocked(const sigset_t *); 281extern void __set_current_blocked(const sigset_t *);
282extern int show_unhandled_signals; 282extern int show_unhandled_signals;
diff --git a/include/linux/smp.h b/include/linux/smp.h
index a56f08ff3097..6fc856c9eda5 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -35,7 +35,7 @@ int smp_call_function_single(int cpuid, smp_call_func_t func, void *info,
35/* 35/*
36 * Call a function on all processors 36 * Call a function on all processors
37 */ 37 */
38int on_each_cpu(smp_call_func_t func, void *info, int wait); 38void on_each_cpu(smp_call_func_t func, void *info, int wait);
39 39
40/* 40/*
41 * Call a function on processors specified by mask, which might include 41 * Call a function on processors specified by mask, which might include
@@ -101,7 +101,7 @@ extern void smp_cpus_done(unsigned int max_cpus);
101/* 101/*
102 * Call a function on all other processors 102 * Call a function on all other processors
103 */ 103 */
104int smp_call_function(smp_call_func_t func, void *info, int wait); 104void smp_call_function(smp_call_func_t func, void *info, int wait);
105void smp_call_function_many(const struct cpumask *mask, 105void smp_call_function_many(const struct cpumask *mask,
106 smp_call_func_t func, void *info, bool wait); 106 smp_call_func_t func, void *info, bool wait);
107 107
@@ -144,9 +144,8 @@ static inline void smp_send_stop(void) { }
144 * These macros fold the SMP functionality into a single CPU system 144 * These macros fold the SMP functionality into a single CPU system
145 */ 145 */
146#define raw_smp_processor_id() 0 146#define raw_smp_processor_id() 0
147static inline int up_smp_call_function(smp_call_func_t func, void *info) 147static inline void up_smp_call_function(smp_call_func_t func, void *info)
148{ 148{
149 return 0;
150} 149}
151#define smp_call_function(func, info, wait) \ 150#define smp_call_function(func, info, wait) \
152 (up_smp_call_function(func, info)) 151 (up_smp_call_function(func, info))
@@ -181,29 +180,46 @@ static inline int get_boot_cpu_id(void)
181 180
182#endif /* !SMP */ 181#endif /* !SMP */
183 182
184/* 183/**
 185 * smp_processor_id(): get the current CPU ID. 184 * raw_smp_processor_id() - get the current (unstable) CPU id
185 *
 186 * For when you know what you are doing and need an unstable
187 * CPU id.
188 */
189
190/**
191 * smp_processor_id() - get the current (stable) CPU id
192 *
193 * This is the normal accessor to the CPU id and should be used
194 * whenever possible.
186 * 195 *
187 * if DEBUG_PREEMPT is enabled then we check whether it is 196 * The CPU id is stable when:
188 * used in a preemption-safe way. (smp_processor_id() is safe
189 * if it's used in a preemption-off critical section, or in
190 * a thread that is bound to the current CPU.)
191 * 197 *
192 * NOTE: raw_smp_processor_id() is for internal use only 198 * - IRQs are disabled;
193 * (smp_processor_id() is the preferred variant), but in rare 199 * - preemption is disabled;
194 * instances it might also be used to turn off false positives 200 * - the task is CPU affine.
195 * (i.e. smp_processor_id() use that the debugging code reports but 201 *
 196 * which use for some reason is legal). Don't use this to hack around 202 * When CONFIG_DEBUG_PREEMPT is enabled, we verify these assumptions and
 197 * the warning message, as your code might not work under PREEMPT. 203 * WARN when smp_processor_id() is used while the CPU id is not stable.
204 */
205
206/*
207 * Allow the architecture to differentiate between a stable and unstable read.
208 * For example, x86 uses an IRQ-safe asm-volatile read for the unstable but a
209 * regular asm read for the stable.
198 */ 210 */
211#ifndef __smp_processor_id
212#define __smp_processor_id(x) raw_smp_processor_id(x)
213#endif
214
199#ifdef CONFIG_DEBUG_PREEMPT 215#ifdef CONFIG_DEBUG_PREEMPT
200 extern unsigned int debug_smp_processor_id(void); 216 extern unsigned int debug_smp_processor_id(void);
201# define smp_processor_id() debug_smp_processor_id() 217# define smp_processor_id() debug_smp_processor_id()
202#else 218#else
203# define smp_processor_id() raw_smp_processor_id() 219# define smp_processor_id() __smp_processor_id()
204#endif 220#endif
205 221
206#define get_cpu() ({ preempt_disable(); smp_processor_id(); }) 222#define get_cpu() ({ preempt_disable(); __smp_processor_id(); })
207#define put_cpu() preempt_enable() 223#define put_cpu() preempt_enable()
208 224
209/* 225/*
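
The rewritten comment boils down to: the CPU id is only trustworthy while migration is excluded. A minimal sketch of the stable-access pattern described above:

	#include <linux/printk.h>
	#include <linux/smp.h>

	static void touch_this_cpu(void)
	{
		int cpu = get_cpu();		/* disables preemption; id is stable */

		pr_info("running on CPU %d\n", cpu);
		put_cpu();			/* re-enables preemption */

		/*
		 * Outside such a region only raw_smp_processor_id() is legitimate,
		 * and the returned value may be stale by the time it is used.
		 */
	}
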
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index 7f7c8c050f63..9cfcc8a756ae 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -120,9 +120,17 @@ struct srcu_struct {
120 * 120 *
121 * See include/linux/percpu-defs.h for the rules on per-CPU variables. 121 * See include/linux/percpu-defs.h for the rules on per-CPU variables.
122 */ 122 */
123#define __DEFINE_SRCU(name, is_static) \ 123#ifdef MODULE
124 static DEFINE_PER_CPU(struct srcu_data, name##_srcu_data);\ 124# define __DEFINE_SRCU(name, is_static) \
125 is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name, name##_srcu_data) 125 is_static struct srcu_struct name; \
126 struct srcu_struct * const __srcu_struct_##name \
127 __section("___srcu_struct_ptrs") = &name
128#else
129# define __DEFINE_SRCU(name, is_static) \
130 static DEFINE_PER_CPU(struct srcu_data, name##_srcu_data); \
131 is_static struct srcu_struct name = \
132 __SRCU_STRUCT_INIT(name, name##_srcu_data)
133#endif
126#define DEFINE_SRCU(name) __DEFINE_SRCU(name, /* not static */) 134#define DEFINE_SRCU(name) __DEFINE_SRCU(name, /* not static */)
127#define DEFINE_STATIC_SRCU(name) __DEFINE_SRCU(name, static) 135#define DEFINE_STATIC_SRCU(name) __DEFINE_SRCU(name, static)
128 136
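
With the new MODULE branch, a modular DEFINE_SRCU()/DEFINE_STATIC_SRCU() registers its srcu_struct through the ___srcu_struct_ptrs section instead of defining the per-CPU data inline; usage on both the read and update side is unchanged. A minimal sketch:

	#include <linux/srcu.h>

	DEFINE_STATIC_SRCU(my_srcu);		/* works built-in and, now, modular */

	static int my_reader(void)
	{
		int idx = srcu_read_lock(&my_srcu);

		/* SRCU read-side critical section (may sleep) */
		srcu_read_unlock(&my_srcu, idx);
		return 0;
	}

	static void my_updater(void)
	{
		synchronize_srcu(&my_srcu);	/* wait for pre-existing readers */
	}
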
diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
index 6d3635c86dbe..f9a0c6189852 100644
--- a/include/linux/stop_machine.h
+++ b/include/linux/stop_machine.h
@@ -36,6 +36,7 @@ int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg);
36int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg); 36int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg);
37void stop_machine_park(int cpu); 37void stop_machine_park(int cpu);
38void stop_machine_unpark(int cpu); 38void stop_machine_unpark(int cpu);
39void stop_machine_yield(const struct cpumask *cpumask);
39 40
40#else /* CONFIG_SMP */ 41#else /* CONFIG_SMP */
41 42
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 8594001e8be8..f0d262ad7b78 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -209,8 +209,9 @@ extern int suspend_valid_only_mem(suspend_state_t state);
209 209
210extern unsigned int pm_suspend_global_flags; 210extern unsigned int pm_suspend_global_flags;
211 211
212#define PM_SUSPEND_FLAG_FW_SUSPEND (1 << 0) 212#define PM_SUSPEND_FLAG_FW_SUSPEND BIT(0)
213#define PM_SUSPEND_FLAG_FW_RESUME (1 << 1) 213#define PM_SUSPEND_FLAG_FW_RESUME BIT(1)
214#define PM_SUSPEND_FLAG_NO_PLATFORM BIT(2)
214 215
215static inline void pm_suspend_clear_flags(void) 216static inline void pm_suspend_clear_flags(void)
216{ 217{
@@ -227,6 +228,11 @@ static inline void pm_set_resume_via_firmware(void)
227 pm_suspend_global_flags |= PM_SUSPEND_FLAG_FW_RESUME; 228 pm_suspend_global_flags |= PM_SUSPEND_FLAG_FW_RESUME;
228} 229}
229 230
231static inline void pm_set_suspend_no_platform(void)
232{
233 pm_suspend_global_flags |= PM_SUSPEND_FLAG_NO_PLATFORM;
234}
235
230/** 236/**
231 * pm_suspend_via_firmware - Check if platform firmware will suspend the system. 237 * pm_suspend_via_firmware - Check if platform firmware will suspend the system.
232 * 238 *
@@ -268,6 +274,22 @@ static inline bool pm_resume_via_firmware(void)
268 return !!(pm_suspend_global_flags & PM_SUSPEND_FLAG_FW_RESUME); 274 return !!(pm_suspend_global_flags & PM_SUSPEND_FLAG_FW_RESUME);
269} 275}
270 276
277/**
278 * pm_suspend_no_platform - Check if platform may change device power states.
279 *
280 * To be called during system-wide power management transitions to sleep states
281 * or during the subsequent system-wide transitions back to the working state.
282 *
283 * Return 'true' if the power states of devices remain under full control of the
284 * kernel throughout the system-wide suspend and resume cycle in progress (that
285 * is, if a device is put into a certain power state during suspend, it can be
286 * expected to remain in that state during resume).
287 */
288static inline bool pm_suspend_no_platform(void)
289{
290 return !!(pm_suspend_global_flags & PM_SUSPEND_FLAG_NO_PLATFORM);
291}
292
 271/* Suspend-to-idle state machine. */ 293
272enum s2idle_states { 294enum s2idle_states {
273 S2IDLE_STATE_NONE, /* Not suspended/suspending. */ 295 S2IDLE_STATE_NONE, /* Not suspended/suspending. */
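
A hedged sketch of how a driver suspend callback might consume the new helper (the device and function names are made up):

	#include <linux/pm.h>
	#include <linux/suspend.h>

	static int mydev_suspend(struct device *dev)
	{
		/*
		 * If device power states stay under full kernel control for this
		 * cycle, the state programmed here survives until resume and
		 * nothing needs to be re-negotiated with platform firmware.
		 */
		if (pm_suspend_no_platform())
			return 0;		/* keep the kernel-controlled state */

		/* otherwise assume firmware may change the device's power state */
		return 0;
	}
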
diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index a8ab0f143ac4..b27e2ffa96c1 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -113,6 +113,34 @@ static inline ktime_t ktime_get_coarse_clocktai(void)
113 return ktime_get_coarse_with_offset(TK_OFFS_TAI); 113 return ktime_get_coarse_with_offset(TK_OFFS_TAI);
114} 114}
115 115
116static inline ktime_t ktime_get_coarse(void)
117{
118 struct timespec64 ts;
119
120 ktime_get_coarse_ts64(&ts);
121 return timespec64_to_ktime(ts);
122}
123
124static inline u64 ktime_get_coarse_ns(void)
125{
126 return ktime_to_ns(ktime_get_coarse());
127}
128
129static inline u64 ktime_get_coarse_real_ns(void)
130{
131 return ktime_to_ns(ktime_get_coarse_real());
132}
133
134static inline u64 ktime_get_coarse_boottime_ns(void)
135{
136 return ktime_to_ns(ktime_get_coarse_boottime());
137}
138
139static inline u64 ktime_get_coarse_clocktai_ns(void)
140{
141 return ktime_to_ns(ktime_get_coarse_clocktai());
142}
143
116/** 144/**
117 * ktime_mono_to_real - Convert monotonic time to clock realtime 145 * ktime_mono_to_real - Convert monotonic time to clock realtime
118 */ 146 */
@@ -131,12 +159,12 @@ static inline u64 ktime_get_real_ns(void)
131 return ktime_to_ns(ktime_get_real()); 159 return ktime_to_ns(ktime_get_real());
132} 160}
133 161
134static inline u64 ktime_get_boot_ns(void) 162static inline u64 ktime_get_boottime_ns(void)
135{ 163{
136 return ktime_to_ns(ktime_get_boottime()); 164 return ktime_to_ns(ktime_get_boottime());
137} 165}
138 166
139static inline u64 ktime_get_tai_ns(void) 167static inline u64 ktime_get_clocktai_ns(void)
140{ 168{
141 return ktime_to_ns(ktime_get_clocktai()); 169 return ktime_to_ns(ktime_get_clocktai());
142} 170}
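
The new coarse *_ns() accessors mirror the precise ones but return the last tick's value without touching the clocksource: cheaper, at the cost of up to one tick of error. A quick comparison sketch:

	#include <linux/printk.h>
	#include <linux/timekeeping.h>

	static void compare_clocks(void)
	{
		u64 precise = ktime_get_ns();		/* reads the clocksource */
		u64 coarse  = ktime_get_coarse_ns();	/* last tick, no hardware access */

		pr_debug("precise=%llu coarse=%llu\n",
			 (unsigned long long)precise, (unsigned long long)coarse);
	}
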
diff --git a/include/linux/timer.h b/include/linux/timer.h
index 7b066fd38248..282e4f2a532a 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -36,19 +36,30 @@ struct timer_list {
36#define __TIMER_LOCKDEP_MAP_INITIALIZER(_kn) 36#define __TIMER_LOCKDEP_MAP_INITIALIZER(_kn)
37#endif 37#endif
38 38
39/* 39/**
40 * A deferrable timer will work normally when the system is busy, but 40 * @TIMER_DEFERRABLE: A deferrable timer will work normally when the
41 * will not cause a CPU to come out of idle just to service it; instead, 41 * system is busy, but will not cause a CPU to come out of idle just
42 * the timer will be serviced when the CPU eventually wakes up with a 42 * to service it; instead, the timer will be serviced when the CPU
43 * subsequent non-deferrable timer. 43 * eventually wakes up with a subsequent non-deferrable timer.
44 * 44 *
45 * An irqsafe timer is executed with IRQ disabled and it's safe to wait for 45 * @TIMER_IRQSAFE: An irqsafe timer is executed with IRQ disabled and
46 * the completion of the running instance from IRQ handlers, for example, 46 * it's safe to wait for the completion of the running instance from
47 * by calling del_timer_sync(). 47 * IRQ handlers, for example, by calling del_timer_sync().
48 * 48 *
49 * Note: The irq disabled callback execution is a special case for 49 * Note: The irq disabled callback execution is a special case for
50 * workqueue locking issues. It's not meant for executing random crap 50 * workqueue locking issues. It's not meant for executing random crap
51 * with interrupts disabled. Abuse is monitored! 51 * with interrupts disabled. Abuse is monitored!
52 *
53 * @TIMER_PINNED: A pinned timer will not be affected by any timer
54 * placement heuristics (like, NOHZ) and will always expire on the CPU
55 * on which the timer was enqueued.
56 *
57 * Note: Because enqueuing of timers can migrate the timer from one
58 * CPU to another, pinned timers are not guaranteed to stay on the
 59 * initially selected CPU. They move to the CPU on which the enqueue
60 * function is invoked via mod_timer() or add_timer(). If the timer
61 * should be placed on a particular CPU, then add_timer_on() has to be
62 * used.
52 */ 63 */
53#define TIMER_CPUMASK 0x0003FFFF 64#define TIMER_CPUMASK 0x0003FFFF
54#define TIMER_MIGRATING 0x00040000 65#define TIMER_MIGRATING 0x00040000
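
As the new comment stresses, TIMER_PINNED only disables placement heuristics; to bind a timer to one CPU it must be armed with add_timer_on(). A minimal sketch (names are illustrative):

	#include <linux/jiffies.h>
	#include <linux/timer.h>

	static struct timer_list my_timer;

	static void my_timer_fn(struct timer_list *t)
	{
		/* expires on the CPU the timer was enqueued on */
	}

	static void start_pinned_timer(int cpu)
	{
		timer_setup(&my_timer, my_timer_fn, TIMER_PINNED);
		my_timer.expires = jiffies + HZ;
		add_timer_on(&my_timer, cpu);	/* enqueue on a specific CPU */
	}
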
diff --git a/include/linux/torture.h b/include/linux/torture.h
index 23d80db426d7..a620118385bb 100644
--- a/include/linux/torture.h
+++ b/include/linux/torture.h
@@ -66,7 +66,7 @@ int torture_shutdown_init(int ssecs, void (*cleanup)(void));
66 66
67/* Task stuttering, which forces load/no-load transitions. */ 67/* Task stuttering, which forces load/no-load transitions. */
68bool stutter_wait(const char *title); 68bool stutter_wait(const char *title);
69int torture_stutter_init(int s); 69int torture_stutter_init(int s, int sgap);
70 70
71/* Initialization and cleanup. */ 71/* Initialization and cleanup. */
72bool torture_init_begin(char *ttype, int v); 72bool torture_init_begin(char *ttype, int v);
diff --git a/include/linux/types.h b/include/linux/types.h
index 231114ae38f4..05030f608be3 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -174,7 +174,7 @@ typedef struct {
174 174
175#ifdef CONFIG_64BIT 175#ifdef CONFIG_64BIT
176typedef struct { 176typedef struct {
177 long counter; 177 s64 counter;
178} atomic64_t; 178} atomic64_t;
179#endif 179#endif
180 180
diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index 0e01e6129145..5921599b6dc4 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -265,6 +265,7 @@ enum xa_lock_type {
265#define XA_FLAGS_TRACK_FREE ((__force gfp_t)4U) 265#define XA_FLAGS_TRACK_FREE ((__force gfp_t)4U)
266#define XA_FLAGS_ZERO_BUSY ((__force gfp_t)8U) 266#define XA_FLAGS_ZERO_BUSY ((__force gfp_t)8U)
267#define XA_FLAGS_ALLOC_WRAPPED ((__force gfp_t)16U) 267#define XA_FLAGS_ALLOC_WRAPPED ((__force gfp_t)16U)
268#define XA_FLAGS_ACCOUNT ((__force gfp_t)32U)
268#define XA_FLAGS_MARK(mark) ((__force gfp_t)((1U << __GFP_BITS_SHIFT) << \ 269#define XA_FLAGS_MARK(mark) ((__force gfp_t)((1U << __GFP_BITS_SHIFT) << \
269 (__force unsigned)(mark))) 270 (__force unsigned)(mark)))
270 271
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 8fb5be3ca0ca..1fce25b1d87f 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -2007,7 +2007,7 @@ enum cfg80211_signal_type {
2007 * received by the device (not just by the host, in case it was 2007 * received by the device (not just by the host, in case it was
2008 * buffered on the device) and be accurate to about 10ms. 2008 * buffered on the device) and be accurate to about 10ms.
2009 * If the frame isn't buffered, just passing the return value of 2009 * If the frame isn't buffered, just passing the return value of
2010 * ktime_get_boot_ns() is likely appropriate. 2010 * ktime_get_boottime_ns() is likely appropriate.
2011 * @parent_tsf: the time at the start of reception of the first octet of the 2011 * @parent_tsf: the time at the start of reception of the first octet of the
2012 * timestamp field of the frame. The time is the TSF of the BSS specified 2012 * timestamp field of the frame. The time is the TSF of the BSS specified
2013 * by %parent_bssid. 2013 * by %parent_bssid.
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 4790beaa86e0..ee7405e759ba 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -262,8 +262,8 @@ static inline bool ip6_sk_ignore_df(const struct sock *sk)
262 inet6_sk(sk)->pmtudisc == IPV6_PMTUDISC_OMIT; 262 inet6_sk(sk)->pmtudisc == IPV6_PMTUDISC_OMIT;
263} 263}
264 264
265static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt, 265static inline const struct in6_addr *rt6_nexthop(const struct rt6_info *rt,
266 struct in6_addr *daddr) 266 const struct in6_addr *daddr)
267{ 267{
268 if (rt->rt6i_flags & RTF_GATEWAY) 268 if (rt->rt6i_flags & RTF_GATEWAY)
269 return &rt->rt6i_gateway; 269 return &rt->rt6i_gateway;
diff --git a/include/net/route.h b/include/net/route.h
index 065b47754f05..55ff71ffb796 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -221,6 +221,7 @@ void ip_rt_get_source(u8 *src, struct sk_buff *skb, struct rtable *rt);
221struct rtable *rt_dst_alloc(struct net_device *dev, 221struct rtable *rt_dst_alloc(struct net_device *dev,
222 unsigned int flags, u16 type, 222 unsigned int flags, u16 type,
223 bool nopolicy, bool noxfrm, bool will_cache); 223 bool nopolicy, bool noxfrm, bool will_cache);
224struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt);
224 225
225struct in_ifaddr; 226struct in_ifaddr;
226void fib_add_ifaddr(struct in_ifaddr *); 227void fib_add_ifaddr(struct in_ifaddr *);
diff --git a/include/net/tls.h b/include/net/tls.h
index 4a55ce6a303f..53d96bca220d 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -373,21 +373,6 @@ static inline bool tls_is_partially_sent_record(struct tls_context *ctx)
373 return !!ctx->partially_sent_record; 373 return !!ctx->partially_sent_record;
374} 374}
375 375
376static inline int tls_complete_pending_work(struct sock *sk,
377 struct tls_context *ctx,
378 int flags, long *timeo)
379{
380 int rc = 0;
381
382 if (unlikely(sk->sk_write_pending))
383 rc = wait_on_pending_writer(sk, timeo);
384
385 if (!rc && tls_is_partially_sent_record(ctx))
386 rc = tls_push_partial_record(sk, ctx, flags);
387
388 return rc;
389}
390
391static inline bool tls_is_pending_open_record(struct tls_context *tls_ctx) 376static inline bool tls_is_pending_open_record(struct tls_context *tls_ctx)
392{ 377{
393 return tls_ctx->pending_open_record_frags; 378 return tls_ctx->pending_open_record_frags;
diff --git a/include/vdso/datapage.h b/include/vdso/datapage.h
new file mode 100644
index 000000000000..2e302c0f41f7
--- /dev/null
+++ b/include/vdso/datapage.h
@@ -0,0 +1,89 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2#ifndef __VDSO_DATAPAGE_H
3#define __VDSO_DATAPAGE_H
4
5#ifndef __ASSEMBLY__
6
7#include <linux/bits.h>
8#include <linux/time.h>
9#include <linux/types.h>
10
11#define VDSO_BASES (CLOCK_TAI + 1)
12#define VDSO_HRES (BIT(CLOCK_REALTIME) | \
13 BIT(CLOCK_MONOTONIC) | \
14 BIT(CLOCK_BOOTTIME) | \
15 BIT(CLOCK_TAI))
16#define VDSO_COARSE (BIT(CLOCK_REALTIME_COARSE) | \
17 BIT(CLOCK_MONOTONIC_COARSE))
18#define VDSO_RAW (BIT(CLOCK_MONOTONIC_RAW))
19
20#define CS_HRES_COARSE 0
21#define CS_RAW 1
22#define CS_BASES (CS_RAW + 1)
23
24/**
25 * struct vdso_timestamp - basetime per clock_id
26 * @sec: seconds
27 * @nsec: nanoseconds
28 *
29 * There is one vdso_timestamp object in vvar for each vDSO-accelerated
30 * clock_id. For high-resolution clocks, this encodes the time
31 * corresponding to vdso_data.cycle_last. For coarse clocks this encodes
32 * the actual time.
33 *
 34 * Note that for high resolution clocks, nsec is left-shifted by
35 * vdso_data.cs[x].shift.
36 */
37struct vdso_timestamp {
38 u64 sec;
39 u64 nsec;
40};
41
42/**
43 * struct vdso_data - vdso datapage representation
44 * @seq: timebase sequence counter
45 * @clock_mode: clock mode
46 * @cycle_last: timebase at clocksource init
47 * @mask: clocksource mask
48 * @mult: clocksource multiplier
49 * @shift: clocksource shift
50 * @basetime[clock_id]: basetime per clock_id
51 * @tz_minuteswest: minutes west of Greenwich
52 * @tz_dsttime: type of DST correction
53 * @hrtimer_res: hrtimer resolution
54 * @__unused: unused
55 *
56 * vdso_data will be accessed by 64 bit and compat code at the same time
57 * so we should be careful before modifying this structure.
58 */
59struct vdso_data {
60 u32 seq;
61
62 s32 clock_mode;
63 u64 cycle_last;
64 u64 mask;
65 u32 mult;
66 u32 shift;
67
68 struct vdso_timestamp basetime[VDSO_BASES];
69
70 s32 tz_minuteswest;
71 s32 tz_dsttime;
72 u32 hrtimer_res;
73 u32 __unused;
74};
75
76/*
77 * We use the hidden visibility to prevent the compiler from generating a GOT
78 * relocation. Not only is going through a GOT useless (the entry couldn't and
79 * must not be overridden by another library), it does not even work: the linker
80 * cannot generate an absolute address to the data page.
81 *
82 * With the hidden visibility, the compiler simply generates a PC-relative
83 * relocation, and this is what we need.
84 */
85extern struct vdso_data _vdso_data[CS_BASES] __attribute__((visibility("hidden")));
86
87#endif /* !__ASSEMBLY__ */
88
89#endif /* __VDSO_DATAPAGE_H */
diff --git a/include/vdso/helpers.h b/include/vdso/helpers.h
new file mode 100644
index 000000000000..01641dbb68ef
--- /dev/null
+++ b/include/vdso/helpers.h
@@ -0,0 +1,56 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2#ifndef __VDSO_HELPERS_H
3#define __VDSO_HELPERS_H
4
5#ifndef __ASSEMBLY__
6
7#include <vdso/datapage.h>
8
9static __always_inline u32 vdso_read_begin(const struct vdso_data *vd)
10{
11 u32 seq;
12
13 while ((seq = READ_ONCE(vd->seq)) & 1)
14 cpu_relax();
15
16 smp_rmb();
17 return seq;
18}
19
20static __always_inline u32 vdso_read_retry(const struct vdso_data *vd,
21 u32 start)
22{
23 u32 seq;
24
25 smp_rmb();
26 seq = READ_ONCE(vd->seq);
27 return seq != start;
28}
29
30static __always_inline void vdso_write_begin(struct vdso_data *vd)
31{
32 /*
 33 * WRITE_ONCE() is required, otherwise the compiler can validly tear
 34 * updates to vd[x].seq and the value seen by the reader may be
 35 * inconsistent.
36 */
37 WRITE_ONCE(vd[CS_HRES_COARSE].seq, vd[CS_HRES_COARSE].seq + 1);
38 WRITE_ONCE(vd[CS_RAW].seq, vd[CS_RAW].seq + 1);
39 smp_wmb();
40}
41
42static __always_inline void vdso_write_end(struct vdso_data *vd)
43{
44 smp_wmb();
45 /*
 46 * WRITE_ONCE() is required, otherwise the compiler can validly tear
 47 * updates to vd[x].seq and the value seen by the reader may be
 48 * inconsistent.
49 */
50 WRITE_ONCE(vd[CS_HRES_COARSE].seq, vd[CS_HRES_COARSE].seq + 1);
51 WRITE_ONCE(vd[CS_RAW].seq, vd[CS_RAW].seq + 1);
52}
53
54#endif /* !__ASSEMBLY__ */
55
56#endif /* __VDSO_HELPERS_H */
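
For reference, a sketch of the reader loop these helpers are designed for: retry the snapshot whenever the sequence number was odd or changed underneath the read. Everything except the two helpers is illustrative.

	#include <linux/compiler.h>
	#include <vdso/datapage.h>
	#include <vdso/helpers.h>

	static __always_inline u64 read_base_sec(const struct vdso_data *vd, int clk)
	{
		u32 seq;
		u64 sec;

		do {
			seq = vdso_read_begin(vd);	/* spins while an update is in flight */
			sec = vd->basetime[clk].sec;
		} while (unlikely(vdso_read_retry(vd, seq)));

		return sec;
	}
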
diff --git a/include/vdso/vsyscall.h b/include/vdso/vsyscall.h
new file mode 100644
index 000000000000..2c6134e0c23d
--- /dev/null
+++ b/include/vdso/vsyscall.h
@@ -0,0 +1,11 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2#ifndef __VDSO_VSYSCALL_H
3#define __VDSO_VSYSCALL_H
4
5#ifndef __ASSEMBLY__
6
7#include <asm/vdso/vsyscall.h>
8
9#endif /* !__ASSEMBLY__ */
10
11#endif /* __VDSO_VSYSCALL_H */
diff --git a/init/init_task.c b/init/init_task.c
index 3c27c0efa316..7ab773b9b3cd 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -167,6 +167,8 @@ struct task_struct init_task
167 .softirqs_enabled = 1, 167 .softirqs_enabled = 1,
168#endif 168#endif
169#ifdef CONFIG_LOCKDEP 169#ifdef CONFIG_LOCKDEP
170 .lockdep_depth = 0, /* no locks held yet */
171 .curr_chain_key = INITIAL_CHAIN_KEY,
170 .lockdep_recursion = 0, 172 .lockdep_recursion = 0,
171#endif 173#endif
172#ifdef CONFIG_FUNCTION_GRAPH_TRACER 174#ifdef CONFIG_FUNCTION_GRAPH_TRACER
diff --git a/init/initramfs.c b/init/initramfs.c
index 178130fd61c2..c47dad0884f7 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -617,7 +617,7 @@ static inline void clean_rootfs(void)
617#endif /* CONFIG_BLK_DEV_RAM */ 617#endif /* CONFIG_BLK_DEV_RAM */
618 618
619#ifdef CONFIG_BLK_DEV_RAM 619#ifdef CONFIG_BLK_DEV_RAM
620static void populate_initrd_image(char *err) 620static void __init populate_initrd_image(char *err)
621{ 621{
622 ssize_t written; 622 ssize_t written;
623 int fd; 623 int fd;
@@ -637,7 +637,7 @@ static void populate_initrd_image(char *err)
637 ksys_close(fd); 637 ksys_close(fd);
638} 638}
639#else 639#else
640static void populate_initrd_image(char *err) 640static void __init populate_initrd_image(char *err)
641{ 641{
642 printk(KERN_EMERG "Initramfs unpacking failed: %s\n", err); 642 printk(KERN_EMERG "Initramfs unpacking failed: %s\n", err);
643} 643}
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 42d17f730780..5b30f8baaf02 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1668,7 +1668,7 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
1668 if (err < 0) 1668 if (err < 0)
1669 goto free_prog; 1669 goto free_prog;
1670 1670
1671 prog->aux->load_time = ktime_get_boot_ns(); 1671 prog->aux->load_time = ktime_get_boottime_ns();
1672 err = bpf_obj_name_cpy(prog->aux->name, attr->prog_name); 1672 err = bpf_obj_name_cpy(prog->aux->name, attr->prog_name);
1673 if (err) 1673 if (err)
1674 goto free_prog; 1674 goto free_prog;
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index bf9dbffd46b1..cdbeff87fa99 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -101,7 +101,7 @@ static DEFINE_SPINLOCK(cgroup_idr_lock);
101 */ 101 */
102static DEFINE_SPINLOCK(cgroup_file_kn_lock); 102static DEFINE_SPINLOCK(cgroup_file_kn_lock);
103 103
104struct percpu_rw_semaphore cgroup_threadgroup_rwsem; 104DEFINE_PERCPU_RWSEM(cgroup_threadgroup_rwsem);
105 105
106#define cgroup_assert_mutex_or_rcu_locked() \ 106#define cgroup_assert_mutex_or_rcu_locked() \
107 RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \ 107 RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \
@@ -5666,7 +5666,6 @@ int __init cgroup_init(void)
5666 int ssid; 5666 int ssid;
5667 5667
5668 BUILD_BUG_ON(CGROUP_SUBSYS_COUNT > 16); 5668 BUILD_BUG_ON(CGROUP_SUBSYS_COUNT > 16);
5669 BUG_ON(percpu_init_rwsem(&cgroup_threadgroup_rwsem));
5670 BUG_ON(cgroup_init_cftypes(NULL, cgroup_base_files)); 5669 BUG_ON(cgroup_init_cftypes(NULL, cgroup_base_files));
5671 BUG_ON(cgroup_init_cftypes(NULL, cgroup1_base_files)); 5670 BUG_ON(cgroup_init_cftypes(NULL, cgroup1_base_files));
5672 5671
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 077fde6fb953..e84c0873559e 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -522,7 +522,7 @@ static int bringup_wait_for_ap(unsigned int cpu)
522 /* 522 /*
523 * SMT soft disabling on X86 requires to bring the CPU out of the 523 * SMT soft disabling on X86 requires to bring the CPU out of the
524 * BIOS 'wait for SIPI' state in order to set the CR4.MCE bit. The 524 * BIOS 'wait for SIPI' state in order to set the CR4.MCE bit. The
525 * CPU marked itself as booted_once in cpu_notify_starting() so the 525 * CPU marked itself as booted_once in notify_cpu_starting() so the
526 * cpu_smt_allowed() check will now return false if this is not the 526 * cpu_smt_allowed() check will now return false if this is not the
527 * primary sibling. 527 * primary sibling.
528 */ 528 */
@@ -1221,6 +1221,13 @@ int freeze_secondary_cpus(int primary)
1221 for_each_online_cpu(cpu) { 1221 for_each_online_cpu(cpu) {
1222 if (cpu == primary) 1222 if (cpu == primary)
1223 continue; 1223 continue;
1224
1225 if (pm_wakeup_pending()) {
1226 pr_info("Wakeup pending. Abort CPU freeze\n");
1227 error = -EBUSY;
1228 break;
1229 }
1230
1224 trace_suspend_resume(TPS("CPU_OFF"), cpu, true); 1231 trace_suspend_resume(TPS("CPU_OFF"), cpu, true);
1225 error = _cpu_down(cpu, 1, CPUHP_OFFLINE); 1232 error = _cpu_down(cpu, 1, CPUHP_OFFLINE);
1226 trace_suspend_resume(TPS("CPU_OFF"), cpu, false); 1233 trace_suspend_resume(TPS("CPU_OFF"), cpu, false);
@@ -1964,6 +1971,9 @@ static ssize_t write_cpuhp_fail(struct device *dev,
1964 if (ret) 1971 if (ret)
1965 return ret; 1972 return ret;
1966 1973
1974 if (fail < CPUHP_OFFLINE || fail > CPUHP_ONLINE)
1975 return -EINVAL;
1976
1967 /* 1977 /*
1968 * Cannot fail STARTING/DYING callbacks. 1978 * Cannot fail STARTING/DYING callbacks.
1969 */ 1979 */
@@ -2339,6 +2349,9 @@ static int __init mitigations_parse_cmdline(char *arg)
2339 cpu_mitigations = CPU_MITIGATIONS_AUTO; 2349 cpu_mitigations = CPU_MITIGATIONS_AUTO;
2340 else if (!strcmp(arg, "auto,nosmt")) 2350 else if (!strcmp(arg, "auto,nosmt"))
2341 cpu_mitigations = CPU_MITIGATIONS_AUTO_NOSMT; 2351 cpu_mitigations = CPU_MITIGATIONS_AUTO_NOSMT;
2352 else
2353 pr_crit("Unsupported mitigations=%s, system may still be vulnerable\n",
2354 arg);
2342 2355
2343 return 0; 2356 return 0;
2344} 2357}
diff --git a/kernel/events/core.c b/kernel/events/core.c
index abbd4b3b96c2..29e5f7880a4b 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5005,6 +5005,9 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg)
5005 if (perf_event_check_period(event, value)) 5005 if (perf_event_check_period(event, value))
5006 return -EINVAL; 5006 return -EINVAL;
5007 5007
5008 if (!event->attr.freq && (value & (1ULL << 63)))
5009 return -EINVAL;
5010
5008 event_function_call(event, __perf_event_period, &value); 5011 event_function_call(event, __perf_event_period, &value);
5009 5012
5010 return 0; 5013 return 0;
@@ -5923,7 +5926,7 @@ static void perf_sample_regs_user(struct perf_regs *regs_user,
5923 if (user_mode(regs)) { 5926 if (user_mode(regs)) {
5924 regs_user->abi = perf_reg_abi(current); 5927 regs_user->abi = perf_reg_abi(current);
5925 regs_user->regs = regs; 5928 regs_user->regs = regs;
5926 } else if (current->mm) { 5929 } else if (!(current->flags & PF_KTHREAD)) {
5927 perf_get_regs_user(regs_user, regs, regs_user_copy); 5930 perf_get_regs_user(regs_user, regs, regs_user_copy);
5928 } else { 5931 } else {
5929 regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE; 5932 regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE;
@@ -10033,6 +10036,12 @@ void perf_pmu_unregister(struct pmu *pmu)
10033} 10036}
10034EXPORT_SYMBOL_GPL(perf_pmu_unregister); 10037EXPORT_SYMBOL_GPL(perf_pmu_unregister);
10035 10038
10039static inline bool has_extended_regs(struct perf_event *event)
10040{
10041 return (event->attr.sample_regs_user & PERF_REG_EXTENDED_MASK) ||
10042 (event->attr.sample_regs_intr & PERF_REG_EXTENDED_MASK);
10043}
10044
10036static int perf_try_init_event(struct pmu *pmu, struct perf_event *event) 10045static int perf_try_init_event(struct pmu *pmu, struct perf_event *event)
10037{ 10046{
10038 struct perf_event_context *ctx = NULL; 10047 struct perf_event_context *ctx = NULL;
@@ -10064,12 +10073,16 @@ static int perf_try_init_event(struct pmu *pmu, struct perf_event *event)
10064 perf_event_ctx_unlock(event->group_leader, ctx); 10073 perf_event_ctx_unlock(event->group_leader, ctx);
10065 10074
10066 if (!ret) { 10075 if (!ret) {
10076 if (!(pmu->capabilities & PERF_PMU_CAP_EXTENDED_REGS) &&
10077 has_extended_regs(event))
10078 ret = -EOPNOTSUPP;
10079
10067 if (pmu->capabilities & PERF_PMU_CAP_NO_EXCLUDE && 10080 if (pmu->capabilities & PERF_PMU_CAP_NO_EXCLUDE &&
10068 event_has_any_exclude_flag(event)) { 10081 event_has_any_exclude_flag(event))
10069 if (event->destroy)
10070 event->destroy(event);
10071 ret = -EINVAL; 10082 ret = -EINVAL;
10072 } 10083
10084 if (ret && event->destroy)
10085 event->destroy(event);
10073 } 10086 }
10074 10087
10075 if (ret) 10088 if (ret)
@@ -10680,11 +10693,11 @@ static int perf_event_set_clock(struct perf_event *event, clockid_t clk_id)
10680 break; 10693 break;
10681 10694
10682 case CLOCK_BOOTTIME: 10695 case CLOCK_BOOTTIME:
10683 event->clock = &ktime_get_boot_ns; 10696 event->clock = &ktime_get_boottime_ns;
10684 break; 10697 break;
10685 10698
10686 case CLOCK_TAI: 10699 case CLOCK_TAI:
10687 event->clock = &ktime_get_tai_ns; 10700 event->clock = &ktime_get_clocktai_ns;
10688 break; 10701 break;
10689 10702
10690 default: 10703 default:
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 78f61bfc6b79..97c367f0a9aa 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -46,7 +46,7 @@ static DEFINE_SPINLOCK(uprobes_treelock); /* serialize rbtree access */
46static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ]; 46static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ];
47#define uprobes_mmap_hash(v) (&uprobes_mmap_mutex[((unsigned long)(v)) % UPROBES_HASH_SZ]) 47#define uprobes_mmap_hash(v) (&uprobes_mmap_mutex[((unsigned long)(v)) % UPROBES_HASH_SZ])
48 48
49static struct percpu_rw_semaphore dup_mmap_sem; 49DEFINE_STATIC_PERCPU_RWSEM(dup_mmap_sem);
50 50
51/* Have a copy of original instruction */ 51/* Have a copy of original instruction */
52#define UPROBE_COPY_INSN 0 52#define UPROBE_COPY_INSN 0
@@ -2302,7 +2302,5 @@ void __init uprobes_init(void)
2302 for (i = 0; i < UPROBES_HASH_SZ; i++) 2302 for (i = 0; i < UPROBES_HASH_SZ; i++)
2303 mutex_init(&uprobes_mmap_mutex[i]); 2303 mutex_init(&uprobes_mmap_mutex[i]);
2304 2304
2305 BUG_ON(percpu_init_rwsem(&dup_mmap_sem));
2306
2307 BUG_ON(register_die_notifier(&uprobe_exception_nb)); 2305 BUG_ON(register_die_notifier(&uprobe_exception_nb));
2308} 2306}
diff --git a/kernel/fork.c b/kernel/fork.c
index 6be686283e55..847dd147b068 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -248,7 +248,11 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
248 struct page *page = alloc_pages_node(node, THREADINFO_GFP, 248 struct page *page = alloc_pages_node(node, THREADINFO_GFP,
249 THREAD_SIZE_ORDER); 249 THREAD_SIZE_ORDER);
250 250
251 return page ? page_address(page) : NULL; 251 if (likely(page)) {
252 tsk->stack = page_address(page);
253 return tsk->stack;
254 }
255 return NULL;
252#endif 256#endif
253} 257}
254 258
@@ -1714,31 +1718,6 @@ const struct file_operations pidfd_fops = {
1714#endif 1718#endif
1715}; 1719};
1716 1720
1717/**
1718 * pidfd_create() - Create a new pid file descriptor.
1719 *
1720 * @pid: struct pid that the pidfd will reference
1721 *
1722 * This creates a new pid file descriptor with the O_CLOEXEC flag set.
1723 *
1724 * Note, that this function can only be called after the fd table has
1725 * been unshared to avoid leaking the pidfd to the new process.
1726 *
1727 * Return: On success, a cloexec pidfd is returned.
1728 * On error, a negative errno number will be returned.
1729 */
1730static int pidfd_create(struct pid *pid)
1731{
1732 int fd;
1733
1734 fd = anon_inode_getfd("[pidfd]", &pidfd_fops, get_pid(pid),
1735 O_RDWR | O_CLOEXEC);
1736 if (fd < 0)
1737 put_pid(pid);
1738
1739 return fd;
1740}
1741
1742static void __delayed_free_task(struct rcu_head *rhp) 1721static void __delayed_free_task(struct rcu_head *rhp)
1743{ 1722{
1744 struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); 1723 struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
@@ -1776,6 +1755,7 @@ static __latent_entropy struct task_struct *copy_process(
1776 int pidfd = -1, retval; 1755 int pidfd = -1, retval;
1777 struct task_struct *p; 1756 struct task_struct *p;
1778 struct multiprocess_signals delayed; 1757 struct multiprocess_signals delayed;
1758 struct file *pidfile = NULL;
1779 1759
1780 /* 1760 /*
1781 * Don't allow sharing the root directory with processes in a different 1761 * Don't allow sharing the root directory with processes in a different
@@ -1824,8 +1804,6 @@ static __latent_entropy struct task_struct *copy_process(
1824 } 1804 }
1825 1805
1826 if (clone_flags & CLONE_PIDFD) { 1806 if (clone_flags & CLONE_PIDFD) {
1827 int reserved;
1828
1829 /* 1807 /*
1830 * - CLONE_PARENT_SETTID is useless for pidfds and also 1808 * - CLONE_PARENT_SETTID is useless for pidfds and also
1831 * parent_tidptr is used to return pidfds. 1809 * parent_tidptr is used to return pidfds.
@@ -1836,16 +1814,6 @@ static __latent_entropy struct task_struct *copy_process(
1836 if (clone_flags & 1814 if (clone_flags &
1837 (CLONE_DETACHED | CLONE_PARENT_SETTID | CLONE_THREAD)) 1815 (CLONE_DETACHED | CLONE_PARENT_SETTID | CLONE_THREAD))
1838 return ERR_PTR(-EINVAL); 1816 return ERR_PTR(-EINVAL);
1839
1840 /*
1841 * Verify that parent_tidptr is sane so we can potentially
1842 * reuse it later.
1843 */
1844 if (get_user(reserved, parent_tidptr))
1845 return ERR_PTR(-EFAULT);
1846
1847 if (reserved != 0)
1848 return ERR_PTR(-EINVAL);
1849 } 1817 }
1850 1818
1851 /* 1819 /*
@@ -1986,9 +1954,6 @@ static __latent_entropy struct task_struct *copy_process(
1986 p->pagefault_disabled = 0; 1954 p->pagefault_disabled = 0;
1987 1955
1988#ifdef CONFIG_LOCKDEP 1956#ifdef CONFIG_LOCKDEP
1989 p->lockdep_depth = 0; /* no locks held yet */
1990 p->curr_chain_key = 0;
1991 p->lockdep_recursion = 0;
1992 lockdep_init_task(p); 1957 lockdep_init_task(p);
1993#endif 1958#endif
1994 1959
@@ -2060,11 +2025,21 @@ static __latent_entropy struct task_struct *copy_process(
2060 * if the fd table isn't shared). 2025 * if the fd table isn't shared).
2061 */ 2026 */
2062 if (clone_flags & CLONE_PIDFD) { 2027 if (clone_flags & CLONE_PIDFD) {
2063 retval = pidfd_create(pid); 2028 retval = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
2064 if (retval < 0) 2029 if (retval < 0)
2065 goto bad_fork_free_pid; 2030 goto bad_fork_free_pid;
2066 2031
2067 pidfd = retval; 2032 pidfd = retval;
2033
2034 pidfile = anon_inode_getfile("[pidfd]", &pidfd_fops, pid,
2035 O_RDWR | O_CLOEXEC);
2036 if (IS_ERR(pidfile)) {
2037 put_unused_fd(pidfd);
2038 retval = PTR_ERR(pidfile);
2039 goto bad_fork_free_pid;
2040 }
2041 get_pid(pid); /* held by pidfile now */
2042
2068 retval = put_user(pidfd, parent_tidptr); 2043 retval = put_user(pidfd, parent_tidptr);
2069 if (retval) 2044 if (retval)
2070 goto bad_fork_put_pidfd; 2045 goto bad_fork_put_pidfd;
@@ -2141,7 +2116,7 @@ static __latent_entropy struct task_struct *copy_process(
2141 */ 2116 */
2142 2117
2143 p->start_time = ktime_get_ns(); 2118 p->start_time = ktime_get_ns();
2144 p->real_start_time = ktime_get_boot_ns(); 2119 p->real_start_time = ktime_get_boottime_ns();
2145 2120
2146 /* 2121 /*
2147 * Make it visible to the rest of the system, but dont wake it up yet. 2122 * Make it visible to the rest of the system, but dont wake it up yet.
@@ -2182,6 +2157,9 @@ static __latent_entropy struct task_struct *copy_process(
2182 goto bad_fork_cancel_cgroup; 2157 goto bad_fork_cancel_cgroup;
2183 } 2158 }
2184 2159
2160 /* past the last point of failure */
2161 if (pidfile)
2162 fd_install(pidfd, pidfile);
2185 2163
2186 init_task_pid_links(p); 2164 init_task_pid_links(p);
2187 if (likely(p->pid)) { 2165 if (likely(p->pid)) {
@@ -2248,8 +2226,10 @@ bad_fork_cancel_cgroup:
2248bad_fork_cgroup_threadgroup_change_end: 2226bad_fork_cgroup_threadgroup_change_end:
2249 cgroup_threadgroup_change_end(current); 2227 cgroup_threadgroup_change_end(current);
2250bad_fork_put_pidfd: 2228bad_fork_put_pidfd:
2251 if (clone_flags & CLONE_PIDFD) 2229 if (clone_flags & CLONE_PIDFD) {
2252 ksys_close(pidfd); 2230 fput(pidfile);
2231 put_unused_fd(pidfd);
2232 }
2253bad_fork_free_pid: 2233bad_fork_free_pid:
2254 if (pid != &init_struct_pid) 2234 if (pid != &init_struct_pid)
2255 free_pid(pid); 2235 free_pid(pid);
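
The pidfd change follows the usual "reserve the fd, build the file, publish only after the last failure point" pattern, so a half-created pidfd is never visible to userspace. A generic sketch of that pattern (not the fork code itself; names are made up):

	#include <linux/anon_inodes.h>
	#include <linux/err.h>
	#include <linux/fcntl.h>
	#include <linux/file.h>
	#include <linux/fs.h>

	static int publish_anon_fd(void *priv, const struct file_operations *fops)
	{
		struct file *file;
		int fd;

		fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC);	/* reserve only */
		if (fd < 0)
			return fd;

		file = anon_inode_getfile("[example]", fops, priv, O_RDWR | O_CLOEXEC);
		if (IS_ERR(file)) {
			put_unused_fd(fd);		/* nothing was published yet */
			return PTR_ERR(file);
		}

		fd_install(fd, file);			/* past the last point of failure */
		return fd;
	}
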
diff --git a/kernel/futex.c b/kernel/futex.c
index 4b5b468c58b6..6d50728ef2e7 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -471,6 +471,37 @@ enum futex_access {
471}; 471};
472 472
473/** 473/**
474 * futex_setup_timer - set up the sleeping hrtimer.
475 * @time: ptr to the given timeout value
476 * @timeout: the hrtimer_sleeper structure to be set up
477 * @flags: futex flags
478 * @range_ns: optional range in ns
479 *
480 * Return: Initialized hrtimer_sleeper structure or NULL if no timeout
481 * value given
482 */
483static inline struct hrtimer_sleeper *
484futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
485 int flags, u64 range_ns)
486{
487 if (!time)
488 return NULL;
489
490 hrtimer_init_on_stack(&timeout->timer, (flags & FLAGS_CLOCKRT) ?
491 CLOCK_REALTIME : CLOCK_MONOTONIC,
492 HRTIMER_MODE_ABS);
493 hrtimer_init_sleeper(timeout, current);
494
495 /*
496 * If range_ns is 0, calling hrtimer_set_expires_range_ns() is
497 * effectively the same as calling hrtimer_set_expires().
498 */
499 hrtimer_set_expires_range_ns(&timeout->timer, *time, range_ns);
500
501 return timeout;
502}
503
504/**
474 * get_futex_key() - Get parameters which are the keys for a futex 505 * get_futex_key() - Get parameters which are the keys for a futex
475 * @uaddr: virtual address of the futex 506 * @uaddr: virtual address of the futex
476 * @fshared: 0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED 507 * @fshared: 0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED
@@ -2679,7 +2710,7 @@ out:
2679static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, 2710static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
2680 ktime_t *abs_time, u32 bitset) 2711 ktime_t *abs_time, u32 bitset)
2681{ 2712{
2682 struct hrtimer_sleeper timeout, *to = NULL; 2713 struct hrtimer_sleeper timeout, *to;
2683 struct restart_block *restart; 2714 struct restart_block *restart;
2684 struct futex_hash_bucket *hb; 2715 struct futex_hash_bucket *hb;
2685 struct futex_q q = futex_q_init; 2716 struct futex_q q = futex_q_init;
@@ -2689,17 +2720,8 @@ static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
2689 return -EINVAL; 2720 return -EINVAL;
2690 q.bitset = bitset; 2721 q.bitset = bitset;
2691 2722
2692 if (abs_time) { 2723 to = futex_setup_timer(abs_time, &timeout, flags,
2693 to = &timeout; 2724 current->timer_slack_ns);
2694
2695 hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
2696 CLOCK_REALTIME : CLOCK_MONOTONIC,
2697 HRTIMER_MODE_ABS);
2698 hrtimer_init_sleeper(to, current);
2699 hrtimer_set_expires_range_ns(&to->timer, *abs_time,
2700 current->timer_slack_ns);
2701 }
2702
2703retry: 2725retry:
2704 /* 2726 /*
2705 * Prepare to wait on uaddr. On success, holds hb lock and increments 2727 * Prepare to wait on uaddr. On success, holds hb lock and increments
@@ -2779,7 +2801,7 @@ static long futex_wait_restart(struct restart_block *restart)
2779static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, 2801static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
2780 ktime_t *time, int trylock) 2802 ktime_t *time, int trylock)
2781{ 2803{
2782 struct hrtimer_sleeper timeout, *to = NULL; 2804 struct hrtimer_sleeper timeout, *to;
2783 struct futex_pi_state *pi_state = NULL; 2805 struct futex_pi_state *pi_state = NULL;
2784 struct rt_mutex_waiter rt_waiter; 2806 struct rt_mutex_waiter rt_waiter;
2785 struct futex_hash_bucket *hb; 2807 struct futex_hash_bucket *hb;
@@ -2792,13 +2814,7 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
2792 if (refill_pi_state_cache()) 2814 if (refill_pi_state_cache())
2793 return -ENOMEM; 2815 return -ENOMEM;
2794 2816
2795 if (time) { 2817 to = futex_setup_timer(time, &timeout, FLAGS_CLOCKRT, 0);
2796 to = &timeout;
2797 hrtimer_init_on_stack(&to->timer, CLOCK_REALTIME,
2798 HRTIMER_MODE_ABS);
2799 hrtimer_init_sleeper(to, current);
2800 hrtimer_set_expires(&to->timer, *time);
2801 }
2802 2818
2803retry: 2819retry:
2804 ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key, FUTEX_WRITE); 2820 ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key, FUTEX_WRITE);
@@ -3195,7 +3211,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
3195 u32 val, ktime_t *abs_time, u32 bitset, 3211 u32 val, ktime_t *abs_time, u32 bitset,
3196 u32 __user *uaddr2) 3212 u32 __user *uaddr2)
3197{ 3213{
3198 struct hrtimer_sleeper timeout, *to = NULL; 3214 struct hrtimer_sleeper timeout, *to;
3199 struct futex_pi_state *pi_state = NULL; 3215 struct futex_pi_state *pi_state = NULL;
3200 struct rt_mutex_waiter rt_waiter; 3216 struct rt_mutex_waiter rt_waiter;
3201 struct futex_hash_bucket *hb; 3217 struct futex_hash_bucket *hb;
@@ -3212,15 +3228,8 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
3212 if (!bitset) 3228 if (!bitset)
3213 return -EINVAL; 3229 return -EINVAL;
3214 3230
3215 if (abs_time) { 3231 to = futex_setup_timer(abs_time, &timeout, flags,
3216 to = &timeout; 3232 current->timer_slack_ns);
3217 hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
3218 CLOCK_REALTIME : CLOCK_MONOTONIC,
3219 HRTIMER_MODE_ABS);
3220 hrtimer_init_sleeper(to, current);
3221 hrtimer_set_expires_range_ns(&to->timer, *abs_time,
3222 current->timer_slack_ns);
3223 }
3224 3233
3225 /* 3234 /*
3226 * The waiter is allocated on our stack, manipulated by the requeue 3235 * The waiter is allocated on our stack, manipulated by the requeue
diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile
index ff6e352e3a6c..b4f53717d143 100644
--- a/kernel/irq/Makefile
+++ b/kernel/irq/Makefile
@@ -2,6 +2,9 @@
2 2
3obj-y := irqdesc.o handle.o manage.o spurious.o resend.o chip.o dummychip.o devres.o 3obj-y := irqdesc.o handle.o manage.o spurious.o resend.o chip.o dummychip.o devres.o
4obj-$(CONFIG_IRQ_TIMINGS) += timings.o 4obj-$(CONFIG_IRQ_TIMINGS) += timings.o
5ifeq ($(CONFIG_TEST_IRQ_TIMINGS),y)
6 CFLAGS_timings.o += -DDEBUG
7endif
5obj-$(CONFIG_GENERIC_IRQ_CHIP) += generic-chip.o 8obj-$(CONFIG_GENERIC_IRQ_CHIP) += generic-chip.o
6obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o 9obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o
7obj-$(CONFIG_IRQ_DOMAIN) += irqdomain.o 10obj-$(CONFIG_IRQ_DOMAIN) += irqdomain.o
diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c
index f18cd5aa33e8..4352b08ae48d 100644
--- a/kernel/irq/affinity.c
+++ b/kernel/irq/affinity.c
@@ -94,8 +94,7 @@ static int get_nodes_in_cpumask(cpumask_var_t *node_to_cpumask,
94 return nodes; 94 return nodes;
95} 95}
96 96
97static int __irq_build_affinity_masks(const struct irq_affinity *affd, 97static int __irq_build_affinity_masks(unsigned int startvec,
98 unsigned int startvec,
99 unsigned int numvecs, 98 unsigned int numvecs,
100 unsigned int firstvec, 99 unsigned int firstvec,
101 cpumask_var_t *node_to_cpumask, 100 cpumask_var_t *node_to_cpumask,
@@ -171,8 +170,7 @@ static int __irq_build_affinity_masks(const struct irq_affinity *affd,
171 * 1) spread present CPU on these vectors 170 * 1) spread present CPU on these vectors
172 * 2) spread other possible CPUs on these vectors 171 * 2) spread other possible CPUs on these vectors
173 */ 172 */
174static int irq_build_affinity_masks(const struct irq_affinity *affd, 173static int irq_build_affinity_masks(unsigned int startvec, unsigned int numvecs,
175 unsigned int startvec, unsigned int numvecs,
176 unsigned int firstvec, 174 unsigned int firstvec,
177 struct irq_affinity_desc *masks) 175 struct irq_affinity_desc *masks)
178{ 176{
@@ -197,7 +195,7 @@ static int irq_build_affinity_masks(const struct irq_affinity *affd,
197 build_node_to_cpumask(node_to_cpumask); 195 build_node_to_cpumask(node_to_cpumask);
198 196
199 /* Spread on present CPUs starting from affd->pre_vectors */ 197 /* Spread on present CPUs starting from affd->pre_vectors */
200 nr_present = __irq_build_affinity_masks(affd, curvec, numvecs, 198 nr_present = __irq_build_affinity_masks(curvec, numvecs,
201 firstvec, node_to_cpumask, 199 firstvec, node_to_cpumask,
202 cpu_present_mask, nmsk, masks); 200 cpu_present_mask, nmsk, masks);
203 201
@@ -212,7 +210,7 @@ static int irq_build_affinity_masks(const struct irq_affinity *affd,
212 else 210 else
213 curvec = firstvec + nr_present; 211 curvec = firstvec + nr_present;
214 cpumask_andnot(npresmsk, cpu_possible_mask, cpu_present_mask); 212 cpumask_andnot(npresmsk, cpu_possible_mask, cpu_present_mask);
215 nr_others = __irq_build_affinity_masks(affd, curvec, numvecs, 213 nr_others = __irq_build_affinity_masks(curvec, numvecs,
216 firstvec, node_to_cpumask, 214 firstvec, node_to_cpumask,
217 npresmsk, nmsk, masks); 215 npresmsk, nmsk, masks);
218 put_online_cpus(); 216 put_online_cpus();
@@ -295,7 +293,7 @@ irq_create_affinity_masks(unsigned int nvecs, struct irq_affinity *affd)
295 unsigned int this_vecs = affd->set_size[i]; 293 unsigned int this_vecs = affd->set_size[i];
296 int ret; 294 int ret;
297 295
298 ret = irq_build_affinity_masks(affd, curvec, this_vecs, 296 ret = irq_build_affinity_masks(curvec, this_vecs,
299 curvec, masks); 297 curvec, masks);
300 if (ret) { 298 if (ret) {
301 kfree(masks); 299 kfree(masks);
diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c
index 16cbf6beb276..ae60cae24e9a 100644
--- a/kernel/irq/autoprobe.c
+++ b/kernel/irq/autoprobe.c
@@ -90,7 +90,7 @@ unsigned long probe_irq_on(void)
90 /* It triggered already - consider it spurious. */ 90 /* It triggered already - consider it spurious. */
91 if (!(desc->istate & IRQS_WAITING)) { 91 if (!(desc->istate & IRQS_WAITING)) {
92 desc->istate &= ~IRQS_AUTODETECT; 92 desc->istate &= ~IRQS_AUTODETECT;
93 irq_shutdown(desc); 93 irq_shutdown_and_deactivate(desc);
94 } else 94 } else
95 if (i < 32) 95 if (i < 32)
96 mask |= 1 << i; 96 mask |= 1 << i;
@@ -127,7 +127,7 @@ unsigned int probe_irq_mask(unsigned long val)
127 mask |= 1 << i; 127 mask |= 1 << i;
128 128
129 desc->istate &= ~IRQS_AUTODETECT; 129 desc->istate &= ~IRQS_AUTODETECT;
130 irq_shutdown(desc); 130 irq_shutdown_and_deactivate(desc);
131 } 131 }
132 raw_spin_unlock_irq(&desc->lock); 132 raw_spin_unlock_irq(&desc->lock);
133 } 133 }
@@ -169,7 +169,7 @@ int probe_irq_off(unsigned long val)
169 nr_of_irqs++; 169 nr_of_irqs++;
170 } 170 }
171 desc->istate &= ~IRQS_AUTODETECT; 171 desc->istate &= ~IRQS_AUTODETECT;
172 irq_shutdown(desc); 172 irq_shutdown_and_deactivate(desc);
173 } 173 }
174 raw_spin_unlock_irq(&desc->lock); 174 raw_spin_unlock_irq(&desc->lock);
175 } 175 }
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 29d6c7d070b4..b76703b2c0af 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -314,6 +314,12 @@ void irq_shutdown(struct irq_desc *desc)
314 } 314 }
315 irq_state_clr_started(desc); 315 irq_state_clr_started(desc);
316 } 316 }
317}
318
319
320void irq_shutdown_and_deactivate(struct irq_desc *desc)
321{
322 irq_shutdown(desc);
317 /* 323 /*
318 * This must be called even if the interrupt was never started up, 324 * This must be called even if the interrupt was never started up,
319 * because the activation can happen before the interrupt is 325 * because the activation can happen before the interrupt is
@@ -748,6 +754,8 @@ void handle_fasteoi_nmi(struct irq_desc *desc)
748 unsigned int irq = irq_desc_get_irq(desc); 754 unsigned int irq = irq_desc_get_irq(desc);
749 irqreturn_t res; 755 irqreturn_t res;
750 756
757 __kstat_incr_irqs_this_cpu(desc);
758
751 trace_irq_handler_entry(irq, action); 759 trace_irq_handler_entry(irq, action);
752 /* 760 /*
753 * NMIs cannot be shared, there is only one action. 761 * NMIs cannot be shared, there is only one action.
@@ -962,6 +970,8 @@ void handle_percpu_devid_fasteoi_nmi(struct irq_desc *desc)
962 unsigned int irq = irq_desc_get_irq(desc); 970 unsigned int irq = irq_desc_get_irq(desc);
963 irqreturn_t res; 971 irqreturn_t res;
964 972
973 __kstat_incr_irqs_this_cpu(desc);
974
965 trace_irq_handler_entry(irq, action); 975 trace_irq_handler_entry(irq, action);
966 res = action->handler(irq, raw_cpu_ptr(action->percpu_dev_id)); 976 res = action->handler(irq, raw_cpu_ptr(action->percpu_dev_id));
967 trace_irq_handler_exit(irq, action, res); 977 trace_irq_handler_exit(irq, action, res);
diff --git a/kernel/irq/cpuhotplug.c b/kernel/irq/cpuhotplug.c
index 5b1072e394b2..6c7ca2e983a5 100644
--- a/kernel/irq/cpuhotplug.c
+++ b/kernel/irq/cpuhotplug.c
@@ -116,7 +116,7 @@ static bool migrate_one_irq(struct irq_desc *desc)
116 */ 116 */
117 if (irqd_affinity_is_managed(d)) { 117 if (irqd_affinity_is_managed(d)) {
118 irqd_set_managed_shutdown(d); 118 irqd_set_managed_shutdown(d);
119 irq_shutdown(desc); 119 irq_shutdown_and_deactivate(desc);
120 return false; 120 return false;
121 } 121 }
122 affinity = cpu_online_mask; 122 affinity = cpu_online_mask;
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 70c3053bc1f6..3924fbe829d4 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -82,6 +82,7 @@ extern int irq_activate_and_startup(struct irq_desc *desc, bool resend);
82extern int irq_startup(struct irq_desc *desc, bool resend, bool force); 82extern int irq_startup(struct irq_desc *desc, bool resend, bool force);
83 83
84extern void irq_shutdown(struct irq_desc *desc); 84extern void irq_shutdown(struct irq_desc *desc);
85extern void irq_shutdown_and_deactivate(struct irq_desc *desc);
85extern void irq_enable(struct irq_desc *desc); 86extern void irq_enable(struct irq_desc *desc);
86extern void irq_disable(struct irq_desc *desc); 87extern void irq_disable(struct irq_desc *desc);
87extern void irq_percpu_enable(struct irq_desc *desc, unsigned int cpu); 88extern void irq_percpu_enable(struct irq_desc *desc, unsigned int cpu);
@@ -96,6 +97,10 @@ static inline void irq_mark_irq(unsigned int irq) { }
96extern void irq_mark_irq(unsigned int irq); 97extern void irq_mark_irq(unsigned int irq);
97#endif 98#endif
98 99
100extern int __irq_get_irqchip_state(struct irq_data *data,
101 enum irqchip_irq_state which,
102 bool *state);
103
99extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr); 104extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr);
100 105
101irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc, unsigned int *flags); 106irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc, unsigned int *flags);
@@ -354,6 +359,16 @@ static inline int irq_timing_decode(u64 value, u64 *timestamp)
354 return value & U16_MAX; 359 return value & U16_MAX;
355} 360}
356 361
362static __always_inline void irq_timings_push(u64 ts, int irq)
363{
364 struct irq_timings *timings = this_cpu_ptr(&irq_timings);
365
366 timings->values[timings->count & IRQ_TIMINGS_MASK] =
367 irq_timing_encode(ts, irq);
368
369 timings->count++;
370}
371
357/* 372/*
358 * The function record_irq_time is only called in one place in the 373 * The function record_irq_time is only called in one place in the
359 * interrupts handler. We want this function always inline so the code 374 * interrupts handler. We want this function always inline so the code
@@ -367,15 +382,8 @@ static __always_inline void record_irq_time(struct irq_desc *desc)
367 if (!static_branch_likely(&irq_timing_enabled)) 382 if (!static_branch_likely(&irq_timing_enabled))
368 return; 383 return;
369 384
370 if (desc->istate & IRQS_TIMINGS) { 385 if (desc->istate & IRQS_TIMINGS)
371 struct irq_timings *timings = this_cpu_ptr(&irq_timings); 386 irq_timings_push(local_clock(), irq_desc_get_irq(desc));
372
373 timings->values[timings->count & IRQ_TIMINGS_MASK] =
374 irq_timing_encode(local_clock(),
375 irq_desc_get_irq(desc));
376
377 timings->count++;
378 }
379} 387}
380#else 388#else
381static inline void irq_remove_timings(struct irq_desc *desc) {} 389static inline void irq_remove_timings(struct irq_desc *desc) {}
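
The irq_timings_push() helper added above encodes the timestamp and irq number into one u64 and drops it into the per-CPU ring, indexing with count & IRQ_TIMINGS_MASK so the producer never has to test for wrap-around. A minimal, self-contained sketch of that scheme follows; the buffer size, the 16-bit irq field and every name below are illustrative stand-ins, not the kernel's:

#include <stdint.h>
#include <stdio.h>

/*
 * Illustrative stand-ins for IRQ_TIMINGS_SIZE/IRQ_TIMINGS_MASK and the
 * encode/push helpers; the low-16-bit irq / upper-bits timestamp split
 * mirrors what irq_timing_decode() above undoes.
 */
#define TIMINGS_SIZE	32			/* must be a power of two */
#define TIMINGS_MASK	(TIMINGS_SIZE - 1)

static uint64_t ring[TIMINGS_SIZE];
static unsigned int count;

static uint64_t timing_encode(uint64_t ts, unsigned int irq)
{
	return (ts << 16) | (irq & 0xffff);
}

static void timings_push(uint64_t ts, unsigned int irq)
{
	/* count & mask picks the slot; the writer never checks for wrap */
	ring[count & TIMINGS_MASK] = timing_encode(ts, irq);
	count++;
}

int main(void)
{
	for (unsigned int i = 0; i < 40; i++)	/* wraps past TIMINGS_SIZE */
		timings_push(1000 + i, 10 + i);

	printf("pushed %u entries, slot 0 now holds irq %llu\n",
	       count, (unsigned long long)(ring[0] & 0xffff));
	return 0;
}
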
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index c52b737ab8e3..9484e88dabc2 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -680,6 +680,8 @@ int __handle_domain_irq(struct irq_domain *domain, unsigned int hwirq,
680 * @hwirq: The HW irq number to convert to a logical one 680 * @hwirq: The HW irq number to convert to a logical one
681 * @regs: Register file coming from the low-level handling code 681 * @regs: Register file coming from the low-level handling code
682 * 682 *
683 * This function must be called from an NMI context.
684 *
683 * Returns: 0 on success, or -EINVAL if conversion has failed 685 * Returns: 0 on success, or -EINVAL if conversion has failed
684 */ 686 */
685int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq, 687int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq,
@@ -689,7 +691,10 @@ int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq,
689 unsigned int irq; 691 unsigned int irq;
690 int ret = 0; 692 int ret = 0;
691 693
692 nmi_enter(); 694 /*
 695 * NMI context needs to be set up earlier in order to deal with tracing.
696 */
697 WARN_ON(!in_nmi());
693 698
694 irq = irq_find_mapping(domain, hwirq); 699 irq = irq_find_mapping(domain, hwirq);
695 700
@@ -702,7 +707,6 @@ int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq,
702 else 707 else
703 ret = -EINVAL; 708 ret = -EINVAL;
704 709
705 nmi_exit();
706 set_irq_regs(old_regs); 710 set_irq_regs(old_regs);
707 return ret; 711 return ret;
708} 712}
@@ -946,6 +950,11 @@ unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
946 *per_cpu_ptr(desc->kstat_irqs, cpu) : 0; 950 *per_cpu_ptr(desc->kstat_irqs, cpu) : 0;
947} 951}
948 952
953static bool irq_is_nmi(struct irq_desc *desc)
954{
955 return desc->istate & IRQS_NMI;
956}
957
949/** 958/**
950 * kstat_irqs - Get the statistics for an interrupt 959 * kstat_irqs - Get the statistics for an interrupt
951 * @irq: The interrupt number 960 * @irq: The interrupt number
@@ -963,7 +972,8 @@ unsigned int kstat_irqs(unsigned int irq)
963 if (!desc || !desc->kstat_irqs) 972 if (!desc || !desc->kstat_irqs)
964 return 0; 973 return 0;
965 if (!irq_settings_is_per_cpu_devid(desc) && 974 if (!irq_settings_is_per_cpu_devid(desc) &&
966 !irq_settings_is_per_cpu(desc)) 975 !irq_settings_is_per_cpu(desc) &&
976 !irq_is_nmi(desc))
967 return desc->tot_count; 977 return desc->tot_count;
968 978
969 for_each_possible_cpu(cpu) 979 for_each_possible_cpu(cpu)
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index a453e229f99c..3078d0e48bba 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -123,7 +123,7 @@ EXPORT_SYMBOL_GPL(irq_domain_free_fwnode);
123 * @ops: domain callbacks 123 * @ops: domain callbacks
124 * @host_data: Controller private data pointer 124 * @host_data: Controller private data pointer
125 * 125 *
126 * Allocates and initialize and irq_domain structure. 126 * Allocates and initializes an irq_domain structure.
127 * Returns pointer to IRQ domain, or NULL on failure. 127 * Returns pointer to IRQ domain, or NULL on failure.
128 */ 128 */
129struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size, 129struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size,
@@ -139,7 +139,7 @@ struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size,
139 139
140 domain = kzalloc_node(sizeof(*domain) + (sizeof(unsigned int) * size), 140 domain = kzalloc_node(sizeof(*domain) + (sizeof(unsigned int) * size),
141 GFP_KERNEL, of_node_to_nid(of_node)); 141 GFP_KERNEL, of_node_to_nid(of_node));
142 if (WARN_ON(!domain)) 142 if (!domain)
143 return NULL; 143 return NULL;
144 144
145 if (fwnode && is_fwnode_irqchip(fwnode)) { 145 if (fwnode && is_fwnode_irqchip(fwnode)) {
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 78f3ddeb7fe4..e8f7f179bf77 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -13,6 +13,7 @@
13#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/random.h> 14#include <linux/random.h>
15#include <linux/interrupt.h> 15#include <linux/interrupt.h>
16#include <linux/irqdomain.h>
16#include <linux/slab.h> 17#include <linux/slab.h>
17#include <linux/sched.h> 18#include <linux/sched.h>
18#include <linux/sched/rt.h> 19#include <linux/sched/rt.h>
@@ -34,8 +35,9 @@ static int __init setup_forced_irqthreads(char *arg)
34early_param("threadirqs", setup_forced_irqthreads); 35early_param("threadirqs", setup_forced_irqthreads);
35#endif 36#endif
36 37
37static void __synchronize_hardirq(struct irq_desc *desc) 38static void __synchronize_hardirq(struct irq_desc *desc, bool sync_chip)
38{ 39{
40 struct irq_data *irqd = irq_desc_get_irq_data(desc);
39 bool inprogress; 41 bool inprogress;
40 42
41 do { 43 do {
@@ -51,6 +53,20 @@ static void __synchronize_hardirq(struct irq_desc *desc)
51 /* Ok, that indicated we're done: double-check carefully. */ 53 /* Ok, that indicated we're done: double-check carefully. */
52 raw_spin_lock_irqsave(&desc->lock, flags); 54 raw_spin_lock_irqsave(&desc->lock, flags);
53 inprogress = irqd_irq_inprogress(&desc->irq_data); 55 inprogress = irqd_irq_inprogress(&desc->irq_data);
56
57 /*
58 * If requested and supported, check at the chip whether it
59 * is in flight at the hardware level, i.e. already pending
60 * in a CPU and waiting for service and acknowledge.
61 */
62 if (!inprogress && sync_chip) {
63 /*
64 * Ignore the return code. inprogress is only updated
65 * when the chip supports it.
66 */
67 __irq_get_irqchip_state(irqd, IRQCHIP_STATE_ACTIVE,
68 &inprogress);
69 }
54 raw_spin_unlock_irqrestore(&desc->lock, flags); 70 raw_spin_unlock_irqrestore(&desc->lock, flags);
55 71
56 /* Oops, that failed? */ 72 /* Oops, that failed? */
@@ -73,13 +89,18 @@ static void __synchronize_hardirq(struct irq_desc *desc)
73 * Returns: false if a threaded handler is active. 89 * Returns: false if a threaded handler is active.
74 * 90 *
75 * This function may be called - with care - from IRQ context. 91 * This function may be called - with care - from IRQ context.
92 *
93 * It does not check whether there is an interrupt in flight at the
94 * hardware level, but not serviced yet, as this might deadlock when
95 * called with interrupts disabled and the target CPU of the interrupt
96 * is the current CPU.
76 */ 97 */
77bool synchronize_hardirq(unsigned int irq) 98bool synchronize_hardirq(unsigned int irq)
78{ 99{
79 struct irq_desc *desc = irq_to_desc(irq); 100 struct irq_desc *desc = irq_to_desc(irq);
80 101
81 if (desc) { 102 if (desc) {
82 __synchronize_hardirq(desc); 103 __synchronize_hardirq(desc, false);
83 return !atomic_read(&desc->threads_active); 104 return !atomic_read(&desc->threads_active);
84 } 105 }
85 106
@@ -95,14 +116,19 @@ EXPORT_SYMBOL(synchronize_hardirq);
95 * to complete before returning. If you use this function while 116 * to complete before returning. If you use this function while
96 * holding a resource the IRQ handler may need you will deadlock. 117 * holding a resource the IRQ handler may need you will deadlock.
97 * 118 *
98 * This function may be called - with care - from IRQ context. 119 * Can only be called from preemptible code as it might sleep when
120 * an interrupt thread is associated to @irq.
121 *
122 * It optionally makes sure (when the irq chip supports that method)
123 * that the interrupt is not pending in any CPU and waiting for
124 * service.
99 */ 125 */
100void synchronize_irq(unsigned int irq) 126void synchronize_irq(unsigned int irq)
101{ 127{
102 struct irq_desc *desc = irq_to_desc(irq); 128 struct irq_desc *desc = irq_to_desc(irq);
103 129
104 if (desc) { 130 if (desc) {
105 __synchronize_hardirq(desc); 131 __synchronize_hardirq(desc, true);
106 /* 132 /*
107 * We made sure that no hardirq handler is 133 * We made sure that no hardirq handler is
108 * running. Now verify that no threaded handlers are 134 * running. Now verify that no threaded handlers are
@@ -1699,6 +1725,7 @@ static struct irqaction *__free_irq(struct irq_desc *desc, void *dev_id)
1699 /* If this was the last handler, shut down the IRQ line: */ 1725 /* If this was the last handler, shut down the IRQ line: */
1700 if (!desc->action) { 1726 if (!desc->action) {
1701 irq_settings_clr_disable_unlazy(desc); 1727 irq_settings_clr_disable_unlazy(desc);
1728 /* Only shutdown. Deactivate after synchronize_hardirq() */
1702 irq_shutdown(desc); 1729 irq_shutdown(desc);
1703 } 1730 }
1704 1731
@@ -1727,8 +1754,12 @@ static struct irqaction *__free_irq(struct irq_desc *desc, void *dev_id)
1727 1754
1728 unregister_handler_proc(irq, action); 1755 unregister_handler_proc(irq, action);
1729 1756
1730 /* Make sure it's not being used on another CPU: */ 1757 /*
1731 synchronize_hardirq(irq); 1758 * Make sure it's not being used on another CPU and if the chip
1759 * supports it also make sure that there is no (not yet serviced)
1760 * interrupt in flight at the hardware level.
1761 */
1762 __synchronize_hardirq(desc, true);
1732 1763
1733#ifdef CONFIG_DEBUG_SHIRQ 1764#ifdef CONFIG_DEBUG_SHIRQ
1734 /* 1765 /*
@@ -1768,6 +1799,14 @@ static struct irqaction *__free_irq(struct irq_desc *desc, void *dev_id)
1768 * require it to deallocate resources over the slow bus. 1799 * require it to deallocate resources over the slow bus.
1769 */ 1800 */
1770 chip_bus_lock(desc); 1801 chip_bus_lock(desc);
1802 /*
1803 * There is no interrupt on the fly anymore. Deactivate it
1804 * completely.
1805 */
1806 raw_spin_lock_irqsave(&desc->lock, flags);
1807 irq_domain_deactivate_irq(&desc->irq_data);
1808 raw_spin_unlock_irqrestore(&desc->lock, flags);
1809
1771 irq_release_resources(desc); 1810 irq_release_resources(desc);
1772 chip_bus_sync_unlock(desc); 1811 chip_bus_sync_unlock(desc);
1773 irq_remove_timings(desc); 1812 irq_remove_timings(desc);
@@ -1855,7 +1894,7 @@ static const void *__cleanup_nmi(unsigned int irq, struct irq_desc *desc)
1855 } 1894 }
1856 1895
1857 irq_settings_clr_disable_unlazy(desc); 1896 irq_settings_clr_disable_unlazy(desc);
1858 irq_shutdown(desc); 1897 irq_shutdown_and_deactivate(desc);
1859 1898
1860 irq_release_resources(desc); 1899 irq_release_resources(desc);
1861 1900
@@ -2578,6 +2617,28 @@ out:
2578 irq_put_desc_unlock(desc, flags); 2617 irq_put_desc_unlock(desc, flags);
2579} 2618}
2580 2619
2620int __irq_get_irqchip_state(struct irq_data *data, enum irqchip_irq_state which,
2621 bool *state)
2622{
2623 struct irq_chip *chip;
2624 int err = -EINVAL;
2625
2626 do {
2627 chip = irq_data_get_irq_chip(data);
2628 if (chip->irq_get_irqchip_state)
2629 break;
2630#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
2631 data = data->parent_data;
2632#else
2633 data = NULL;
2634#endif
2635 } while (data);
2636
2637 if (data)
2638 err = chip->irq_get_irqchip_state(data, which, state);
2639 return err;
2640}
2641
2581/** 2642/**
2582 * irq_get_irqchip_state - returns the irqchip state of a interrupt. 2643 * irq_get_irqchip_state - returns the irqchip state of a interrupt.
2583 * @irq: Interrupt line that is forwarded to a VM 2644 * @irq: Interrupt line that is forwarded to a VM
@@ -2596,7 +2657,6 @@ int irq_get_irqchip_state(unsigned int irq, enum irqchip_irq_state which,
2596{ 2657{
2597 struct irq_desc *desc; 2658 struct irq_desc *desc;
2598 struct irq_data *data; 2659 struct irq_data *data;
2599 struct irq_chip *chip;
2600 unsigned long flags; 2660 unsigned long flags;
2601 int err = -EINVAL; 2661 int err = -EINVAL;
2602 2662
@@ -2606,19 +2666,7 @@ int irq_get_irqchip_state(unsigned int irq, enum irqchip_irq_state which,
2606 2666
2607 data = irq_desc_get_irq_data(desc); 2667 data = irq_desc_get_irq_data(desc);
2608 2668
2609 do { 2669 err = __irq_get_irqchip_state(data, which, state);
2610 chip = irq_data_get_irq_chip(data);
2611 if (chip->irq_get_irqchip_state)
2612 break;
2613#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
2614 data = data->parent_data;
2615#else
2616 data = NULL;
2617#endif
2618 } while (data);
2619
2620 if (data)
2621 err = chip->irq_get_irqchip_state(data, which, state);
2622 2670
2623 irq_put_desc_busunlock(desc, flags); 2671 irq_put_desc_busunlock(desc, flags);
2624 return err; 2672 return err;
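
__irq_get_irqchip_state() factors out the "walk up the stacked irq_data until some chip implements irq_get_irqchip_state()" loop so that both the public accessor and __synchronize_hardirq() can share it. Below is a standalone sketch of that parent-chain walk under made-up types and names; in the real code the parent pointer is only followed when CONFIG_IRQ_DOMAIN_HIERARCHY is enabled:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Illustrative stand-ins for struct irq_chip / struct irq_data. */
struct chip {
	const char *name;
	int (*get_state)(bool *state);
};

struct irq_node {
	const struct chip *chip;
	struct irq_node *parent;	/* stacked domain parent, may be NULL */
};

/* Walk towards the root until some chip implements the query. */
static int get_line_active(struct irq_node *d, bool *state)
{
	while (d && !d->chip->get_state)
		d = d->parent;

	return d ? d->chip->get_state(state) : -1;	/* -1: unsupported */
}

static int root_get_state(bool *state)
{
	*state = true;		/* pretend the line is pending at the root */
	return 0;
}

int main(void)
{
	const struct chip root_chip = { "root", root_get_state };
	const struct chip stacked_chip = { "stacked", NULL };
	struct irq_node root = { &root_chip, NULL };
	struct irq_node leaf = { &stacked_chip, &root };
	bool active = false;

	if (!get_line_active(&leaf, &active))
		printf("line active at hardware level: %d\n", active);
	return 0;
}
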
diff --git a/kernel/irq/timings.c b/kernel/irq/timings.c
index 90c735da15d0..e960d7ce7bcc 100644
--- a/kernel/irq/timings.c
+++ b/kernel/irq/timings.c
@@ -1,10 +1,12 @@
1// SPDX-License-Identifier: GPL-2.0 1// SPDX-License-Identifier: GPL-2.0
2// Copyright (C) 2016, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> 2// Copyright (C) 2016, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org>
3#define pr_fmt(fmt) "irq_timings: " fmt
3 4
4#include <linux/kernel.h> 5#include <linux/kernel.h>
5#include <linux/percpu.h> 6#include <linux/percpu.h>
6#include <linux/slab.h> 7#include <linux/slab.h>
7#include <linux/static_key.h> 8#include <linux/static_key.h>
9#include <linux/init.h>
8#include <linux/interrupt.h> 10#include <linux/interrupt.h>
9#include <linux/idr.h> 11#include <linux/idr.h>
10#include <linux/irq.h> 12#include <linux/irq.h>
@@ -261,12 +263,29 @@ void irq_timings_disable(void)
261#define EMA_ALPHA_VAL 64 263#define EMA_ALPHA_VAL 64
262#define EMA_ALPHA_SHIFT 7 264#define EMA_ALPHA_SHIFT 7
263 265
264#define PREDICTION_PERIOD_MIN 2 266#define PREDICTION_PERIOD_MIN 3
265#define PREDICTION_PERIOD_MAX 5 267#define PREDICTION_PERIOD_MAX 5
266#define PREDICTION_FACTOR 4 268#define PREDICTION_FACTOR 4
267#define PREDICTION_MAX 10 /* 2 ^ PREDICTION_MAX useconds */ 269#define PREDICTION_MAX 10 /* 2 ^ PREDICTION_MAX useconds */
268#define PREDICTION_BUFFER_SIZE 16 /* slots for EMAs, hardly more than 16 */ 270#define PREDICTION_BUFFER_SIZE 16 /* slots for EMAs, hardly more than 16 */
269 271
272/*
273 * Number of elements in the circular buffer: If it happens it was
274 * flushed before, then the number of elements could be smaller than
275 * IRQ_TIMINGS_SIZE, so the count is used, otherwise the array size is
276 * used as we wrapped. The index begins from zero when we did not
277 * wrap. That could be done in a nicer way with the proper circular
278 * array structure type but with the cost of extra computation in the
279 * interrupt handler hot path. We choose efficiency.
280 */
281#define for_each_irqts(i, irqts) \
282 for (i = irqts->count < IRQ_TIMINGS_SIZE ? \
283 0 : irqts->count & IRQ_TIMINGS_MASK, \
284 irqts->count = min(IRQ_TIMINGS_SIZE, \
285 irqts->count); \
286 irqts->count > 0; irqts->count--, \
287 i = (i + 1) & IRQ_TIMINGS_MASK)
288
270struct irqt_stat { 289struct irqt_stat {
271 u64 last_ts; 290 u64 last_ts;
272 u64 ema_time[PREDICTION_BUFFER_SIZE]; 291 u64 ema_time[PREDICTION_BUFFER_SIZE];
@@ -297,7 +316,16 @@ static u64 irq_timings_ema_new(u64 value, u64 ema_old)
297 316
298static int irq_timings_next_event_index(int *buffer, size_t len, int period_max) 317static int irq_timings_next_event_index(int *buffer, size_t len, int period_max)
299{ 318{
300 int i; 319 int period;
320
321 /*
322 * Move the beginning pointer to the end minus the max period x 3.
323 * We are at the point we can begin searching the pattern
324 */
325 buffer = &buffer[len - (period_max * 3)];
326
327 /* Adjust the length to the maximum allowed period x 3 */
328 len = period_max * 3;
301 329
302 /* 330 /*
303 * The buffer contains the suite of intervals, in a ilog2 331 * The buffer contains the suite of intervals, in a ilog2
@@ -306,21 +334,45 @@ static int irq_timings_next_event_index(int *buffer, size_t len, int period_max)
306 * period beginning at the end of the buffer. We do that for 334 * period beginning at the end of the buffer. We do that for
307 * each suffix. 335 * each suffix.
308 */ 336 */
309 for (i = period_max; i >= PREDICTION_PERIOD_MIN ; i--) { 337 for (period = period_max; period >= PREDICTION_PERIOD_MIN; period--) {
310 338
311 int *begin = &buffer[len - (i * 3)]; 339 /*
312 int *ptr = begin; 340 * The first comparison always succeed because the
341 * suffix is deduced from the first n-period bytes of
342 * the buffer and we compare the initial suffix with
343 * itself, so we can skip the first iteration.
344 */
345 int idx = period;
346 size_t size = period;
313 347
314 /* 348 /*
315 * We look if the suite with period 'i' repeat 349 * We look if the suite with period 'i' repeat
316 * itself. If it is truncated at the end, as it 350 * itself. If it is truncated at the end, as it
317 * repeats we can use the period to find out the next 351 * repeats we can use the period to find out the next
318 * element. 352 * element with the modulo.
319 */ 353 */
320 while (!memcmp(ptr, begin, i * sizeof(*ptr))) { 354 while (!memcmp(buffer, &buffer[idx], size * sizeof(int))) {
321 ptr += i; 355
322 if (ptr >= &buffer[len]) 356 /*
323 return begin[((i * 3) % i)]; 357 * Move the index in a period basis
358 */
359 idx += size;
360
361 /*
362 * If this condition is reached, all previous
363 * memcmp were successful, so the period is
364 * found.
365 */
366 if (idx == len)
367 return buffer[len % period];
368
369 /*
370 * If the remaining elements to compare are
371 * smaller than the period, readjust the size
372 * of the comparison for the last iteration.
373 */
374 if (len - idx < period)
375 size = len - idx;
324 } 376 }
325 } 377 }
326 378
@@ -380,11 +432,43 @@ static u64 __irq_timings_next_event(struct irqt_stat *irqs, int irq, u64 now)
380 return irqs->last_ts + irqs->ema_time[index]; 432 return irqs->last_ts + irqs->ema_time[index];
381} 433}
382 434
435static __always_inline int irq_timings_interval_index(u64 interval)
436{
437 /*
438 * The PREDICTION_FACTOR increase the interval size for the
439 * array of exponential average.
440 */
441 u64 interval_us = (interval >> 10) / PREDICTION_FACTOR;
442
443 return likely(interval_us) ? ilog2(interval_us) : 0;
444}
445
446static __always_inline void __irq_timings_store(int irq, struct irqt_stat *irqs,
447 u64 interval)
448{
449 int index;
450
451 /*
452 * Get the index in the ema table for this interrupt.
453 */
454 index = irq_timings_interval_index(interval);
455
456 /*
457 * Store the index as an element of the pattern in another
458 * circular array.
459 */
460 irqs->circ_timings[irqs->count & IRQ_TIMINGS_MASK] = index;
461
462 irqs->ema_time[index] = irq_timings_ema_new(interval,
463 irqs->ema_time[index]);
464
465 irqs->count++;
466}
467
383static inline void irq_timings_store(int irq, struct irqt_stat *irqs, u64 ts) 468static inline void irq_timings_store(int irq, struct irqt_stat *irqs, u64 ts)
384{ 469{
385 u64 old_ts = irqs->last_ts; 470 u64 old_ts = irqs->last_ts;
386 u64 interval; 471 u64 interval;
387 int index;
388 472
389 /* 473 /*
390 * The timestamps are absolute time values, we need to compute 474 * The timestamps are absolute time values, we need to compute
@@ -415,24 +499,7 @@ static inline void irq_timings_store(int irq, struct irqt_stat *irqs, u64 ts)
415 return; 499 return;
416 } 500 }
417 501
418 /* 502 __irq_timings_store(irq, irqs, interval);
419 * Get the index in the ema table for this interrupt. The
420 * PREDICTION_FACTOR increase the interval size for the array
421 * of exponential average.
422 */
423 index = likely(interval) ?
424 ilog2((interval >> 10) / PREDICTION_FACTOR) : 0;
425
426 /*
427 * Store the index as an element of the pattern in another
428 * circular array.
429 */
430 irqs->circ_timings[irqs->count & IRQ_TIMINGS_MASK] = index;
431
432 irqs->ema_time[index] = irq_timings_ema_new(interval,
433 irqs->ema_time[index]);
434
435 irqs->count++;
436} 503}
437 504
438/** 505/**
@@ -493,11 +560,7 @@ u64 irq_timings_next_event(u64 now)
493 * model while decrementing the counter because we consume the 560 * model while decrementing the counter because we consume the
494 * data from our circular buffer. 561 * data from our circular buffer.
495 */ 562 */
496 563 for_each_irqts(i, irqts) {
497 i = (irqts->count & IRQ_TIMINGS_MASK) - 1;
498 irqts->count = min(IRQ_TIMINGS_SIZE, irqts->count);
499
500 for (; irqts->count > 0; irqts->count--, i = (i + 1) & IRQ_TIMINGS_MASK) {
501 irq = irq_timing_decode(irqts->values[i], &ts); 564 irq = irq_timing_decode(irqts->values[i], &ts);
502 s = idr_find(&irqt_stats, irq); 565 s = idr_find(&irqt_stats, irq);
503 if (s) 566 if (s)
@@ -564,3 +627,325 @@ int irq_timings_alloc(int irq)
564 627
565 return 0; 628 return 0;
566} 629}
630
631#ifdef CONFIG_TEST_IRQ_TIMINGS
632struct timings_intervals {
633 u64 *intervals;
634 size_t count;
635};
636
637/*
638 * Intervals are given in nanosecond base
639 */
640static u64 intervals0[] __initdata = {
641 10000, 50000, 200000, 500000,
642 10000, 50000, 200000, 500000,
643 10000, 50000, 200000, 500000,
644 10000, 50000, 200000, 500000,
645 10000, 50000, 200000, 500000,
646 10000, 50000, 200000, 500000,
647 10000, 50000, 200000, 500000,
648 10000, 50000, 200000, 500000,
649 10000, 50000, 200000,
650};
651
652static u64 intervals1[] __initdata = {
653 223947000, 1240000, 1384000, 1386000, 1386000,
654 217416000, 1236000, 1384000, 1386000, 1387000,
655 214719000, 1241000, 1386000, 1387000, 1384000,
656 213696000, 1234000, 1384000, 1386000, 1388000,
657 219904000, 1240000, 1385000, 1389000, 1385000,
658 212240000, 1240000, 1386000, 1386000, 1386000,
659 214415000, 1236000, 1384000, 1386000, 1387000,
660 214276000, 1234000,
661};
662
663static u64 intervals2[] __initdata = {
664 4000, 3000, 5000, 100000,
665 3000, 3000, 5000, 117000,
666 4000, 4000, 5000, 112000,
667 4000, 3000, 4000, 110000,
668 3000, 5000, 3000, 117000,
669 4000, 4000, 5000, 112000,
670 4000, 3000, 4000, 110000,
671 3000, 4000, 5000, 112000,
672 4000,
673};
674
675static u64 intervals3[] __initdata = {
676 1385000, 212240000, 1240000,
677 1386000, 214415000, 1236000,
678 1384000, 214276000, 1234000,
679 1386000, 214415000, 1236000,
680 1385000, 212240000, 1240000,
681 1386000, 214415000, 1236000,
682 1384000, 214276000, 1234000,
683 1386000, 214415000, 1236000,
684 1385000, 212240000, 1240000,
685};
686
687static u64 intervals4[] __initdata = {
688 10000, 50000, 10000, 50000,
689 10000, 50000, 10000, 50000,
690 10000, 50000, 10000, 50000,
691 10000, 50000, 10000, 50000,
692 10000, 50000, 10000, 50000,
693 10000, 50000, 10000, 50000,
694 10000, 50000, 10000, 50000,
695 10000, 50000, 10000, 50000,
696 10000,
697};
698
699static struct timings_intervals tis[] __initdata = {
700 { intervals0, ARRAY_SIZE(intervals0) },
701 { intervals1, ARRAY_SIZE(intervals1) },
702 { intervals2, ARRAY_SIZE(intervals2) },
703 { intervals3, ARRAY_SIZE(intervals3) },
704 { intervals4, ARRAY_SIZE(intervals4) },
705};
706
707static int __init irq_timings_test_next_index(struct timings_intervals *ti)
708{
709 int _buffer[IRQ_TIMINGS_SIZE];
710 int buffer[IRQ_TIMINGS_SIZE];
711 int index, start, i, count, period_max;
712
713 count = ti->count - 1;
714
715 period_max = count > (3 * PREDICTION_PERIOD_MAX) ?
716 PREDICTION_PERIOD_MAX : count / 3;
717
718 /*
719 * Inject all values except the last one which will be used
720 * to compare with the next index result.
721 */
722 pr_debug("index suite: ");
723
724 for (i = 0; i < count; i++) {
725 index = irq_timings_interval_index(ti->intervals[i]);
726 _buffer[i & IRQ_TIMINGS_MASK] = index;
727 pr_cont("%d ", index);
728 }
729
730 start = count < IRQ_TIMINGS_SIZE ? 0 :
731 count & IRQ_TIMINGS_MASK;
732
733 count = min_t(int, count, IRQ_TIMINGS_SIZE);
734
735 for (i = 0; i < count; i++) {
736 int index = (start + i) & IRQ_TIMINGS_MASK;
737 buffer[i] = _buffer[index];
738 }
739
740 index = irq_timings_next_event_index(buffer, count, period_max);
741 i = irq_timings_interval_index(ti->intervals[ti->count - 1]);
742
743 if (index != i) {
744 pr_err("Expected (%d) and computed (%d) next indexes differ\n",
745 i, index);
746 return -EINVAL;
747 }
748
749 return 0;
750}
751
752static int __init irq_timings_next_index_selftest(void)
753{
754 int i, ret;
755
756 for (i = 0; i < ARRAY_SIZE(tis); i++) {
757
758 pr_info("---> Injecting intervals number #%d (count=%zd)\n",
759 i, tis[i].count);
760
761 ret = irq_timings_test_next_index(&tis[i]);
762 if (ret)
763 break;
764 }
765
766 return ret;
767}
768
769static int __init irq_timings_test_irqs(struct timings_intervals *ti)
770{
771 struct irqt_stat __percpu *s;
772 struct irqt_stat *irqs;
773 int i, index, ret, irq = 0xACE5;
774
775 ret = irq_timings_alloc(irq);
776 if (ret) {
777 pr_err("Failed to allocate irq timings\n");
778 return ret;
779 }
780
781 s = idr_find(&irqt_stats, irq);
782 if (!s) {
783 ret = -EIDRM;
784 goto out;
785 }
786
787 irqs = this_cpu_ptr(s);
788
789 for (i = 0; i < ti->count; i++) {
790
791 index = irq_timings_interval_index(ti->intervals[i]);
792 pr_debug("%d: interval=%llu ema_index=%d\n",
793 i, ti->intervals[i], index);
794
795 __irq_timings_store(irq, irqs, ti->intervals[i]);
796 if (irqs->circ_timings[i & IRQ_TIMINGS_MASK] != index) {
797 pr_err("Failed to store in the circular buffer\n");
798 goto out;
799 }
800 }
801
802 if (irqs->count != ti->count) {
803 pr_err("Count differs\n");
804 goto out;
805 }
806
807 ret = 0;
808out:
809 irq_timings_free(irq);
810
811 return ret;
812}
813
814static int __init irq_timings_irqs_selftest(void)
815{
816 int i, ret;
817
818 for (i = 0; i < ARRAY_SIZE(tis); i++) {
819 pr_info("---> Injecting intervals number #%d (count=%zd)\n",
820 i, tis[i].count);
821 ret = irq_timings_test_irqs(&tis[i]);
822 if (ret)
823 break;
824 }
825
826 return ret;
827}
828
829static int __init irq_timings_test_irqts(struct irq_timings *irqts,
830 unsigned count)
831{
832 int start = count >= IRQ_TIMINGS_SIZE ? count - IRQ_TIMINGS_SIZE : 0;
833 int i, irq, oirq = 0xBEEF;
834 u64 ots = 0xDEAD, ts;
835
836 /*
837 * Fill the circular buffer by using the dedicated function.
838 */
839 for (i = 0; i < count; i++) {
840 pr_debug("%d: index=%d, ts=%llX irq=%X\n",
841 i, i & IRQ_TIMINGS_MASK, ots + i, oirq + i);
842
843 irq_timings_push(ots + i, oirq + i);
844 }
845
846 /*
847 * Compute the first elements values after the index wrapped
848 * up or not.
849 */
850 ots += start;
851 oirq += start;
852
853 /*
854 * Test the circular buffer count is correct.
855 */
856 pr_debug("---> Checking timings array count (%d) is right\n", count);
857 if (WARN_ON(irqts->count != count))
858 return -EINVAL;
859
860 /*
861 * Test the macro allowing to browse all the irqts.
862 */
863 pr_debug("---> Checking the for_each_irqts() macro\n");
864 for_each_irqts(i, irqts) {
865
866 irq = irq_timing_decode(irqts->values[i], &ts);
867
868 pr_debug("index=%d, ts=%llX / %llX, irq=%X / %X\n",
869 i, ts, ots, irq, oirq);
870
871 if (WARN_ON(ts != ots || irq != oirq))
872 return -EINVAL;
873
874 ots++; oirq++;
875 }
876
877 /*
 878 * The circular buffer should have been flushed when browsed
879 * with for_each_irqts
880 */
881 pr_debug("---> Checking timings array is empty after browsing it\n");
882 if (WARN_ON(irqts->count))
883 return -EINVAL;
884
885 return 0;
886}
887
888static int __init irq_timings_irqts_selftest(void)
889{
890 struct irq_timings *irqts = this_cpu_ptr(&irq_timings);
891 int i, ret;
892
893 /*
 894 * Test the circular buffer with different numbers of
 895 * elements. The purpose is to test at the limits (empty, half
 896 * full, full, wrapped with the cursor at the boundaries,
 897 * wrapped several times, etc ...)
898 */
899 int count[] = { 0,
900 IRQ_TIMINGS_SIZE >> 1,
901 IRQ_TIMINGS_SIZE,
902 IRQ_TIMINGS_SIZE + (IRQ_TIMINGS_SIZE >> 1),
903 2 * IRQ_TIMINGS_SIZE,
904 (2 * IRQ_TIMINGS_SIZE) + 3,
905 };
906
907 for (i = 0; i < ARRAY_SIZE(count); i++) {
908
909 pr_info("---> Checking the timings with %d/%d values\n",
910 count[i], IRQ_TIMINGS_SIZE);
911
912 ret = irq_timings_test_irqts(irqts, count[i]);
913 if (ret)
914 break;
915 }
916
917 return ret;
918}
919
920static int __init irq_timings_selftest(void)
921{
922 int ret;
923
924 pr_info("------------------- selftest start -----------------\n");
925
926 /*
 927 * At this point, we don't expect any subsystem to use the irq
928 * timings but us, so it should not be enabled.
929 */
930 if (static_branch_unlikely(&irq_timing_enabled)) {
931 pr_warn("irq timings already initialized, skipping selftest\n");
932 return 0;
933 }
934
935 ret = irq_timings_irqts_selftest();
936 if (ret)
937 goto out;
938
939 ret = irq_timings_irqs_selftest();
940 if (ret)
941 goto out;
942
943 ret = irq_timings_next_index_selftest();
944out:
945 pr_info("---------- selftest end with %s -----------\n",
946 ret ? "failure" : "success");
947
948 return ret;
949}
950early_initcall(irq_timings_selftest);
951#endif
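
The reworked irq_timings_next_event_index() above looks only at the last period_max * 3 samples and, for each candidate period, checks whether that suffix repeats with the period; when it does, the element that would follow the pattern is returned. A self-contained sketch of the same search with illustrative bounds, names and sample data:

#include <stdio.h>
#include <string.h>

#define PERIOD_MIN 3
#define PERIOD_MAX 5

/* Caller guarantees len >= PERIOD_MAX * 3, as the kernel code does. */
static int next_index(const int *buffer, int len)
{
	/* Only the last PERIOD_MAX * 3 samples matter. */
	const int *b = &buffer[len - PERIOD_MAX * 3];
	int n = PERIOD_MAX * 3;

	for (int period = PERIOD_MAX; period >= PERIOD_MIN; period--) {
		int idx = period, size = period;

		while (!memcmp(b, &b[idx], size * sizeof(*b))) {
			idx += size;
			if (idx == n)
				return b[n % period];	/* pattern found */
			if (n - idx < period)
				size = n - idx;		/* truncated tail */
		}
	}

	return -1;	/* no repeating pattern detected */
}

int main(void)
{
	/* ... 3, 7, 5 repeating: the predicted next ema index is 3 */
	int samples[] = { 9, 9, 9, 3, 7, 5, 3, 7, 5, 3, 7, 5, 3, 7, 5, 3, 7, 5 };

	printf("predicted next ema index: %d\n",
	       next_index(samples, sizeof(samples) / sizeof(samples[0])));
	return 0;
}
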
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 0bfa10f4410c..df3008419a1d 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -37,12 +37,26 @@ static int jump_label_cmp(const void *a, const void *b)
37 const struct jump_entry *jea = a; 37 const struct jump_entry *jea = a;
38 const struct jump_entry *jeb = b; 38 const struct jump_entry *jeb = b;
39 39
40 /*
 41 * Entries are sorted by key.
42 */
40 if (jump_entry_key(jea) < jump_entry_key(jeb)) 43 if (jump_entry_key(jea) < jump_entry_key(jeb))
41 return -1; 44 return -1;
42 45
43 if (jump_entry_key(jea) > jump_entry_key(jeb)) 46 if (jump_entry_key(jea) > jump_entry_key(jeb))
44 return 1; 47 return 1;
45 48
49 /*
50 * In the batching mode, entries should also be sorted by the code
51 * inside the already sorted list of entries, enabling a bsearch in
52 * the vector.
53 */
54 if (jump_entry_code(jea) < jump_entry_code(jeb))
55 return -1;
56
57 if (jump_entry_code(jea) > jump_entry_code(jeb))
58 return 1;
59
46 return 0; 60 return 0;
47} 61}
48 62
@@ -384,25 +398,55 @@ static enum jump_label_type jump_label_type(struct jump_entry *entry)
384 return enabled ^ branch; 398 return enabled ^ branch;
385} 399}
386 400
401static bool jump_label_can_update(struct jump_entry *entry, bool init)
402{
403 /*
404 * Cannot update code that was in an init text area.
405 */
406 if (!init && jump_entry_is_init(entry))
407 return false;
408
409 if (!kernel_text_address(jump_entry_code(entry))) {
410 WARN_ONCE(1, "can't patch jump_label at %pS", (void *)jump_entry_code(entry));
411 return false;
412 }
413
414 return true;
415}
416
417#ifndef HAVE_JUMP_LABEL_BATCH
387static void __jump_label_update(struct static_key *key, 418static void __jump_label_update(struct static_key *key,
388 struct jump_entry *entry, 419 struct jump_entry *entry,
389 struct jump_entry *stop, 420 struct jump_entry *stop,
390 bool init) 421 bool init)
391{ 422{
392 for (; (entry < stop) && (jump_entry_key(entry) == key); entry++) { 423 for (; (entry < stop) && (jump_entry_key(entry) == key); entry++) {
393 /* 424 if (jump_label_can_update(entry, init))
394 * An entry->code of 0 indicates an entry which has been 425 arch_jump_label_transform(entry, jump_label_type(entry));
395 * disabled because it was in an init text area. 426 }
396 */ 427}
397 if (init || !jump_entry_is_init(entry)) { 428#else
398 if (kernel_text_address(jump_entry_code(entry))) 429static void __jump_label_update(struct static_key *key,
399 arch_jump_label_transform(entry, jump_label_type(entry)); 430 struct jump_entry *entry,
400 else 431 struct jump_entry *stop,
401 WARN_ONCE(1, "can't patch jump_label at %pS", 432 bool init)
402 (void *)jump_entry_code(entry)); 433{
434 for (; (entry < stop) && (jump_entry_key(entry) == key); entry++) {
435
436 if (!jump_label_can_update(entry, init))
437 continue;
438
439 if (!arch_jump_label_transform_queue(entry, jump_label_type(entry))) {
440 /*
441 * Queue is full: Apply the current queue and try again.
442 */
443 arch_jump_label_transform_apply();
444 BUG_ON(!arch_jump_label_transform_queue(entry, jump_label_type(entry)));
403 } 445 }
404 } 446 }
447 arch_jump_label_transform_apply();
405} 448}
449#endif
406 450
407void __init jump_label_init(void) 451void __init jump_label_init(void)
408{ 452{
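
With HAVE_JUMP_LABEL_BATCH, __jump_label_update() queues each transform and only falls back to an immediate apply when the architecture's queue fills up, then flushes whatever remains at the end, amortizing the expensive per-flush synchronization over many patch sites. A standalone sketch of that queue-then-apply pattern; the queue depth, names and the printf are stand-ins for the arch hooks:

#include <stdbool.h>
#include <stdio.h>

#define QUEUE_MAX 4		/* illustrative queue depth */

static int queue[QUEUE_MAX];
static int queued;

/* Stand-in for arch_jump_label_transform_queue(). */
static bool queue_update(int site)
{
	if (queued == QUEUE_MAX)
		return false;	/* full: caller must apply and retry */
	queue[queued++] = site;
	return true;
}

/* Stand-in for arch_jump_label_transform_apply(). */
static void apply_queue(void)
{
	printf("flushing %d queued site(s)\n", queued);
	queued = 0;
}

int main(void)
{
	for (int site = 0; site < 10; site++) {
		if (!queue_update(site)) {
			apply_queue();		/* queue full: apply ... */
			queue_update(site);	/* ... and queue again */
		}
	}
	apply_queue();				/* flush the remainder */
	return 0;
}
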
diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile
index 6fe2f333aecb..45452facff3b 100644
--- a/kernel/locking/Makefile
+++ b/kernel/locking/Makefile
@@ -3,7 +3,7 @@
3# and is generally not a function of system call inputs. 3# and is generally not a function of system call inputs.
4KCOV_INSTRUMENT := n 4KCOV_INSTRUMENT := n
5 5
6obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o rwsem-xadd.o 6obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o
7 7
8ifdef CONFIG_FUNCTION_TRACER 8ifdef CONFIG_FUNCTION_TRACER
9CFLAGS_REMOVE_lockdep.o = $(CC_FLAGS_FTRACE) 9CFLAGS_REMOVE_lockdep.o = $(CC_FLAGS_FTRACE)
diff --git a/kernel/locking/lock_events.h b/kernel/locking/lock_events.h
index 46b71af8eef2..8c7e7d25f09c 100644
--- a/kernel/locking/lock_events.h
+++ b/kernel/locking/lock_events.h
@@ -31,50 +31,13 @@ enum lock_events {
31DECLARE_PER_CPU(unsigned long, lockevents[lockevent_num]); 31DECLARE_PER_CPU(unsigned long, lockevents[lockevent_num]);
32 32
33/* 33/*
 34 * The purpose of the lock event counting subsystem is to provide a low 34 * Increment the statistical counters. Use raw_cpu_inc() because of lower
 35 * overhead way to record the number of specific locking events by using 35 * overhead and we don't care if we lose the occasional update.
36 * percpu counters. It is the percpu sum that matters, not specifically
37 * how many of them happens in each cpu.
38 *
39 * It is possible that the same percpu counter may be modified in both
40 * the process and interrupt contexts. For architectures that perform
41 * percpu operation with multiple instructions, it is possible to lose
42 * count if a process context percpu update is interrupted in the middle
43 * and the same counter is updated in the interrupt context. Therefore,
44 * the generated percpu sum may not be precise. The error, if any, should
45 * be small and insignificant.
46 *
47 * For those architectures that do multi-instruction percpu operation,
48 * preemption in the middle and moving the task to another cpu may cause
49 * a larger error in the count. Again, this will be few and far between.
50 * Given the imprecise nature of the count and the possibility of resetting
51 * the count and doing the measurement again, this is not really a big
52 * problem.
53 *
54 * To get a better picture of what is happening under the hood, it is
55 * suggested that a few measurements should be taken with the counts
56 * reset in between to stamp out outliner because of these possible
57 * error conditions.
58 *
59 * To minimize overhead, we use __this_cpu_*() in all cases except when
60 * CONFIG_DEBUG_PREEMPT is defined. In this particular case, this_cpu_*()
61 * will be used to avoid the appearance of unwanted BUG messages.
62 */
63#ifdef CONFIG_DEBUG_PREEMPT
64#define lockevent_percpu_inc(x) this_cpu_inc(x)
65#define lockevent_percpu_add(x, v) this_cpu_add(x, v)
66#else
67#define lockevent_percpu_inc(x) __this_cpu_inc(x)
68#define lockevent_percpu_add(x, v) __this_cpu_add(x, v)
69#endif
70
71/*
72 * Increment the PV qspinlock statistical counters
73 */ 36 */
74static inline void __lockevent_inc(enum lock_events event, bool cond) 37static inline void __lockevent_inc(enum lock_events event, bool cond)
75{ 38{
76 if (cond) 39 if (cond)
77 lockevent_percpu_inc(lockevents[event]); 40 raw_cpu_inc(lockevents[event]);
78} 41}
79 42
80#define lockevent_inc(ev) __lockevent_inc(LOCKEVENT_ ##ev, true) 43#define lockevent_inc(ev) __lockevent_inc(LOCKEVENT_ ##ev, true)
@@ -82,7 +45,7 @@ static inline void __lockevent_inc(enum lock_events event, bool cond)
82 45
83static inline void __lockevent_add(enum lock_events event, int inc) 46static inline void __lockevent_add(enum lock_events event, int inc)
84{ 47{
85 lockevent_percpu_add(lockevents[event], inc); 48 raw_cpu_add(lockevents[event], inc);
86} 49}
87 50
88#define lockevent_add(ev, c) __lockevent_add(LOCKEVENT_ ##ev, c) 51#define lockevent_add(ev, c) __lockevent_add(LOCKEVENT_ ##ev, c)
diff --git a/kernel/locking/lock_events_list.h b/kernel/locking/lock_events_list.h
index ad7668cfc9da..239039d0ce21 100644
--- a/kernel/locking/lock_events_list.h
+++ b/kernel/locking/lock_events_list.h
@@ -56,12 +56,16 @@ LOCK_EVENT(rwsem_sleep_reader) /* # of reader sleeps */
56LOCK_EVENT(rwsem_sleep_writer) /* # of writer sleeps */ 56LOCK_EVENT(rwsem_sleep_writer) /* # of writer sleeps */
57LOCK_EVENT(rwsem_wake_reader) /* # of reader wakeups */ 57LOCK_EVENT(rwsem_wake_reader) /* # of reader wakeups */
58LOCK_EVENT(rwsem_wake_writer) /* # of writer wakeups */ 58LOCK_EVENT(rwsem_wake_writer) /* # of writer wakeups */
59LOCK_EVENT(rwsem_opt_wlock) /* # of write locks opt-spin acquired */ 59LOCK_EVENT(rwsem_opt_rlock) /* # of opt-acquired read locks */
60LOCK_EVENT(rwsem_opt_fail) /* # of failed opt-spinnings */ 60LOCK_EVENT(rwsem_opt_wlock) /* # of opt-acquired write locks */
61LOCK_EVENT(rwsem_opt_fail) /* # of failed optspins */
62LOCK_EVENT(rwsem_opt_nospin) /* # of disabled optspins */
63LOCK_EVENT(rwsem_opt_norspin) /* # of disabled reader-only optspins */
64LOCK_EVENT(rwsem_opt_rlock2) /* # of opt-acquired 2ndary read locks */
61LOCK_EVENT(rwsem_rlock) /* # of read locks acquired */ 65LOCK_EVENT(rwsem_rlock) /* # of read locks acquired */
62LOCK_EVENT(rwsem_rlock_fast) /* # of fast read locks acquired */ 66LOCK_EVENT(rwsem_rlock_fast) /* # of fast read locks acquired */
63LOCK_EVENT(rwsem_rlock_fail) /* # of failed read lock acquisitions */ 67LOCK_EVENT(rwsem_rlock_fail) /* # of failed read lock acquisitions */
64LOCK_EVENT(rwsem_rtrylock) /* # of read trylock calls */ 68LOCK_EVENT(rwsem_rlock_handoff) /* # of read lock handoffs */
65LOCK_EVENT(rwsem_wlock) /* # of write locks acquired */ 69LOCK_EVENT(rwsem_wlock) /* # of write locks acquired */
66LOCK_EVENT(rwsem_wlock_fail) /* # of failed write lock acquisitions */ 70LOCK_EVENT(rwsem_wlock_fail) /* # of failed write lock acquisitions */
67LOCK_EVENT(rwsem_wtrylock) /* # of write trylock calls */ 71LOCK_EVENT(rwsem_wlock_handoff) /* # of write lock handoffs */
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index c47788fa85f9..341f52117f88 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -151,17 +151,28 @@ unsigned long nr_lock_classes;
151static 151static
152#endif 152#endif
153struct lock_class lock_classes[MAX_LOCKDEP_KEYS]; 153struct lock_class lock_classes[MAX_LOCKDEP_KEYS];
154static DECLARE_BITMAP(lock_classes_in_use, MAX_LOCKDEP_KEYS);
154 155
155static inline struct lock_class *hlock_class(struct held_lock *hlock) 156static inline struct lock_class *hlock_class(struct held_lock *hlock)
156{ 157{
157 if (!hlock->class_idx) { 158 unsigned int class_idx = hlock->class_idx;
159
160 /* Don't re-read hlock->class_idx, can't use READ_ONCE() on bitfield */
161 barrier();
162
163 if (!test_bit(class_idx, lock_classes_in_use)) {
158 /* 164 /*
159 * Someone passed in garbage, we give up. 165 * Someone passed in garbage, we give up.
160 */ 166 */
161 DEBUG_LOCKS_WARN_ON(1); 167 DEBUG_LOCKS_WARN_ON(1);
162 return NULL; 168 return NULL;
163 } 169 }
164 return lock_classes + hlock->class_idx - 1; 170
171 /*
172 * At this point, if the passed hlock->class_idx is still garbage,
173 * we just have to live with it
174 */
175 return lock_classes + class_idx;
165} 176}
166 177
167#ifdef CONFIG_LOCK_STAT 178#ifdef CONFIG_LOCK_STAT
@@ -359,6 +370,13 @@ static inline u64 iterate_chain_key(u64 key, u32 idx)
359 return k0 | (u64)k1 << 32; 370 return k0 | (u64)k1 << 32;
360} 371}
361 372
373void lockdep_init_task(struct task_struct *task)
374{
375 task->lockdep_depth = 0; /* no locks held yet */
376 task->curr_chain_key = INITIAL_CHAIN_KEY;
377 task->lockdep_recursion = 0;
378}
379
362void lockdep_off(void) 380void lockdep_off(void)
363{ 381{
364 current->lockdep_recursion++; 382 current->lockdep_recursion++;
@@ -419,13 +437,6 @@ static int verbose(struct lock_class *class)
419 return 0; 437 return 0;
420} 438}
421 439
422/*
423 * Stack-trace: tightly packed array of stack backtrace
424 * addresses. Protected by the graph_lock.
425 */
426unsigned long nr_stack_trace_entries;
427static unsigned long stack_trace[MAX_STACK_TRACE_ENTRIES];
428
429static void print_lockdep_off(const char *bug_msg) 440static void print_lockdep_off(const char *bug_msg)
430{ 441{
431 printk(KERN_DEBUG "%s\n", bug_msg); 442 printk(KERN_DEBUG "%s\n", bug_msg);
@@ -435,6 +446,15 @@ static void print_lockdep_off(const char *bug_msg)
435#endif 446#endif
436} 447}
437 448
449unsigned long nr_stack_trace_entries;
450
451#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING)
452/*
453 * Stack-trace: tightly packed array of stack backtrace
454 * addresses. Protected by the graph_lock.
455 */
456static unsigned long stack_trace[MAX_STACK_TRACE_ENTRIES];
457
438static int save_trace(struct lock_trace *trace) 458static int save_trace(struct lock_trace *trace)
439{ 459{
440 unsigned long *entries = stack_trace + nr_stack_trace_entries; 460 unsigned long *entries = stack_trace + nr_stack_trace_entries;
@@ -457,6 +477,7 @@ static int save_trace(struct lock_trace *trace)
457 477
458 return 1; 478 return 1;
459} 479}
480#endif
460 481
461unsigned int nr_hardirq_chains; 482unsigned int nr_hardirq_chains;
462unsigned int nr_softirq_chains; 483unsigned int nr_softirq_chains;
@@ -470,6 +491,7 @@ unsigned int max_lockdep_depth;
470DEFINE_PER_CPU(struct lockdep_stats, lockdep_stats); 491DEFINE_PER_CPU(struct lockdep_stats, lockdep_stats);
471#endif 492#endif
472 493
494#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING)
473/* 495/*
474 * Locking printouts: 496 * Locking printouts:
475 */ 497 */
@@ -487,6 +509,7 @@ static const char *usage_str[] =
487#undef LOCKDEP_STATE 509#undef LOCKDEP_STATE
488 [LOCK_USED] = "INITIAL USE", 510 [LOCK_USED] = "INITIAL USE",
489}; 511};
512#endif
490 513
491const char * __get_key_name(struct lockdep_subclass_key *key, char *str) 514const char * __get_key_name(struct lockdep_subclass_key *key, char *str)
492{ 515{
@@ -500,15 +523,26 @@ static inline unsigned long lock_flag(enum lock_usage_bit bit)
500 523
501static char get_usage_char(struct lock_class *class, enum lock_usage_bit bit) 524static char get_usage_char(struct lock_class *class, enum lock_usage_bit bit)
502{ 525{
526 /*
527 * The usage character defaults to '.' (i.e., irqs disabled and not in
528 * irq context), which is the safest usage category.
529 */
503 char c = '.'; 530 char c = '.';
504 531
505 if (class->usage_mask & lock_flag(bit + LOCK_USAGE_DIR_MASK)) 532 /*
533 * The order of the following usage checks matters, which will
534 * result in the outcome character as follows:
535 *
536 * - '+': irq is enabled and not in irq context
537 * - '-': in irq context and irq is disabled
538 * - '?': in irq context and irq is enabled
539 */
540 if (class->usage_mask & lock_flag(bit + LOCK_USAGE_DIR_MASK)) {
506 c = '+'; 541 c = '+';
507 if (class->usage_mask & lock_flag(bit)) { 542 if (class->usage_mask & lock_flag(bit))
508 c = '-';
509 if (class->usage_mask & lock_flag(bit + LOCK_USAGE_DIR_MASK))
510 c = '?'; 543 c = '?';
511 } 544 } else if (class->usage_mask & lock_flag(bit))
545 c = '-';
512 546
513 return c; 547 return c;
514} 548}
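
The reordered get_usage_char() logic maps the two usage bits to a character exactly as the new comment describes: '.' when neither bit is set, '+' for enabled-only, '-' for in-irq-context-only, and '?' when both are set. A standalone sketch of that mapping, with illustrative names for the LOCK_ENABLED_* and LOCK_USED_IN_* bits:

#include <stdio.h>

/*
 * "dir" stands for the LOCK_ENABLED_* direction bit (lock ever held with
 * irqs enabled), "ctx" for the LOCK_USED_IN_* bit (lock ever taken in
 * irq context); both names are illustrative.
 */
static char usage_char(int ctx, int dir)
{
	char c = '.';			/* irqs disabled, not in irq context */

	if (dir) {
		c = '+';		/* irqs enabled, not in irq context */
		if (ctx)
			c = '?';	/* in irq context and irqs enabled */
	} else if (ctx) {
		c = '-';		/* in irq context and irqs disabled */
	}

	return c;
}

int main(void)
{
	for (int ctx = 0; ctx <= 1; ctx++)
		for (int dir = 0; dir <= 1; dir++)
			printf("used_in_irq=%d enabled=%d -> '%c'\n",
			       ctx, dir, usage_char(ctx, dir));
	return 0;
}
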
@@ -572,19 +606,22 @@ static void print_lock(struct held_lock *hlock)
572 /* 606 /*
573 * We can be called locklessly through debug_show_all_locks() so be 607 * We can be called locklessly through debug_show_all_locks() so be
574 * extra careful, the hlock might have been released and cleared. 608 * extra careful, the hlock might have been released and cleared.
609 *
 610 * If this indeed happens, let's pretend it does not hurt to continue
611 * to print the lock unless the hlock class_idx does not point to a
612 * registered class. The rationale here is: since we don't attempt
613 * to distinguish whether we are in this situation, if it just
614 * happened we can't count on class_idx to tell either.
575 */ 615 */
576 unsigned int class_idx = hlock->class_idx; 616 struct lock_class *lock = hlock_class(hlock);
577
578 /* Don't re-read hlock->class_idx, can't use READ_ONCE() on bitfields: */
579 barrier();
580 617
581 if (!class_idx || (class_idx - 1) >= MAX_LOCKDEP_KEYS) { 618 if (!lock) {
582 printk(KERN_CONT "<RELEASED>\n"); 619 printk(KERN_CONT "<RELEASED>\n");
583 return; 620 return;
584 } 621 }
585 622
586 printk(KERN_CONT "%p", hlock->instance); 623 printk(KERN_CONT "%p", hlock->instance);
587 print_lock_name(lock_classes + class_idx - 1); 624 print_lock_name(lock);
588 printk(KERN_CONT ", at: %pS\n", (void *)hlock->acquire_ip); 625 printk(KERN_CONT ", at: %pS\n", (void *)hlock->acquire_ip);
589} 626}
590 627
@@ -732,7 +769,8 @@ look_up_lock_class(const struct lockdep_map *lock, unsigned int subclass)
732 * Huh! same key, different name? Did someone trample 769 * Huh! same key, different name? Did someone trample
733 * on some memory? We're most confused. 770 * on some memory? We're most confused.
734 */ 771 */
735 WARN_ON_ONCE(class->name != lock->name); 772 WARN_ON_ONCE(class->name != lock->name &&
773 lock->key != &__lockdep_no_validate__);
736 return class; 774 return class;
737 } 775 }
738 } 776 }
@@ -838,11 +876,11 @@ static u16 chain_hlocks[MAX_LOCKDEP_CHAIN_HLOCKS];
838static bool check_lock_chain_key(struct lock_chain *chain) 876static bool check_lock_chain_key(struct lock_chain *chain)
839{ 877{
840#ifdef CONFIG_PROVE_LOCKING 878#ifdef CONFIG_PROVE_LOCKING
841 u64 chain_key = 0; 879 u64 chain_key = INITIAL_CHAIN_KEY;
842 int i; 880 int i;
843 881
844 for (i = chain->base; i < chain->base + chain->depth; i++) 882 for (i = chain->base; i < chain->base + chain->depth; i++)
845 chain_key = iterate_chain_key(chain_key, chain_hlocks[i] + 1); 883 chain_key = iterate_chain_key(chain_key, chain_hlocks[i]);
846 /* 884 /*
847 * The 'unsigned long long' casts avoid that a compiler warning 885 * The 'unsigned long long' casts avoid that a compiler warning
848 * is reported when building tools/lib/lockdep. 886 * is reported when building tools/lib/lockdep.
@@ -1117,6 +1155,7 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
1117 return NULL; 1155 return NULL;
1118 } 1156 }
1119 nr_lock_classes++; 1157 nr_lock_classes++;
1158 __set_bit(class - lock_classes, lock_classes_in_use);
1120 debug_atomic_inc(nr_unused_locks); 1159 debug_atomic_inc(nr_unused_locks);
1121 class->key = key; 1160 class->key = key;
1122 class->name = lock->name; 1161 class->name = lock->name;
@@ -1228,13 +1267,17 @@ static int add_lock_to_list(struct lock_class *this,
1228#define CQ_MASK (MAX_CIRCULAR_QUEUE_SIZE-1) 1267#define CQ_MASK (MAX_CIRCULAR_QUEUE_SIZE-1)
1229 1268
1230/* 1269/*
1231 * The circular_queue and helpers is used to implement the 1270 * The circular_queue and helpers are used to implement graph
1232 * breadth-first search(BFS)algorithem, by which we can build 1271 * breadth-first search (BFS) algorithm, by which we can determine
1233 * the shortest path from the next lock to be acquired to the 1272 * whether there is a path from a lock to another. In deadlock checks,
1234 * previous held lock if there is a circular between them. 1273 * a path from the next lock to be acquired to a previous held lock
1274 * indicates that adding the <prev> -> <next> lock dependency will
1275 * produce a circle in the graph. Breadth-first search instead of
1276 * depth-first search is used in order to find the shortest (circular)
1277 * path.
1235 */ 1278 */
1236struct circular_queue { 1279struct circular_queue {
1237 unsigned long element[MAX_CIRCULAR_QUEUE_SIZE]; 1280 struct lock_list *element[MAX_CIRCULAR_QUEUE_SIZE];
1238 unsigned int front, rear; 1281 unsigned int front, rear;
1239}; 1282};
1240 1283
@@ -1260,7 +1303,7 @@ static inline int __cq_full(struct circular_queue *cq)
1260 return ((cq->rear + 1) & CQ_MASK) == cq->front; 1303 return ((cq->rear + 1) & CQ_MASK) == cq->front;
1261} 1304}
1262 1305
1263static inline int __cq_enqueue(struct circular_queue *cq, unsigned long elem) 1306static inline int __cq_enqueue(struct circular_queue *cq, struct lock_list *elem)
1264{ 1307{
1265 if (__cq_full(cq)) 1308 if (__cq_full(cq))
1266 return -1; 1309 return -1;
@@ -1270,14 +1313,21 @@ static inline int __cq_enqueue(struct circular_queue *cq, unsigned long elem)
1270 return 0; 1313 return 0;
1271} 1314}
1272 1315
1273static inline int __cq_dequeue(struct circular_queue *cq, unsigned long *elem) 1316/*
1317 * Dequeue an element from the circular_queue, return a lock_list if
1318 * the queue is not empty, or NULL if otherwise.
1319 */
1320static inline struct lock_list * __cq_dequeue(struct circular_queue *cq)
1274{ 1321{
1322 struct lock_list * lock;
1323
1275 if (__cq_empty(cq)) 1324 if (__cq_empty(cq))
1276 return -1; 1325 return NULL;
1277 1326
1278 *elem = cq->element[cq->front]; 1327 lock = cq->element[cq->front];
1279 cq->front = (cq->front + 1) & CQ_MASK; 1328 cq->front = (cq->front + 1) & CQ_MASK;
1280 return 0; 1329
1330 return lock;
1281} 1331}
1282 1332
1283static inline unsigned int __cq_get_elem_count(struct circular_queue *cq) 1333static inline unsigned int __cq_get_elem_count(struct circular_queue *cq)
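
After this change the lockdep BFS queue stores struct lock_list pointers directly and __cq_dequeue() returns the element (or NULL when empty), which lets the search loop be written as while ((lock = __cq_dequeue(cq))). A self-contained sketch of such a fixed-size, power-of-two circular queue and the consume loop; the size and element type here are illustrative:

#include <stddef.h>
#include <stdio.h>

#define CQ_SIZE 8			/* power of two */
#define CQ_MASK (CQ_SIZE - 1)

struct node { int id; };

struct cqueue {
	struct node *element[CQ_SIZE];
	unsigned int front, rear;
};

static int cq_enqueue(struct cqueue *cq, struct node *n)
{
	if (((cq->rear + 1) & CQ_MASK) == cq->front)
		return -1;		/* full */
	cq->element[cq->rear] = n;
	cq->rear = (cq->rear + 1) & CQ_MASK;
	return 0;
}

/* Return the next element, or NULL when the queue is empty. */
static struct node *cq_dequeue(struct cqueue *cq)
{
	struct node *n;

	if (cq->front == cq->rear)
		return NULL;		/* empty */
	n = cq->element[cq->front];
	cq->front = (cq->front + 1) & CQ_MASK;
	return n;
}

int main(void)
{
	struct cqueue cq = { .front = 0, .rear = 0 };
	struct node a = { 1 }, b = { 2 }, *n;

	cq_enqueue(&cq, &a);
	cq_enqueue(&cq, &b);
	while ((n = cq_dequeue(&cq)))	/* BFS-style consume loop */
		printf("visited %d\n", n->id);
	return 0;
}
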
@@ -1322,13 +1372,32 @@ static inline int get_lock_depth(struct lock_list *child)
1322 return depth; 1372 return depth;
1323} 1373}
1324 1374
1375/*
1376 * Return the forward or backward dependency list.
1377 *
1378 * @lock: the lock_list to get its class's dependency list
1379 * @offset: the offset to struct lock_class to determine whether it is
1380 * locks_after or locks_before
1381 */
1382static inline struct list_head *get_dep_list(struct lock_list *lock, int offset)
1383{
1384 void *lock_class = lock->class;
1385
1386 return lock_class + offset;
1387}
1388
1389/*
1390 * Forward- or backward-dependency search, used for both circular dependency
1391 * checking and hardirq-unsafe/softirq-unsafe checking.
1392 */
1325static int __bfs(struct lock_list *source_entry, 1393static int __bfs(struct lock_list *source_entry,
1326 void *data, 1394 void *data,
1327 int (*match)(struct lock_list *entry, void *data), 1395 int (*match)(struct lock_list *entry, void *data),
1328 struct lock_list **target_entry, 1396 struct lock_list **target_entry,
1329 int forward) 1397 int offset)
1330{ 1398{
1331 struct lock_list *entry; 1399 struct lock_list *entry;
1400 struct lock_list *lock;
1332 struct list_head *head; 1401 struct list_head *head;
1333 struct circular_queue *cq = &lock_cq; 1402 struct circular_queue *cq = &lock_cq;
1334 int ret = 1; 1403 int ret = 1;
@@ -1339,31 +1408,21 @@ static int __bfs(struct lock_list *source_entry,
1339 goto exit; 1408 goto exit;
1340 } 1409 }
1341 1410
1342 if (forward) 1411 head = get_dep_list(source_entry, offset);
1343 head = &source_entry->class->locks_after;
1344 else
1345 head = &source_entry->class->locks_before;
1346
1347 if (list_empty(head)) 1412 if (list_empty(head))
1348 goto exit; 1413 goto exit;
1349 1414
1350 __cq_init(cq); 1415 __cq_init(cq);
1351 __cq_enqueue(cq, (unsigned long)source_entry); 1416 __cq_enqueue(cq, source_entry);
1352 1417
1353 while (!__cq_empty(cq)) { 1418 while ((lock = __cq_dequeue(cq))) {
1354 struct lock_list *lock;
1355
1356 __cq_dequeue(cq, (unsigned long *)&lock);
1357 1419
1358 if (!lock->class) { 1420 if (!lock->class) {
1359 ret = -2; 1421 ret = -2;
1360 goto exit; 1422 goto exit;
1361 } 1423 }
1362 1424
1363 if (forward) 1425 head = get_dep_list(lock, offset);
1364 head = &lock->class->locks_after;
1365 else
1366 head = &lock->class->locks_before;
1367 1426
1368 DEBUG_LOCKS_WARN_ON(!irqs_disabled()); 1427 DEBUG_LOCKS_WARN_ON(!irqs_disabled());
1369 1428
@@ -1377,7 +1436,7 @@ static int __bfs(struct lock_list *source_entry,
1377 goto exit; 1436 goto exit;
1378 } 1437 }
1379 1438
1380 if (__cq_enqueue(cq, (unsigned long)entry)) { 1439 if (__cq_enqueue(cq, entry)) {
1381 ret = -1; 1440 ret = -1;
1382 goto exit; 1441 goto exit;
1383 } 1442 }
@@ -1396,7 +1455,8 @@ static inline int __bfs_forwards(struct lock_list *src_entry,
1396 int (*match)(struct lock_list *entry, void *data), 1455 int (*match)(struct lock_list *entry, void *data),
1397 struct lock_list **target_entry) 1456 struct lock_list **target_entry)
1398{ 1457{
1399 return __bfs(src_entry, data, match, target_entry, 1); 1458 return __bfs(src_entry, data, match, target_entry,
1459 offsetof(struct lock_class, locks_after));
1400 1460
1401} 1461}
1402 1462
@@ -1405,16 +1465,11 @@ static inline int __bfs_backwards(struct lock_list *src_entry,
1405 int (*match)(struct lock_list *entry, void *data), 1465 int (*match)(struct lock_list *entry, void *data),
1406 struct lock_list **target_entry) 1466 struct lock_list **target_entry)
1407{ 1467{
1408 return __bfs(src_entry, data, match, target_entry, 0); 1468 return __bfs(src_entry, data, match, target_entry,
1469 offsetof(struct lock_class, locks_before));
1409 1470
1410} 1471}
1411 1472
1412/*
1413 * Recursive, forwards-direction lock-dependency checking, used for
1414 * both noncyclic checking and for hardirq-unsafe/softirq-unsafe
1415 * checking.
1416 */
1417
1418static void print_lock_trace(struct lock_trace *trace, unsigned int spaces) 1473static void print_lock_trace(struct lock_trace *trace, unsigned int spaces)
1419{ 1474{
1420 unsigned long *entries = stack_trace + trace->offset; 1475 unsigned long *entries = stack_trace + trace->offset;
@@ -1426,16 +1481,15 @@ static void print_lock_trace(struct lock_trace *trace, unsigned int spaces)
1426 * Print a dependency chain entry (this is only done when a deadlock 1481 * Print a dependency chain entry (this is only done when a deadlock
1427 * has been detected): 1482 * has been detected):
1428 */ 1483 */
1429static noinline int 1484static noinline void
1430print_circular_bug_entry(struct lock_list *target, int depth) 1485print_circular_bug_entry(struct lock_list *target, int depth)
1431{ 1486{
1432 if (debug_locks_silent) 1487 if (debug_locks_silent)
1433 return 0; 1488 return;
1434 printk("\n-> #%u", depth); 1489 printk("\n-> #%u", depth);
1435 print_lock_name(target->class); 1490 print_lock_name(target->class);
1436 printk(KERN_CONT ":\n"); 1491 printk(KERN_CONT ":\n");
1437 print_lock_trace(&target->trace, 6); 1492 print_lock_trace(&target->trace, 6);
1438 return 0;
1439} 1493}
1440 1494
1441static void 1495static void
@@ -1492,7 +1546,7 @@ print_circular_lock_scenario(struct held_lock *src,
1492 * When a circular dependency is detected, print the 1546 * When a circular dependency is detected, print the
1493 * header first: 1547 * header first:
1494 */ 1548 */
1495static noinline int 1549static noinline void
1496print_circular_bug_header(struct lock_list *entry, unsigned int depth, 1550print_circular_bug_header(struct lock_list *entry, unsigned int depth,
1497 struct held_lock *check_src, 1551 struct held_lock *check_src,
1498 struct held_lock *check_tgt) 1552 struct held_lock *check_tgt)
@@ -1500,7 +1554,7 @@ print_circular_bug_header(struct lock_list *entry, unsigned int depth,
1500 struct task_struct *curr = current; 1554 struct task_struct *curr = current;
1501 1555
1502 if (debug_locks_silent) 1556 if (debug_locks_silent)
1503 return 0; 1557 return;
1504 1558
1505 pr_warn("\n"); 1559 pr_warn("\n");
1506 pr_warn("======================================================\n"); 1560 pr_warn("======================================================\n");
@@ -1518,8 +1572,6 @@ print_circular_bug_header(struct lock_list *entry, unsigned int depth,
1518 pr_warn("\nthe existing dependency chain (in reverse order) is:\n"); 1572 pr_warn("\nthe existing dependency chain (in reverse order) is:\n");
1519 1573
1520 print_circular_bug_entry(entry, depth); 1574 print_circular_bug_entry(entry, depth);
1521
1522 return 0;
1523} 1575}
1524 1576
1525static inline int class_equal(struct lock_list *entry, void *data) 1577static inline int class_equal(struct lock_list *entry, void *data)
@@ -1527,10 +1579,10 @@ static inline int class_equal(struct lock_list *entry, void *data)
1527 return entry->class == data; 1579 return entry->class == data;
1528} 1580}
1529 1581
1530static noinline int print_circular_bug(struct lock_list *this, 1582static noinline void print_circular_bug(struct lock_list *this,
1531 struct lock_list *target, 1583 struct lock_list *target,
1532 struct held_lock *check_src, 1584 struct held_lock *check_src,
1533 struct held_lock *check_tgt) 1585 struct held_lock *check_tgt)
1534{ 1586{
1535 struct task_struct *curr = current; 1587 struct task_struct *curr = current;
1536 struct lock_list *parent; 1588 struct lock_list *parent;
@@ -1538,10 +1590,10 @@ static noinline int print_circular_bug(struct lock_list *this,
1538 int depth; 1590 int depth;
1539 1591
1540 if (!debug_locks_off_graph_unlock() || debug_locks_silent) 1592 if (!debug_locks_off_graph_unlock() || debug_locks_silent)
1541 return 0; 1593 return;
1542 1594
1543 if (!save_trace(&this->trace)) 1595 if (!save_trace(&this->trace))
1544 return 0; 1596 return;
1545 1597
1546 depth = get_lock_depth(target); 1598 depth = get_lock_depth(target);
1547 1599
@@ -1563,21 +1615,17 @@ static noinline int print_circular_bug(struct lock_list *this,
1563 1615
1564 printk("\nstack backtrace:\n"); 1616 printk("\nstack backtrace:\n");
1565 dump_stack(); 1617 dump_stack();
1566
1567 return 0;
1568} 1618}
1569 1619
1570static noinline int print_bfs_bug(int ret) 1620static noinline void print_bfs_bug(int ret)
1571{ 1621{
1572 if (!debug_locks_off_graph_unlock()) 1622 if (!debug_locks_off_graph_unlock())
1573 return 0; 1623 return;
1574 1624
1575 /* 1625 /*
1576 * Breadth-first-search failed, graph got corrupted? 1626 * Breadth-first-search failed, graph got corrupted?
1577 */ 1627 */
1578 WARN(1, "lockdep bfs error:%d\n", ret); 1628 WARN(1, "lockdep bfs error:%d\n", ret);
1579
1580 return 0;
1581} 1629}
1582 1630
1583static int noop_count(struct lock_list *entry, void *data) 1631static int noop_count(struct lock_list *entry, void *data)
@@ -1640,36 +1688,95 @@ unsigned long lockdep_count_backward_deps(struct lock_class *class)
1640} 1688}
1641 1689
1642/* 1690/*
1643 * Prove that the dependency graph starting at <entry> can not 1691 * Check that the dependency graph starting at <src> can lead to
1644 * lead to <target>. Print an error and return 0 if it does. 1692 * <target> or not. Print an error and return 0 if it does.
1645 */ 1693 */
1646static noinline int 1694static noinline int
1647check_noncircular(struct lock_list *root, struct lock_class *target, 1695check_path(struct lock_class *target, struct lock_list *src_entry,
1648 struct lock_list **target_entry) 1696 struct lock_list **target_entry)
1649{ 1697{
1650 int result; 1698 int ret;
1699
1700 ret = __bfs_forwards(src_entry, (void *)target, class_equal,
1701 target_entry);
1702
1703 if (unlikely(ret < 0))
1704 print_bfs_bug(ret);
1705
1706 return ret;
1707}
1708
1709/*
1710 * Prove that the dependency graph starting at <src> can not
1711 * lead to <target>. If it can, there is a circle when adding
1712 * <target> -> <src> dependency.
1713 *
1714 * Print an error and return 0 if it does.
1715 */
1716static noinline int
1717check_noncircular(struct held_lock *src, struct held_lock *target,
1718 struct lock_trace *trace)
1719{
1720 int ret;
1721 struct lock_list *uninitialized_var(target_entry);
1722 struct lock_list src_entry = {
1723 .class = hlock_class(src),
1724 .parent = NULL,
1725 };
1651 1726
1652 debug_atomic_inc(nr_cyclic_checks); 1727 debug_atomic_inc(nr_cyclic_checks);
1653 1728
1654 result = __bfs_forwards(root, target, class_equal, target_entry); 1729 ret = check_path(hlock_class(target), &src_entry, &target_entry);
1655 1730
1656 return result; 1731 if (unlikely(!ret)) {
1732 if (!trace->nr_entries) {
1733 /*
1734 * If save_trace fails here, the printing might
1735 * trigger a WARN but because of the !nr_entries it
1736 * should not do bad things.
1737 */
1738 save_trace(trace);
1739 }
1740
1741 print_circular_bug(&src_entry, target_entry, src, target);
1742 }
1743
1744 return ret;
1657} 1745}
1658 1746
1747#ifdef CONFIG_LOCKDEP_SMALL
1748/*
1749 * Check that the dependency graph starting at <src> can lead to
1750 * <target> or not. If it can, <src> -> <target> dependency is already
1751 * in the graph.
1752 *
1753 * Print an error and return 2 if it does or 1 if it does not.
1754 */
1659static noinline int 1755static noinline int
1660check_redundant(struct lock_list *root, struct lock_class *target, 1756check_redundant(struct held_lock *src, struct held_lock *target)
1661 struct lock_list **target_entry)
1662{ 1757{
1663 int result; 1758 int ret;
1759 struct lock_list *uninitialized_var(target_entry);
1760 struct lock_list src_entry = {
1761 .class = hlock_class(src),
1762 .parent = NULL,
1763 };
1664 1764
1665 debug_atomic_inc(nr_redundant_checks); 1765 debug_atomic_inc(nr_redundant_checks);
1666 1766
1667 result = __bfs_forwards(root, target, class_equal, target_entry); 1767 ret = check_path(hlock_class(target), &src_entry, &target_entry);
1668 1768
1669 return result; 1769 if (!ret) {
1770 debug_atomic_inc(nr_redundant);
1771 ret = 2;
1772 } else if (ret < 0)
1773 ret = 0;
1774
1775 return ret;
1670} 1776}
1777#endif
1671 1778
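As an illustrative aside (not part of the patch): the hunk above folds the old check_noncircular()/check_redundant() BFS calls into a common check_path() helper that walks forward dependencies from <src> and reports whether <target>'s class is reachable. A minimal userspace sketch of that reachability test follows, assuming a toy adjacency-matrix graph and a bounded queue; the names, sizes and the 0/1/-1 return convention are invented here, loosely mirroring how the new callers treat the result.

#include <stdio.h>

#define MAX_NODES  16	/* toy graph size */
#define QUEUE_SIZE 16	/* bounded queue, standing in for lockdep's circular queue */

/* edge[a][b] != 0 means the toy graph already records a dependency a -> b */
static int edge[MAX_NODES][MAX_NODES];

/*
 * Toy reachability check: 0 if @target can be reached from @src (so adding
 * target -> src would close a cycle), 1 if it cannot, -1 if the queue is
 * exhausted. The 0 == "path found" convention loosely mirrors how the new
 * check_noncircular()/check_redundant() callers interpret check_path().
 */
static int check_path_toy(int src, int target)
{
	int queue[QUEUE_SIZE], head = 0, tail = 0;
	int seen[MAX_NODES] = { 0 };

	queue[tail++] = src;
	seen[src] = 1;

	while (head < tail) {
		int node = queue[head++];

		if (node == target)
			return 0;		/* path exists */

		for (int next = 0; next < MAX_NODES; next++) {
			if (!edge[node][next] || seen[next])
				continue;
			if (tail == QUEUE_SIZE)
				return -1;	/* out of queue space */
			seen[next] = 1;
			queue[tail++] = next;
		}
	}
	return 1;				/* target not reachable */
}

int main(void)
{
	/* dependencies A(0) -> B(1) -> C(2) are already recorded */
	edge[0][1] = edge[1][2] = 1;

	/* adding C -> A is checked by searching from A for C */
	printf("adding C -> A would create a cycle: %s\n",
	       check_path_toy(0, 2) == 0 ? "yes" : "no");
	return 0;
}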
1672#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) 1779#ifdef CONFIG_TRACE_IRQFLAGS
1673 1780
1674static inline int usage_accumulate(struct lock_list *entry, void *mask) 1781static inline int usage_accumulate(struct lock_list *entry, void *mask)
1675{ 1782{
@@ -1766,7 +1873,7 @@ static void print_lock_class_header(struct lock_class *class, int depth)
1766 */ 1873 */
1767static void __used 1874static void __used
1768print_shortest_lock_dependencies(struct lock_list *leaf, 1875print_shortest_lock_dependencies(struct lock_list *leaf,
1769 struct lock_list *root) 1876 struct lock_list *root)
1770{ 1877{
1771 struct lock_list *entry = leaf; 1878 struct lock_list *entry = leaf;
1772 int depth; 1879 int depth;
@@ -1788,8 +1895,6 @@ print_shortest_lock_dependencies(struct lock_list *leaf,
1788 entry = get_lock_parent(entry); 1895 entry = get_lock_parent(entry);
1789 depth--; 1896 depth--;
1790 } while (entry && (depth >= 0)); 1897 } while (entry && (depth >= 0));
1791
1792 return;
1793} 1898}
1794 1899
1795static void 1900static void
@@ -1848,7 +1953,7 @@ print_irq_lock_scenario(struct lock_list *safe_entry,
1848 printk("\n *** DEADLOCK ***\n\n"); 1953 printk("\n *** DEADLOCK ***\n\n");
1849} 1954}
1850 1955
1851static int 1956static void
1852print_bad_irq_dependency(struct task_struct *curr, 1957print_bad_irq_dependency(struct task_struct *curr,
1853 struct lock_list *prev_root, 1958 struct lock_list *prev_root,
1854 struct lock_list *next_root, 1959 struct lock_list *next_root,
@@ -1861,7 +1966,7 @@ print_bad_irq_dependency(struct task_struct *curr,
1861 const char *irqclass) 1966 const char *irqclass)
1862{ 1967{
1863 if (!debug_locks_off_graph_unlock() || debug_locks_silent) 1968 if (!debug_locks_off_graph_unlock() || debug_locks_silent)
1864 return 0; 1969 return;
1865 1970
1866 pr_warn("\n"); 1971 pr_warn("\n");
1867 pr_warn("=====================================================\n"); 1972 pr_warn("=====================================================\n");
@@ -1907,19 +2012,17 @@ print_bad_irq_dependency(struct task_struct *curr,
1907 2012
1908 pr_warn("\nthe dependencies between %s-irq-safe lock and the holding lock:\n", irqclass); 2013 pr_warn("\nthe dependencies between %s-irq-safe lock and the holding lock:\n", irqclass);
1909 if (!save_trace(&prev_root->trace)) 2014 if (!save_trace(&prev_root->trace))
1910 return 0; 2015 return;
1911 print_shortest_lock_dependencies(backwards_entry, prev_root); 2016 print_shortest_lock_dependencies(backwards_entry, prev_root);
1912 2017
1913 pr_warn("\nthe dependencies between the lock to be acquired"); 2018 pr_warn("\nthe dependencies between the lock to be acquired");
1914 pr_warn(" and %s-irq-unsafe lock:\n", irqclass); 2019 pr_warn(" and %s-irq-unsafe lock:\n", irqclass);
1915 if (!save_trace(&next_root->trace)) 2020 if (!save_trace(&next_root->trace))
1916 return 0; 2021 return;
1917 print_shortest_lock_dependencies(forwards_entry, next_root); 2022 print_shortest_lock_dependencies(forwards_entry, next_root);
1918 2023
1919 pr_warn("\nstack backtrace:\n"); 2024 pr_warn("\nstack backtrace:\n");
1920 dump_stack(); 2025 dump_stack();
1921
1922 return 0;
1923} 2026}
1924 2027
1925static const char *state_names[] = { 2028static const char *state_names[] = {
@@ -2066,8 +2169,10 @@ static int check_irq_usage(struct task_struct *curr, struct held_lock *prev,
2066 this.class = hlock_class(prev); 2169 this.class = hlock_class(prev);
2067 2170
2068 ret = __bfs_backwards(&this, &usage_mask, usage_accumulate, NULL); 2171 ret = __bfs_backwards(&this, &usage_mask, usage_accumulate, NULL);
2069 if (ret < 0) 2172 if (ret < 0) {
2070 return print_bfs_bug(ret); 2173 print_bfs_bug(ret);
2174 return 0;
2175 }
2071 2176
2072 usage_mask &= LOCKF_USED_IN_IRQ_ALL; 2177 usage_mask &= LOCKF_USED_IN_IRQ_ALL;
2073 if (!usage_mask) 2178 if (!usage_mask)
@@ -2083,8 +2188,10 @@ static int check_irq_usage(struct task_struct *curr, struct held_lock *prev,
2083 that.class = hlock_class(next); 2188 that.class = hlock_class(next);
2084 2189
2085 ret = find_usage_forwards(&that, forward_mask, &target_entry1); 2190 ret = find_usage_forwards(&that, forward_mask, &target_entry1);
2086 if (ret < 0) 2191 if (ret < 0) {
2087 return print_bfs_bug(ret); 2192 print_bfs_bug(ret);
2193 return 0;
2194 }
2088 if (ret == 1) 2195 if (ret == 1)
2089 return ret; 2196 return ret;
2090 2197
@@ -2096,8 +2203,10 @@ static int check_irq_usage(struct task_struct *curr, struct held_lock *prev,
2096 backward_mask = original_mask(target_entry1->class->usage_mask); 2203 backward_mask = original_mask(target_entry1->class->usage_mask);
2097 2204
2098 ret = find_usage_backwards(&this, backward_mask, &target_entry); 2205 ret = find_usage_backwards(&this, backward_mask, &target_entry);
2099 if (ret < 0) 2206 if (ret < 0) {
2100 return print_bfs_bug(ret); 2207 print_bfs_bug(ret);
2208 return 0;
2209 }
2101 if (DEBUG_LOCKS_WARN_ON(ret == 1)) 2210 if (DEBUG_LOCKS_WARN_ON(ret == 1))
2102 return 1; 2211 return 1;
2103 2212
@@ -2111,11 +2220,13 @@ static int check_irq_usage(struct task_struct *curr, struct held_lock *prev,
2111 if (DEBUG_LOCKS_WARN_ON(ret == -1)) 2220 if (DEBUG_LOCKS_WARN_ON(ret == -1))
2112 return 1; 2221 return 1;
2113 2222
2114 return print_bad_irq_dependency(curr, &this, &that, 2223 print_bad_irq_dependency(curr, &this, &that,
2115 target_entry, target_entry1, 2224 target_entry, target_entry1,
2116 prev, next, 2225 prev, next,
2117 backward_bit, forward_bit, 2226 backward_bit, forward_bit,
2118 state_name(backward_bit)); 2227 state_name(backward_bit));
2228
2229 return 0;
2119} 2230}
2120 2231
2121static void inc_chains(void) 2232static void inc_chains(void)
@@ -2143,11 +2254,10 @@ static inline void inc_chains(void)
2143 nr_process_chains++; 2254 nr_process_chains++;
2144} 2255}
2145 2256
2146#endif 2257#endif /* CONFIG_TRACE_IRQFLAGS */
2147 2258
2148static void 2259static void
2149print_deadlock_scenario(struct held_lock *nxt, 2260print_deadlock_scenario(struct held_lock *nxt, struct held_lock *prv)
2150 struct held_lock *prv)
2151{ 2261{
2152 struct lock_class *next = hlock_class(nxt); 2262 struct lock_class *next = hlock_class(nxt);
2153 struct lock_class *prev = hlock_class(prv); 2263 struct lock_class *prev = hlock_class(prv);
@@ -2165,12 +2275,12 @@ print_deadlock_scenario(struct held_lock *nxt,
2165 printk(" May be due to missing lock nesting notation\n\n"); 2275 printk(" May be due to missing lock nesting notation\n\n");
2166} 2276}
2167 2277
2168static int 2278static void
2169print_deadlock_bug(struct task_struct *curr, struct held_lock *prev, 2279print_deadlock_bug(struct task_struct *curr, struct held_lock *prev,
2170 struct held_lock *next) 2280 struct held_lock *next)
2171{ 2281{
2172 if (!debug_locks_off_graph_unlock() || debug_locks_silent) 2282 if (!debug_locks_off_graph_unlock() || debug_locks_silent)
2173 return 0; 2283 return;
2174 2284
2175 pr_warn("\n"); 2285 pr_warn("\n");
2176 pr_warn("============================================\n"); 2286 pr_warn("============================================\n");
@@ -2189,8 +2299,6 @@ print_deadlock_bug(struct task_struct *curr, struct held_lock *prev,
2189 2299
2190 pr_warn("\nstack backtrace:\n"); 2300 pr_warn("\nstack backtrace:\n");
2191 dump_stack(); 2301 dump_stack();
2192
2193 return 0;
2194} 2302}
2195 2303
2196/* 2304/*
@@ -2202,8 +2310,7 @@ print_deadlock_bug(struct task_struct *curr, struct held_lock *prev,
2202 * Returns: 0 on deadlock detected, 1 on OK, 2 on recursive read 2310 * Returns: 0 on deadlock detected, 1 on OK, 2 on recursive read
2203 */ 2311 */
2204static int 2312static int
2205check_deadlock(struct task_struct *curr, struct held_lock *next, 2313check_deadlock(struct task_struct *curr, struct held_lock *next)
2206 struct lockdep_map *next_instance, int read)
2207{ 2314{
2208 struct held_lock *prev; 2315 struct held_lock *prev;
2209 struct held_lock *nest = NULL; 2316 struct held_lock *nest = NULL;
@@ -2222,7 +2329,7 @@ check_deadlock(struct task_struct *curr, struct held_lock *next,
2222 * Allow read-after-read recursion of the same 2329 * Allow read-after-read recursion of the same
2223 * lock class (i.e. read_lock(lock)+read_lock(lock)): 2330 * lock class (i.e. read_lock(lock)+read_lock(lock)):
2224 */ 2331 */
2225 if ((read == 2) && prev->read) 2332 if ((next->read == 2) && prev->read)
2226 return 2; 2333 return 2;
2227 2334
2228 /* 2335 /*
@@ -2232,14 +2339,15 @@ check_deadlock(struct task_struct *curr, struct held_lock *next,
2232 if (nest) 2339 if (nest)
2233 return 2; 2340 return 2;
2234 2341
2235 return print_deadlock_bug(curr, prev, next); 2342 print_deadlock_bug(curr, prev, next);
2343 return 0;
2236 } 2344 }
2237 return 1; 2345 return 1;
2238} 2346}
2239 2347
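As an illustrative aside (not part of the patch): the hunks above drop the redundant lockdep_map/read parameters and let check_deadlock() read everything it needs from the next held_lock itself. Below is a minimal userspace sketch of the rule the function enforces, using an invented toy struct and the 0/1/2 return convention quoted in the comment above the function ("0 on deadlock detected, 1 on OK, 2 on recursive read").

#include <stdio.h>

struct toy_held_lock {
	int class_idx;	/* which lock class this acquisition belongs to */
	int read;	/* 2 == recursive read acquisition */
	int nested;	/* acquired with a nest_lock annotation */
};

/*
 * Scan the current stack of held locks for an earlier acquisition of the
 * same class. Returns 0 on deadlock, 1 when nothing conflicts, and 2 when
 * the re-acquisition is an allowed recursive read or is properly nested.
 */
static int check_deadlock_toy(const struct toy_held_lock *held, int depth,
			      const struct toy_held_lock *next)
{
	for (int i = 0; i < depth; i++) {
		const struct toy_held_lock *prev = &held[i];

		if (prev->class_idx != next->class_idx)
			continue;

		/* read_lock(lock) + read_lock(lock) is fine */
		if (next->read == 2 && prev->read)
			return 2;

		/* an explicit nest_lock annotation also allows it */
		if (next->nested)
			return 2;

		return 0;	/* same class, no excuse: deadlock */
	}
	return 1;
}

int main(void)
{
	struct toy_held_lock held[] = { { .class_idx = 7, .read = 2 } };
	struct toy_held_lock again = { .class_idx = 7, .read = 2 };
	struct toy_held_lock write_again = { .class_idx = 7, .read = 0 };

	printf("read-after-read:  %d\n", check_deadlock_toy(held, 1, &again));       /* 2 */
	printf("write-after-read: %d\n", check_deadlock_toy(held, 1, &write_again)); /* 0 */
	return 0;
}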
2240/* 2348/*
2241 * There was a chain-cache miss, and we are about to add a new dependency 2349 * There was a chain-cache miss, and we are about to add a new dependency
2242 * to a previous lock. We recursively validate the following rules: 2350 * to a previous lock. We validate the following rules:
2243 * 2351 *
2244 * - would the adding of the <prev> -> <next> dependency create a 2352 * - would the adding of the <prev> -> <next> dependency create a
2245 * circular dependency in the graph? [== circular deadlock] 2353 * circular dependency in the graph? [== circular deadlock]
@@ -2263,9 +2371,7 @@ static int
2263check_prev_add(struct task_struct *curr, struct held_lock *prev, 2371check_prev_add(struct task_struct *curr, struct held_lock *prev,
2264 struct held_lock *next, int distance, struct lock_trace *trace) 2372 struct held_lock *next, int distance, struct lock_trace *trace)
2265{ 2373{
2266 struct lock_list *uninitialized_var(target_entry);
2267 struct lock_list *entry; 2374 struct lock_list *entry;
2268 struct lock_list this;
2269 int ret; 2375 int ret;
2270 2376
2271 if (!hlock_class(prev)->key || !hlock_class(next)->key) { 2377 if (!hlock_class(prev)->key || !hlock_class(next)->key) {
@@ -2289,28 +2395,16 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
2289 /* 2395 /*
2290 * Prove that the new <prev> -> <next> dependency would not 2396 * Prove that the new <prev> -> <next> dependency would not
2291 * create a circular dependency in the graph. (We do this by 2397 * create a circular dependency in the graph. (We do this by
2292 * forward-recursing into the graph starting at <next>, and 2398 * a breadth-first search into the graph starting at <next>,
2293 * checking whether we can reach <prev>.) 2399 * and check whether we can reach <prev>.)
2294 * 2400 *
2295 * We are using global variables to control the recursion, to 2401 * The search is limited by the size of the circular queue (i.e.,
2296 * keep the stackframe size of the recursive functions low: 2402 * MAX_CIRCULAR_QUEUE_SIZE) which keeps track of a breadth of nodes
2403 * in the graph whose neighbours are to be checked.
2297 */ 2404 */
2298 this.class = hlock_class(next); 2405 ret = check_noncircular(next, prev, trace);
2299 this.parent = NULL; 2406 if (unlikely(ret <= 0))
2300 ret = check_noncircular(&this, hlock_class(prev), &target_entry); 2407 return 0;
2301 if (unlikely(!ret)) {
2302 if (!trace->nr_entries) {
2303 /*
2304 * If save_trace fails here, the printing might
2305 * trigger a WARN but because of the !nr_entries it
2306 * should not do bad things.
2307 */
2308 save_trace(trace);
2309 }
2310 return print_circular_bug(&this, target_entry, next, prev);
2311 }
2312 else if (unlikely(ret < 0))
2313 return print_bfs_bug(ret);
2314 2408
2315 if (!check_irq_usage(curr, prev, next)) 2409 if (!check_irq_usage(curr, prev, next))
2316 return 0; 2410 return 0;
@@ -2341,19 +2435,14 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
2341 } 2435 }
2342 } 2436 }
2343 2437
2438#ifdef CONFIG_LOCKDEP_SMALL
2344 /* 2439 /*
2345 * Is the <prev> -> <next> link redundant? 2440 * Is the <prev> -> <next> link redundant?
2346 */ 2441 */
2347 this.class = hlock_class(prev); 2442 ret = check_redundant(prev, next);
2348 this.parent = NULL; 2443 if (ret != 1)
2349 ret = check_redundant(&this, hlock_class(next), &target_entry); 2444 return ret;
2350 if (!ret) { 2445#endif
2351 debug_atomic_inc(nr_redundant);
2352 return 2;
2353 }
2354 if (ret < 0)
2355 return print_bfs_bug(ret);
2356
2357 2446
2358 if (!trace->nr_entries && !save_trace(trace)) 2447 if (!trace->nr_entries && !save_trace(trace))
2359 return 0; 2448 return 0;
@@ -2505,12 +2594,13 @@ static void
2505print_chain_keys_held_locks(struct task_struct *curr, struct held_lock *hlock_next) 2594print_chain_keys_held_locks(struct task_struct *curr, struct held_lock *hlock_next)
2506{ 2595{
2507 struct held_lock *hlock; 2596 struct held_lock *hlock;
2508 u64 chain_key = 0; 2597 u64 chain_key = INITIAL_CHAIN_KEY;
2509 int depth = curr->lockdep_depth; 2598 int depth = curr->lockdep_depth;
2510 int i; 2599 int i = get_first_held_lock(curr, hlock_next);
2511 2600
2512 printk("depth: %u\n", depth + 1); 2601 printk("depth: %u (irq_context %u)\n", depth - i + 1,
2513 for (i = get_first_held_lock(curr, hlock_next); i < depth; i++) { 2602 hlock_next->irq_context);
2603 for (; i < depth; i++) {
2514 hlock = curr->held_locks + i; 2604 hlock = curr->held_locks + i;
2515 chain_key = print_chain_key_iteration(hlock->class_idx, chain_key); 2605 chain_key = print_chain_key_iteration(hlock->class_idx, chain_key);
2516 2606
@@ -2524,13 +2614,13 @@ print_chain_keys_held_locks(struct task_struct *curr, struct held_lock *hlock_ne
2524static void print_chain_keys_chain(struct lock_chain *chain) 2614static void print_chain_keys_chain(struct lock_chain *chain)
2525{ 2615{
2526 int i; 2616 int i;
2527 u64 chain_key = 0; 2617 u64 chain_key = INITIAL_CHAIN_KEY;
2528 int class_id; 2618 int class_id;
2529 2619
2530 printk("depth: %u\n", chain->depth); 2620 printk("depth: %u\n", chain->depth);
2531 for (i = 0; i < chain->depth; i++) { 2621 for (i = 0; i < chain->depth; i++) {
2532 class_id = chain_hlocks[chain->base + i]; 2622 class_id = chain_hlocks[chain->base + i];
2533 chain_key = print_chain_key_iteration(class_id + 1, chain_key); 2623 chain_key = print_chain_key_iteration(class_id, chain_key);
2534 2624
2535 print_lock_name(lock_classes + class_id); 2625 print_lock_name(lock_classes + class_id);
2536 printk("\n"); 2626 printk("\n");
@@ -2581,7 +2671,7 @@ static int check_no_collision(struct task_struct *curr,
2581 } 2671 }
2582 2672
2583 for (j = 0; j < chain->depth - 1; j++, i++) { 2673 for (j = 0; j < chain->depth - 1; j++, i++) {
2584 id = curr->held_locks[i].class_idx - 1; 2674 id = curr->held_locks[i].class_idx;
2585 2675
2586 if (DEBUG_LOCKS_WARN_ON(chain_hlocks[chain->base + j] != id)) { 2676 if (DEBUG_LOCKS_WARN_ON(chain_hlocks[chain->base + j] != id)) {
2587 print_collision(curr, hlock, chain); 2677 print_collision(curr, hlock, chain);
@@ -2664,7 +2754,7 @@ static inline int add_chain_cache(struct task_struct *curr,
2664 if (likely(nr_chain_hlocks + chain->depth <= MAX_LOCKDEP_CHAIN_HLOCKS)) { 2754 if (likely(nr_chain_hlocks + chain->depth <= MAX_LOCKDEP_CHAIN_HLOCKS)) {
2665 chain->base = nr_chain_hlocks; 2755 chain->base = nr_chain_hlocks;
2666 for (j = 0; j < chain->depth - 1; j++, i++) { 2756 for (j = 0; j < chain->depth - 1; j++, i++) {
2667 int lock_id = curr->held_locks[i].class_idx - 1; 2757 int lock_id = curr->held_locks[i].class_idx;
2668 chain_hlocks[chain->base + j] = lock_id; 2758 chain_hlocks[chain->base + j] = lock_id;
2669 } 2759 }
2670 chain_hlocks[chain->base + j] = class - lock_classes; 2760 chain_hlocks[chain->base + j] = class - lock_classes;
@@ -2754,8 +2844,9 @@ cache_hit:
2754 return 1; 2844 return 1;
2755} 2845}
2756 2846
2757static int validate_chain(struct task_struct *curr, struct lockdep_map *lock, 2847static int validate_chain(struct task_struct *curr,
2758 struct held_lock *hlock, int chain_head, u64 chain_key) 2848 struct held_lock *hlock,
2849 int chain_head, u64 chain_key)
2759{ 2850{
2760 /* 2851 /*
2761 * Trylock needs to maintain the stack of held locks, but it 2852 * Trylock needs to maintain the stack of held locks, but it
@@ -2776,12 +2867,18 @@ static int validate_chain(struct task_struct *curr, struct lockdep_map *lock,
2776 * - is softirq-safe, if this lock is hardirq-unsafe 2867 * - is softirq-safe, if this lock is hardirq-unsafe
2777 * 2868 *
2778 * And check whether the new lock's dependency graph 2869 * And check whether the new lock's dependency graph
2779 * could lead back to the previous lock. 2870 * could lead back to the previous lock:
2780 * 2871 *
2781 * any of these scenarios could lead to a deadlock. If 2872 * - within the current held-lock stack
2782 * All validations 2873 * - across our accumulated lock dependency records
2874 *
2875 * any of these scenarios could lead to a deadlock.
2783 */ 2876 */
2784 int ret = check_deadlock(curr, hlock, lock, hlock->read); 2877 /*
2878 * The simple case: does the current hold the same lock
2879 * already?
2880 */
2881 int ret = check_deadlock(curr, hlock);
2785 2882
2786 if (!ret) 2883 if (!ret)
2787 return 0; 2884 return 0;
@@ -2812,16 +2909,12 @@ static int validate_chain(struct task_struct *curr, struct lockdep_map *lock,
2812} 2909}
2813#else 2910#else
2814static inline int validate_chain(struct task_struct *curr, 2911static inline int validate_chain(struct task_struct *curr,
2815 struct lockdep_map *lock, struct held_lock *hlock, 2912 struct held_lock *hlock,
2816 int chain_head, u64 chain_key) 2913 int chain_head, u64 chain_key)
2817{ 2914{
2818 return 1; 2915 return 1;
2819} 2916}
2820 2917#endif /* CONFIG_PROVE_LOCKING */
2821static void print_lock_trace(struct lock_trace *trace, unsigned int spaces)
2822{
2823}
2824#endif
2825 2918
2826/* 2919/*
2827 * We are building curr_chain_key incrementally, so double-check 2920 * We are building curr_chain_key incrementally, so double-check
@@ -2832,7 +2925,7 @@ static void check_chain_key(struct task_struct *curr)
2832#ifdef CONFIG_DEBUG_LOCKDEP 2925#ifdef CONFIG_DEBUG_LOCKDEP
2833 struct held_lock *hlock, *prev_hlock = NULL; 2926 struct held_lock *hlock, *prev_hlock = NULL;
2834 unsigned int i; 2927 unsigned int i;
2835 u64 chain_key = 0; 2928 u64 chain_key = INITIAL_CHAIN_KEY;
2836 2929
2837 for (i = 0; i < curr->lockdep_depth; i++) { 2930 for (i = 0; i < curr->lockdep_depth; i++) {
2838 hlock = curr->held_locks + i; 2931 hlock = curr->held_locks + i;
@@ -2848,15 +2941,17 @@ static void check_chain_key(struct task_struct *curr)
2848 (unsigned long long)hlock->prev_chain_key); 2941 (unsigned long long)hlock->prev_chain_key);
2849 return; 2942 return;
2850 } 2943 }
2944
2851 /* 2945 /*
2852 * Whoops ran out of static storage again? 2946 * hlock->class_idx can't go beyond MAX_LOCKDEP_KEYS, but is
2947 * it registered lock class index?
2853 */ 2948 */
2854 if (DEBUG_LOCKS_WARN_ON(hlock->class_idx > MAX_LOCKDEP_KEYS)) 2949 if (DEBUG_LOCKS_WARN_ON(!test_bit(hlock->class_idx, lock_classes_in_use)))
2855 return; 2950 return;
2856 2951
2857 if (prev_hlock && (prev_hlock->irq_context != 2952 if (prev_hlock && (prev_hlock->irq_context !=
2858 hlock->irq_context)) 2953 hlock->irq_context))
2859 chain_key = 0; 2954 chain_key = INITIAL_CHAIN_KEY;
2860 chain_key = iterate_chain_key(chain_key, hlock->class_idx); 2955 chain_key = iterate_chain_key(chain_key, hlock->class_idx);
2861 prev_hlock = hlock; 2956 prev_hlock = hlock;
2862 } 2957 }
@@ -2874,14 +2969,11 @@ static void check_chain_key(struct task_struct *curr)
2874#endif 2969#endif
2875} 2970}
2876 2971
2972#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING)
2877static int mark_lock(struct task_struct *curr, struct held_lock *this, 2973static int mark_lock(struct task_struct *curr, struct held_lock *this,
2878 enum lock_usage_bit new_bit); 2974 enum lock_usage_bit new_bit);
2879 2975
2880#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) 2976static void print_usage_bug_scenario(struct held_lock *lock)
2881
2882
2883static void
2884print_usage_bug_scenario(struct held_lock *lock)
2885{ 2977{
2886 struct lock_class *class = hlock_class(lock); 2978 struct lock_class *class = hlock_class(lock);
2887 2979
@@ -2898,12 +2990,12 @@ print_usage_bug_scenario(struct held_lock *lock)
2898 printk("\n *** DEADLOCK ***\n\n"); 2990 printk("\n *** DEADLOCK ***\n\n");
2899} 2991}
2900 2992
2901static int 2993static void
2902print_usage_bug(struct task_struct *curr, struct held_lock *this, 2994print_usage_bug(struct task_struct *curr, struct held_lock *this,
2903 enum lock_usage_bit prev_bit, enum lock_usage_bit new_bit) 2995 enum lock_usage_bit prev_bit, enum lock_usage_bit new_bit)
2904{ 2996{
2905 if (!debug_locks_off_graph_unlock() || debug_locks_silent) 2997 if (!debug_locks_off_graph_unlock() || debug_locks_silent)
2906 return 0; 2998 return;
2907 2999
2908 pr_warn("\n"); 3000 pr_warn("\n");
2909 pr_warn("================================\n"); 3001 pr_warn("================================\n");
@@ -2933,8 +3025,6 @@ print_usage_bug(struct task_struct *curr, struct held_lock *this,
2933 3025
2934 pr_warn("\nstack backtrace:\n"); 3026 pr_warn("\nstack backtrace:\n");
2935 dump_stack(); 3027 dump_stack();
2936
2937 return 0;
2938} 3028}
2939 3029
2940/* 3030/*
@@ -2944,8 +3034,10 @@ static inline int
2944valid_state(struct task_struct *curr, struct held_lock *this, 3034valid_state(struct task_struct *curr, struct held_lock *this,
2945 enum lock_usage_bit new_bit, enum lock_usage_bit bad_bit) 3035 enum lock_usage_bit new_bit, enum lock_usage_bit bad_bit)
2946{ 3036{
2947 if (unlikely(hlock_class(this)->usage_mask & (1 << bad_bit))) 3037 if (unlikely(hlock_class(this)->usage_mask & (1 << bad_bit))) {
2948 return print_usage_bug(curr, this, bad_bit, new_bit); 3038 print_usage_bug(curr, this, bad_bit, new_bit);
3039 return 0;
3040 }
2949 return 1; 3041 return 1;
2950} 3042}
2951 3043
@@ -2953,7 +3045,7 @@ valid_state(struct task_struct *curr, struct held_lock *this,
2953/* 3045/*
2954 * print irq inversion bug: 3046 * print irq inversion bug:
2955 */ 3047 */
2956static int 3048static void
2957print_irq_inversion_bug(struct task_struct *curr, 3049print_irq_inversion_bug(struct task_struct *curr,
2958 struct lock_list *root, struct lock_list *other, 3050 struct lock_list *root, struct lock_list *other,
2959 struct held_lock *this, int forwards, 3051 struct held_lock *this, int forwards,
@@ -2964,7 +3056,7 @@ print_irq_inversion_bug(struct task_struct *curr,
2964 int depth; 3056 int depth;
2965 3057
2966 if (!debug_locks_off_graph_unlock() || debug_locks_silent) 3058 if (!debug_locks_off_graph_unlock() || debug_locks_silent)
2967 return 0; 3059 return;
2968 3060
2969 pr_warn("\n"); 3061 pr_warn("\n");
2970 pr_warn("========================================================\n"); 3062 pr_warn("========================================================\n");
@@ -3005,13 +3097,11 @@ print_irq_inversion_bug(struct task_struct *curr,
3005 3097
3006 pr_warn("\nthe shortest dependencies between 2nd lock and 1st lock:\n"); 3098 pr_warn("\nthe shortest dependencies between 2nd lock and 1st lock:\n");
3007 if (!save_trace(&root->trace)) 3099 if (!save_trace(&root->trace))
3008 return 0; 3100 return;
3009 print_shortest_lock_dependencies(other, root); 3101 print_shortest_lock_dependencies(other, root);
3010 3102
3011 pr_warn("\nstack backtrace:\n"); 3103 pr_warn("\nstack backtrace:\n");
3012 dump_stack(); 3104 dump_stack();
3013
3014 return 0;
3015} 3105}
3016 3106
3017/* 3107/*
@@ -3029,13 +3119,16 @@ check_usage_forwards(struct task_struct *curr, struct held_lock *this,
3029 root.parent = NULL; 3119 root.parent = NULL;
3030 root.class = hlock_class(this); 3120 root.class = hlock_class(this);
3031 ret = find_usage_forwards(&root, lock_flag(bit), &target_entry); 3121 ret = find_usage_forwards(&root, lock_flag(bit), &target_entry);
3032 if (ret < 0) 3122 if (ret < 0) {
3033 return print_bfs_bug(ret); 3123 print_bfs_bug(ret);
3124 return 0;
3125 }
3034 if (ret == 1) 3126 if (ret == 1)
3035 return ret; 3127 return ret;
3036 3128
3037 return print_irq_inversion_bug(curr, &root, target_entry, 3129 print_irq_inversion_bug(curr, &root, target_entry,
3038 this, 1, irqclass); 3130 this, 1, irqclass);
3131 return 0;
3039} 3132}
3040 3133
3041/* 3134/*
@@ -3053,13 +3146,16 @@ check_usage_backwards(struct task_struct *curr, struct held_lock *this,
3053 root.parent = NULL; 3146 root.parent = NULL;
3054 root.class = hlock_class(this); 3147 root.class = hlock_class(this);
3055 ret = find_usage_backwards(&root, lock_flag(bit), &target_entry); 3148 ret = find_usage_backwards(&root, lock_flag(bit), &target_entry);
3056 if (ret < 0) 3149 if (ret < 0) {
3057 return print_bfs_bug(ret); 3150 print_bfs_bug(ret);
3151 return 0;
3152 }
3058 if (ret == 1) 3153 if (ret == 1)
3059 return ret; 3154 return ret;
3060 3155
3061 return print_irq_inversion_bug(curr, &root, target_entry, 3156 print_irq_inversion_bug(curr, &root, target_entry,
3062 this, 0, irqclass); 3157 this, 0, irqclass);
3158 return 0;
3063} 3159}
3064 3160
3065void print_irqtrace_events(struct task_struct *curr) 3161void print_irqtrace_events(struct task_struct *curr)
@@ -3142,7 +3238,7 @@ mark_lock_irq(struct task_struct *curr, struct held_lock *this,
3142 * Validate that the lock dependencies don't have conflicting usage 3238 * Validate that the lock dependencies don't have conflicting usage
3143 * states. 3239 * states.
3144 */ 3240 */
3145 if ((!read || !dir || STRICT_READ_CHECKS) && 3241 if ((!read || STRICT_READ_CHECKS) &&
3146 !usage(curr, this, excl_bit, state_name(new_bit & ~LOCK_USAGE_READ_MASK))) 3242 !usage(curr, this, excl_bit, state_name(new_bit & ~LOCK_USAGE_READ_MASK)))
3147 return 0; 3243 return 0;
3148 3244
@@ -3367,8 +3463,12 @@ void trace_softirqs_off(unsigned long ip)
3367 debug_atomic_inc(redundant_softirqs_off); 3463 debug_atomic_inc(redundant_softirqs_off);
3368} 3464}
3369 3465
3370static int mark_irqflags(struct task_struct *curr, struct held_lock *hlock) 3466static int
3467mark_usage(struct task_struct *curr, struct held_lock *hlock, int check)
3371{ 3468{
3469 if (!check)
3470 goto lock_used;
3471
3372 /* 3472 /*
3373 * If non-trylock use in a hardirq or softirq context, then 3473 * If non-trylock use in a hardirq or softirq context, then
3374 * mark the lock as used in these contexts: 3474 * mark the lock as used in these contexts:
@@ -3412,6 +3512,11 @@ static int mark_irqflags(struct task_struct *curr, struct held_lock *hlock)
3412 } 3512 }
3413 } 3513 }
3414 3514
3515lock_used:
3516 /* mark it as used: */
3517 if (!mark_lock(curr, hlock, LOCK_USED))
3518 return 0;
3519
3415 return 1; 3520 return 1;
3416} 3521}
3417 3522
@@ -3443,35 +3548,6 @@ static int separate_irq_context(struct task_struct *curr,
3443 return 0; 3548 return 0;
3444} 3549}
3445 3550
3446#else /* defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) */
3447
3448static inline
3449int mark_lock_irq(struct task_struct *curr, struct held_lock *this,
3450 enum lock_usage_bit new_bit)
3451{
3452 WARN_ON(1); /* Impossible innit? when we don't have TRACE_IRQFLAG */
3453 return 1;
3454}
3455
3456static inline int mark_irqflags(struct task_struct *curr,
3457 struct held_lock *hlock)
3458{
3459 return 1;
3460}
3461
3462static inline unsigned int task_irq_context(struct task_struct *task)
3463{
3464 return 0;
3465}
3466
3467static inline int separate_irq_context(struct task_struct *curr,
3468 struct held_lock *hlock)
3469{
3470 return 0;
3471}
3472
3473#endif /* defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) */
3474
3475/* 3551/*
3476 * Mark a lock with a usage bit, and validate the state transition: 3552 * Mark a lock with a usage bit, and validate the state transition:
3477 */ 3553 */
@@ -3480,6 +3556,11 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,
3480{ 3556{
3481 unsigned int new_mask = 1 << new_bit, ret = 1; 3557 unsigned int new_mask = 1 << new_bit, ret = 1;
3482 3558
3559 if (new_bit >= LOCK_USAGE_STATES) {
3560 DEBUG_LOCKS_WARN_ON(1);
3561 return 0;
3562 }
3563
3483 /* 3564 /*
3484 * If already set then do not dirty the cacheline, 3565 * If already set then do not dirty the cacheline,
3485 * nor do any checks: 3566 * nor do any checks:
@@ -3503,25 +3584,13 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,
3503 return 0; 3584 return 0;
3504 3585
3505 switch (new_bit) { 3586 switch (new_bit) {
3506#define LOCKDEP_STATE(__STATE) \
3507 case LOCK_USED_IN_##__STATE: \
3508 case LOCK_USED_IN_##__STATE##_READ: \
3509 case LOCK_ENABLED_##__STATE: \
3510 case LOCK_ENABLED_##__STATE##_READ:
3511#include "lockdep_states.h"
3512#undef LOCKDEP_STATE
3513 ret = mark_lock_irq(curr, this, new_bit);
3514 if (!ret)
3515 return 0;
3516 break;
3517 case LOCK_USED: 3587 case LOCK_USED:
3518 debug_atomic_dec(nr_unused_locks); 3588 debug_atomic_dec(nr_unused_locks);
3519 break; 3589 break;
3520 default: 3590 default:
3521 if (!debug_locks_off_graph_unlock()) 3591 ret = mark_lock_irq(curr, this, new_bit);
3592 if (!ret)
3522 return 0; 3593 return 0;
3523 WARN_ON(1);
3524 return 0;
3525 } 3594 }
3526 3595
3527 graph_unlock(); 3596 graph_unlock();
@@ -3539,6 +3608,27 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,
3539 return ret; 3608 return ret;
3540} 3609}
3541 3610
3611#else /* defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) */
3612
3613static inline int
3614mark_usage(struct task_struct *curr, struct held_lock *hlock, int check)
3615{
3616 return 1;
3617}
3618
3619static inline unsigned int task_irq_context(struct task_struct *task)
3620{
3621 return 0;
3622}
3623
3624static inline int separate_irq_context(struct task_struct *curr,
3625 struct held_lock *hlock)
3626{
3627 return 0;
3628}
3629
3630#endif /* defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) */
3631
3542/* 3632/*
3543 * Initialize a lock instance's lock-class mapping info: 3633 * Initialize a lock instance's lock-class mapping info:
3544 */ 3634 */
@@ -3602,15 +3692,15 @@ EXPORT_SYMBOL_GPL(lockdep_init_map);
3602struct lock_class_key __lockdep_no_validate__; 3692struct lock_class_key __lockdep_no_validate__;
3603EXPORT_SYMBOL_GPL(__lockdep_no_validate__); 3693EXPORT_SYMBOL_GPL(__lockdep_no_validate__);
3604 3694
3605static int 3695static void
3606print_lock_nested_lock_not_held(struct task_struct *curr, 3696print_lock_nested_lock_not_held(struct task_struct *curr,
3607 struct held_lock *hlock, 3697 struct held_lock *hlock,
3608 unsigned long ip) 3698 unsigned long ip)
3609{ 3699{
3610 if (!debug_locks_off()) 3700 if (!debug_locks_off())
3611 return 0; 3701 return;
3612 if (debug_locks_silent) 3702 if (debug_locks_silent)
3613 return 0; 3703 return;
3614 3704
3615 pr_warn("\n"); 3705 pr_warn("\n");
3616 pr_warn("==================================\n"); 3706 pr_warn("==================================\n");
@@ -3632,8 +3722,6 @@ print_lock_nested_lock_not_held(struct task_struct *curr,
3632 3722
3633 pr_warn("\nstack backtrace:\n"); 3723 pr_warn("\nstack backtrace:\n");
3634 dump_stack(); 3724 dump_stack();
3635
3636 return 0;
3637} 3725}
3638 3726
3639static int __lock_is_held(const struct lockdep_map *lock, int read); 3727static int __lock_is_held(const struct lockdep_map *lock, int read);
@@ -3698,24 +3786,24 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
3698 if (DEBUG_LOCKS_WARN_ON(depth >= MAX_LOCK_DEPTH)) 3786 if (DEBUG_LOCKS_WARN_ON(depth >= MAX_LOCK_DEPTH))
3699 return 0; 3787 return 0;
3700 3788
3701 class_idx = class - lock_classes + 1; 3789 class_idx = class - lock_classes;
3702 3790
3703 if (depth) { 3791 if (depth) {
3704 hlock = curr->held_locks + depth - 1; 3792 hlock = curr->held_locks + depth - 1;
3705 if (hlock->class_idx == class_idx && nest_lock) { 3793 if (hlock->class_idx == class_idx && nest_lock) {
3706 if (hlock->references) { 3794 if (!references)
3707 /* 3795 references++;
3708 * Check: unsigned int references:12, overflow.
3709 */
3710 if (DEBUG_LOCKS_WARN_ON(hlock->references == (1 << 12)-1))
3711 return 0;
3712 3796
3797 if (!hlock->references)
3713 hlock->references++; 3798 hlock->references++;
3714 } else {
3715 hlock->references = 2;
3716 }
3717 3799
3718 return 1; 3800 hlock->references += references;
3801
3802 /* Overflow */
3803 if (DEBUG_LOCKS_WARN_ON(hlock->references < references))
3804 return 0;
3805
3806 return 2;
3719 } 3807 }
3720 } 3808 }
3721 3809
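As an illustrative aside (not part of the patch): the hunk above rewrites the nest_lock fast path so that a repeated acquisition of the same class folds into the existing held_lock entry and is reported by returning 2, and the removed comment notes that references is a narrow 12-bit bitfield, which is why the post-addition "hlock->references < references" comparison catches a wrap. A small standalone model of that merge-and-overflow check (struct and function names are invented for the sketch):

#include <stdio.h>

struct toy_hlock {
	unsigned int references : 12;	/* same narrow width the removed comment mentions */
};

/*
 * Fold a repeated acquisition into an existing entry. Returns 2 when the
 * merge succeeds (mirroring the new __lock_acquire() return value) and 0
 * when the 12-bit counter would wrap.
 */
static int merge_references(struct toy_hlock *hlock, unsigned int references)
{
	/* a plain acquisition counts as one reference */
	if (!references)
		references++;
	if (!hlock->references)
		hlock->references++;

	hlock->references += references;

	/* the bitfield wrapped: the sum came out smaller than the addend */
	if (hlock->references < references)
		return 0;

	return 2;
}

int main(void)
{
	struct toy_hlock hlock = { .references = 0 };

	printf("first merge -> %d, references = %u\n",
	       merge_references(&hlock, 0), (unsigned int)hlock.references); /* 2, 2 */

	hlock.references = 4095;	/* force the 12-bit counter to wrap */
	printf("overflow -> %d\n", merge_references(&hlock, 0));	     /* 0 */
	return 0;
}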
@@ -3742,11 +3830,8 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
3742#endif 3830#endif
3743 hlock->pin_count = pin_count; 3831 hlock->pin_count = pin_count;
3744 3832
3745 if (check && !mark_irqflags(curr, hlock)) 3833 /* Initialize the lock usage bit */
3746 return 0; 3834 if (!mark_usage(curr, hlock, check))
3747
3748 /* mark it as used: */
3749 if (!mark_lock(curr, hlock, LOCK_USED))
3750 return 0; 3835 return 0;
3751 3836
3752 /* 3837 /*
@@ -3760,9 +3845,9 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
3760 * the hash, not class->key. 3845 * the hash, not class->key.
3761 */ 3846 */
3762 /* 3847 /*
3763 * Whoops, we did it again.. ran straight out of our static allocation. 3848 * Whoops, we did it again.. class_idx is invalid.
3764 */ 3849 */
3765 if (DEBUG_LOCKS_WARN_ON(class_idx > MAX_LOCKDEP_KEYS)) 3850 if (DEBUG_LOCKS_WARN_ON(!test_bit(class_idx, lock_classes_in_use)))
3766 return 0; 3851 return 0;
3767 3852
3768 chain_key = curr->curr_chain_key; 3853 chain_key = curr->curr_chain_key;
@@ -3770,27 +3855,29 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
3770 /* 3855 /*
3771 * How can we have a chain hash when we ain't got no keys?! 3856 * How can we have a chain hash when we ain't got no keys?!
3772 */ 3857 */
3773 if (DEBUG_LOCKS_WARN_ON(chain_key != 0)) 3858 if (DEBUG_LOCKS_WARN_ON(chain_key != INITIAL_CHAIN_KEY))
3774 return 0; 3859 return 0;
3775 chain_head = 1; 3860 chain_head = 1;
3776 } 3861 }
3777 3862
3778 hlock->prev_chain_key = chain_key; 3863 hlock->prev_chain_key = chain_key;
3779 if (separate_irq_context(curr, hlock)) { 3864 if (separate_irq_context(curr, hlock)) {
3780 chain_key = 0; 3865 chain_key = INITIAL_CHAIN_KEY;
3781 chain_head = 1; 3866 chain_head = 1;
3782 } 3867 }
3783 chain_key = iterate_chain_key(chain_key, class_idx); 3868 chain_key = iterate_chain_key(chain_key, class_idx);
3784 3869
3785 if (nest_lock && !__lock_is_held(nest_lock, -1)) 3870 if (nest_lock && !__lock_is_held(nest_lock, -1)) {
3786 return print_lock_nested_lock_not_held(curr, hlock, ip); 3871 print_lock_nested_lock_not_held(curr, hlock, ip);
3872 return 0;
3873 }
3787 3874
3788 if (!debug_locks_silent) { 3875 if (!debug_locks_silent) {
3789 WARN_ON_ONCE(depth && !hlock_class(hlock - 1)->key); 3876 WARN_ON_ONCE(depth && !hlock_class(hlock - 1)->key);
3790 WARN_ON_ONCE(!hlock_class(hlock)->key); 3877 WARN_ON_ONCE(!hlock_class(hlock)->key);
3791 } 3878 }
3792 3879
3793 if (!validate_chain(curr, lock, hlock, chain_head, chain_key)) 3880 if (!validate_chain(curr, hlock, chain_head, chain_key))
3794 return 0; 3881 return 0;
3795 3882
3796 curr->curr_chain_key = chain_key; 3883 curr->curr_chain_key = chain_key;
@@ -3819,14 +3906,14 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
3819 return 1; 3906 return 1;
3820} 3907}
3821 3908
3822static int 3909static void print_unlock_imbalance_bug(struct task_struct *curr,
3823print_unlock_imbalance_bug(struct task_struct *curr, struct lockdep_map *lock, 3910 struct lockdep_map *lock,
3824 unsigned long ip) 3911 unsigned long ip)
3825{ 3912{
3826 if (!debug_locks_off()) 3913 if (!debug_locks_off())
3827 return 0; 3914 return;
3828 if (debug_locks_silent) 3915 if (debug_locks_silent)
3829 return 0; 3916 return;
3830 3917
3831 pr_warn("\n"); 3918 pr_warn("\n");
3832 pr_warn("=====================================\n"); 3919 pr_warn("=====================================\n");
@@ -3844,8 +3931,6 @@ print_unlock_imbalance_bug(struct task_struct *curr, struct lockdep_map *lock,
3844 3931
3845 pr_warn("\nstack backtrace:\n"); 3932 pr_warn("\nstack backtrace:\n");
3846 dump_stack(); 3933 dump_stack();
3847
3848 return 0;
3849} 3934}
3850 3935
3851static int match_held_lock(const struct held_lock *hlock, 3936static int match_held_lock(const struct held_lock *hlock,
@@ -3877,7 +3962,7 @@ static int match_held_lock(const struct held_lock *hlock,
3877 if (DEBUG_LOCKS_WARN_ON(!hlock->nest_lock)) 3962 if (DEBUG_LOCKS_WARN_ON(!hlock->nest_lock))
3878 return 0; 3963 return 0;
3879 3964
3880 if (hlock->class_idx == class - lock_classes + 1) 3965 if (hlock->class_idx == class - lock_classes)
3881 return 1; 3966 return 1;
3882 } 3967 }
3883 3968
@@ -3921,22 +4006,33 @@ out:
3921} 4006}
3922 4007
3923static int reacquire_held_locks(struct task_struct *curr, unsigned int depth, 4008static int reacquire_held_locks(struct task_struct *curr, unsigned int depth,
3924 int idx) 4009 int idx, unsigned int *merged)
3925{ 4010{
3926 struct held_lock *hlock; 4011 struct held_lock *hlock;
4012 int first_idx = idx;
3927 4013
3928 if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) 4014 if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
3929 return 0; 4015 return 0;
3930 4016
3931 for (hlock = curr->held_locks + idx; idx < depth; idx++, hlock++) { 4017 for (hlock = curr->held_locks + idx; idx < depth; idx++, hlock++) {
3932 if (!__lock_acquire(hlock->instance, 4018 switch (__lock_acquire(hlock->instance,
3933 hlock_class(hlock)->subclass, 4019 hlock_class(hlock)->subclass,
3934 hlock->trylock, 4020 hlock->trylock,
3935 hlock->read, hlock->check, 4021 hlock->read, hlock->check,
3936 hlock->hardirqs_off, 4022 hlock->hardirqs_off,
3937 hlock->nest_lock, hlock->acquire_ip, 4023 hlock->nest_lock, hlock->acquire_ip,
3938 hlock->references, hlock->pin_count)) 4024 hlock->references, hlock->pin_count)) {
4025 case 0:
3939 return 1; 4026 return 1;
4027 case 1:
4028 break;
4029 case 2:
4030 *merged += (idx == first_idx);
4031 break;
4032 default:
4033 WARN_ON(1);
4034 return 0;
4035 }
3940 } 4036 }
3941 return 0; 4037 return 0;
3942} 4038}
@@ -3947,9 +4043,9 @@ __lock_set_class(struct lockdep_map *lock, const char *name,
3947 unsigned long ip) 4043 unsigned long ip)
3948{ 4044{
3949 struct task_struct *curr = current; 4045 struct task_struct *curr = current;
4046 unsigned int depth, merged = 0;
3950 struct held_lock *hlock; 4047 struct held_lock *hlock;
3951 struct lock_class *class; 4048 struct lock_class *class;
3952 unsigned int depth;
3953 int i; 4049 int i;
3954 4050
3955 if (unlikely(!debug_locks)) 4051 if (unlikely(!debug_locks))
@@ -3964,24 +4060,26 @@ __lock_set_class(struct lockdep_map *lock, const char *name,
3964 return 0; 4060 return 0;
3965 4061
3966 hlock = find_held_lock(curr, lock, depth, &i); 4062 hlock = find_held_lock(curr, lock, depth, &i);
3967 if (!hlock) 4063 if (!hlock) {
3968 return print_unlock_imbalance_bug(curr, lock, ip); 4064 print_unlock_imbalance_bug(curr, lock, ip);
4065 return 0;
4066 }
3969 4067
3970 lockdep_init_map(lock, name, key, 0); 4068 lockdep_init_map(lock, name, key, 0);
3971 class = register_lock_class(lock, subclass, 0); 4069 class = register_lock_class(lock, subclass, 0);
3972 hlock->class_idx = class - lock_classes + 1; 4070 hlock->class_idx = class - lock_classes;
3973 4071
3974 curr->lockdep_depth = i; 4072 curr->lockdep_depth = i;
3975 curr->curr_chain_key = hlock->prev_chain_key; 4073 curr->curr_chain_key = hlock->prev_chain_key;
3976 4074
3977 if (reacquire_held_locks(curr, depth, i)) 4075 if (reacquire_held_locks(curr, depth, i, &merged))
3978 return 0; 4076 return 0;
3979 4077
3980 /* 4078 /*
3981 * I took it apart and put it back together again, except now I have 4079 * I took it apart and put it back together again, except now I have
3982 * these 'spare' parts.. where shall I put them. 4080 * these 'spare' parts.. where shall I put them.
3983 */ 4081 */
3984 if (DEBUG_LOCKS_WARN_ON(curr->lockdep_depth != depth)) 4082 if (DEBUG_LOCKS_WARN_ON(curr->lockdep_depth != depth - merged))
3985 return 0; 4083 return 0;
3986 return 1; 4084 return 1;
3987} 4085}
@@ -3989,8 +4087,8 @@ __lock_set_class(struct lockdep_map *lock, const char *name,
3989static int __lock_downgrade(struct lockdep_map *lock, unsigned long ip) 4087static int __lock_downgrade(struct lockdep_map *lock, unsigned long ip)
3990{ 4088{
3991 struct task_struct *curr = current; 4089 struct task_struct *curr = current;
4090 unsigned int depth, merged = 0;
3992 struct held_lock *hlock; 4091 struct held_lock *hlock;
3993 unsigned int depth;
3994 int i; 4092 int i;
3995 4093
3996 if (unlikely(!debug_locks)) 4094 if (unlikely(!debug_locks))
@@ -4005,8 +4103,10 @@ static int __lock_downgrade(struct lockdep_map *lock, unsigned long ip)
4005 return 0; 4103 return 0;
4006 4104
4007 hlock = find_held_lock(curr, lock, depth, &i); 4105 hlock = find_held_lock(curr, lock, depth, &i);
4008 if (!hlock) 4106 if (!hlock) {
4009 return print_unlock_imbalance_bug(curr, lock, ip); 4107 print_unlock_imbalance_bug(curr, lock, ip);
4108 return 0;
4109 }
4010 4110
4011 curr->lockdep_depth = i; 4111 curr->lockdep_depth = i;
4012 curr->curr_chain_key = hlock->prev_chain_key; 4112 curr->curr_chain_key = hlock->prev_chain_key;
@@ -4015,7 +4115,11 @@ static int __lock_downgrade(struct lockdep_map *lock, unsigned long ip)
4015 hlock->read = 1; 4115 hlock->read = 1;
4016 hlock->acquire_ip = ip; 4116 hlock->acquire_ip = ip;
4017 4117
4018 if (reacquire_held_locks(curr, depth, i)) 4118 if (reacquire_held_locks(curr, depth, i, &merged))
4119 return 0;
4120
4121 /* Merging can't happen with unchanged classes.. */
4122 if (DEBUG_LOCKS_WARN_ON(merged))
4019 return 0; 4123 return 0;
4020 4124
4021 /* 4125 /*
@@ -4024,6 +4128,7 @@ static int __lock_downgrade(struct lockdep_map *lock, unsigned long ip)
4024 */ 4128 */
4025 if (DEBUG_LOCKS_WARN_ON(curr->lockdep_depth != depth)) 4129 if (DEBUG_LOCKS_WARN_ON(curr->lockdep_depth != depth))
4026 return 0; 4130 return 0;
4131
4027 return 1; 4132 return 1;
4028} 4133}
4029 4134
@@ -4035,11 +4140,11 @@ static int __lock_downgrade(struct lockdep_map *lock, unsigned long ip)
4035 * @nested is an hysterical artifact, needs a tree wide cleanup. 4140 * @nested is an hysterical artifact, needs a tree wide cleanup.
4036 */ 4141 */
4037static int 4142static int
4038__lock_release(struct lockdep_map *lock, int nested, unsigned long ip) 4143__lock_release(struct lockdep_map *lock, unsigned long ip)
4039{ 4144{
4040 struct task_struct *curr = current; 4145 struct task_struct *curr = current;
4146 unsigned int depth, merged = 1;
4041 struct held_lock *hlock; 4147 struct held_lock *hlock;
4042 unsigned int depth;
4043 int i; 4148 int i;
4044 4149
4045 if (unlikely(!debug_locks)) 4150 if (unlikely(!debug_locks))
@@ -4050,16 +4155,20 @@ __lock_release(struct lockdep_map *lock, int nested, unsigned long ip)
4050 * So we're all set to release this lock.. wait what lock? We don't 4155 * So we're all set to release this lock.. wait what lock? We don't
4051 * own any locks, you've been drinking again? 4156 * own any locks, you've been drinking again?
4052 */ 4157 */
4053 if (DEBUG_LOCKS_WARN_ON(depth <= 0)) 4158 if (depth <= 0) {
4054 return print_unlock_imbalance_bug(curr, lock, ip); 4159 print_unlock_imbalance_bug(curr, lock, ip);
4160 return 0;
4161 }
4055 4162
4056 /* 4163 /*
4057 * Check whether the lock exists in the current stack 4164 * Check whether the lock exists in the current stack
4058 * of held locks: 4165 * of held locks:
4059 */ 4166 */
4060 hlock = find_held_lock(curr, lock, depth, &i); 4167 hlock = find_held_lock(curr, lock, depth, &i);
4061 if (!hlock) 4168 if (!hlock) {
4062 return print_unlock_imbalance_bug(curr, lock, ip); 4169 print_unlock_imbalance_bug(curr, lock, ip);
4170 return 0;
4171 }
4063 4172
4064 if (hlock->instance == lock) 4173 if (hlock->instance == lock)
4065 lock_release_holdtime(hlock); 4174 lock_release_holdtime(hlock);
@@ -4094,14 +4203,15 @@ __lock_release(struct lockdep_map *lock, int nested, unsigned long ip)
4094 if (i == depth-1) 4203 if (i == depth-1)
4095 return 1; 4204 return 1;
4096 4205
4097 if (reacquire_held_locks(curr, depth, i + 1)) 4206 if (reacquire_held_locks(curr, depth, i + 1, &merged))
4098 return 0; 4207 return 0;
4099 4208
4100 /* 4209 /*
4101 * We had N bottles of beer on the wall, we drank one, but now 4210 * We had N bottles of beer on the wall, we drank one, but now
4102 * there's not N-1 bottles of beer left on the wall... 4211 * there's not N-1 bottles of beer left on the wall...
4212 * Pouring two of the bottles together is acceptable.
4103 */ 4213 */
4104 DEBUG_LOCKS_WARN_ON(curr->lockdep_depth != depth-1); 4214 DEBUG_LOCKS_WARN_ON(curr->lockdep_depth != depth - merged);
4105 4215
4106 /* 4216 /*
4107 * Since reacquire_held_locks() would have called check_chain_key() 4217 * Since reacquire_held_locks() would have called check_chain_key()
@@ -4319,7 +4429,7 @@ void lock_release(struct lockdep_map *lock, int nested,
4319 check_flags(flags); 4429 check_flags(flags);
4320 current->lockdep_recursion = 1; 4430 current->lockdep_recursion = 1;
4321 trace_lock_release(lock, ip); 4431 trace_lock_release(lock, ip);
4322 if (__lock_release(lock, nested, ip)) 4432 if (__lock_release(lock, ip))
4323 check_chain_key(current); 4433 check_chain_key(current);
4324 current->lockdep_recursion = 0; 4434 current->lockdep_recursion = 0;
4325 raw_local_irq_restore(flags); 4435 raw_local_irq_restore(flags);
@@ -4402,14 +4512,14 @@ void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie cookie)
4402EXPORT_SYMBOL_GPL(lock_unpin_lock); 4512EXPORT_SYMBOL_GPL(lock_unpin_lock);
4403 4513
4404#ifdef CONFIG_LOCK_STAT 4514#ifdef CONFIG_LOCK_STAT
4405static int 4515static void print_lock_contention_bug(struct task_struct *curr,
4406print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock, 4516 struct lockdep_map *lock,
4407 unsigned long ip) 4517 unsigned long ip)
4408{ 4518{
4409 if (!debug_locks_off()) 4519 if (!debug_locks_off())
4410 return 0; 4520 return;
4411 if (debug_locks_silent) 4521 if (debug_locks_silent)
4412 return 0; 4522 return;
4413 4523
4414 pr_warn("\n"); 4524 pr_warn("\n");
4415 pr_warn("=================================\n"); 4525 pr_warn("=================================\n");
@@ -4427,8 +4537,6 @@ print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock,
4427 4537
4428 pr_warn("\nstack backtrace:\n"); 4538 pr_warn("\nstack backtrace:\n");
4429 dump_stack(); 4539 dump_stack();
4430
4431 return 0;
4432} 4540}
4433 4541
4434static void 4542static void
@@ -4573,9 +4681,7 @@ void lockdep_reset(void)
4573 int i; 4681 int i;
4574 4682
4575 raw_local_irq_save(flags); 4683 raw_local_irq_save(flags);
4576 current->curr_chain_key = 0; 4684 lockdep_init_task(current);
4577 current->lockdep_depth = 0;
4578 current->lockdep_recursion = 0;
4579 memset(current->held_locks, 0, MAX_LOCK_DEPTH*sizeof(struct held_lock)); 4685 memset(current->held_locks, 0, MAX_LOCK_DEPTH*sizeof(struct held_lock));
4580 nr_hardirq_chains = 0; 4686 nr_hardirq_chains = 0;
4581 nr_softirq_chains = 0; 4687 nr_softirq_chains = 0;
@@ -4615,9 +4721,9 @@ static void remove_class_from_lock_chain(struct pending_free *pf,
4615 return; 4721 return;
4616 4722
4617recalc: 4723recalc:
4618 chain_key = 0; 4724 chain_key = INITIAL_CHAIN_KEY;
4619 for (i = chain->base; i < chain->base + chain->depth; i++) 4725 for (i = chain->base; i < chain->base + chain->depth; i++)
4620 chain_key = iterate_chain_key(chain_key, chain_hlocks[i] + 1); 4726 chain_key = iterate_chain_key(chain_key, chain_hlocks[i]);
4621 if (chain->depth && chain->chain_key == chain_key) 4727 if (chain->depth && chain->chain_key == chain_key)
4622 return; 4728 return;
4623 /* Overwrite the chain key for concurrent RCU readers. */ 4729 /* Overwrite the chain key for concurrent RCU readers. */
@@ -4691,6 +4797,7 @@ static void zap_class(struct pending_free *pf, struct lock_class *class)
4691 WRITE_ONCE(class->key, NULL); 4797 WRITE_ONCE(class->key, NULL);
4692 WRITE_ONCE(class->name, NULL); 4798 WRITE_ONCE(class->name, NULL);
4693 nr_lock_classes--; 4799 nr_lock_classes--;
4800 __clear_bit(class - lock_classes, lock_classes_in_use);
4694 } else { 4801 } else {
4695 WARN_ONCE(true, "%s() failed for class %s\n", __func__, 4802 WARN_ONCE(true, "%s() failed for class %s\n", __func__,
4696 class->name); 4803 class->name);
@@ -5036,6 +5143,7 @@ void __init lockdep_init(void)
5036 5143
5037 printk(" memory used by lock dependency info: %zu kB\n", 5144 printk(" memory used by lock dependency info: %zu kB\n",
5038 (sizeof(lock_classes) + 5145 (sizeof(lock_classes) +
5146 sizeof(lock_classes_in_use) +
5039 sizeof(classhash_table) + 5147 sizeof(classhash_table) +
5040 sizeof(list_entries) + 5148 sizeof(list_entries) +
5041 sizeof(list_entries_in_use) + 5149 sizeof(list_entries_in_use) +
diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h
index 150ec3f0c5b5..cc83568d5012 100644
--- a/kernel/locking/lockdep_internals.h
+++ b/kernel/locking/lockdep_internals.h
@@ -131,7 +131,6 @@ extern unsigned int nr_hardirq_chains;
131extern unsigned int nr_softirq_chains; 131extern unsigned int nr_softirq_chains;
132extern unsigned int nr_process_chains; 132extern unsigned int nr_process_chains;
133extern unsigned int max_lockdep_depth; 133extern unsigned int max_lockdep_depth;
134extern unsigned int max_recursion_depth;
135 134
136extern unsigned int max_bfs_queue_depth; 135extern unsigned int max_bfs_queue_depth;
137 136
@@ -160,25 +159,22 @@ lockdep_count_backward_deps(struct lock_class *class)
160 * and we want to avoid too much cache bouncing. 159 * and we want to avoid too much cache bouncing.
161 */ 160 */
162struct lockdep_stats { 161struct lockdep_stats {
163 int chain_lookup_hits; 162 unsigned long chain_lookup_hits;
164 int chain_lookup_misses; 163 unsigned int chain_lookup_misses;
165 int hardirqs_on_events; 164 unsigned long hardirqs_on_events;
166 int hardirqs_off_events; 165 unsigned long hardirqs_off_events;
167 int redundant_hardirqs_on; 166 unsigned long redundant_hardirqs_on;
168 int redundant_hardirqs_off; 167 unsigned long redundant_hardirqs_off;
169 int softirqs_on_events; 168 unsigned long softirqs_on_events;
170 int softirqs_off_events; 169 unsigned long softirqs_off_events;
171 int redundant_softirqs_on; 170 unsigned long redundant_softirqs_on;
172 int redundant_softirqs_off; 171 unsigned long redundant_softirqs_off;
173 int nr_unused_locks; 172 int nr_unused_locks;
174 int nr_redundant_checks; 173 unsigned int nr_redundant_checks;
175 int nr_redundant; 174 unsigned int nr_redundant;
176 int nr_cyclic_checks; 175 unsigned int nr_cyclic_checks;
177 int nr_cyclic_check_recursions; 176 unsigned int nr_find_usage_forwards_checks;
178 int nr_find_usage_forwards_checks; 177 unsigned int nr_find_usage_backwards_checks;
179 int nr_find_usage_forwards_recursions;
180 int nr_find_usage_backwards_checks;
181 int nr_find_usage_backwards_recursions;
182 178
183 /* 179 /*
184 * Per lock class locking operation stat counts 180 * Per lock class locking operation stat counts
diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
index 80a463d31a8d..c513031cd7e3 100644
--- a/kernel/locking/locktorture.c
+++ b/kernel/locking/locktorture.c
@@ -975,7 +975,7 @@ static int __init lock_torture_init(void)
975 goto unwind; 975 goto unwind;
976 } 976 }
977 if (stutter > 0) { 977 if (stutter > 0) {
978 firsterr = torture_stutter_init(stutter); 978 firsterr = torture_stutter_init(stutter, stutter);
979 if (firsterr) 979 if (firsterr)
980 goto unwind; 980 goto unwind;
981 } 981 }
diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index b6a9cc62099a..364d38a0c444 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -18,7 +18,7 @@ int __percpu_init_rwsem(struct percpu_rw_semaphore *sem,
18 return -ENOMEM; 18 return -ENOMEM;
19 19
20 /* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */ 20 /* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */
21 rcu_sync_init(&sem->rss, RCU_SCHED_SYNC); 21 rcu_sync_init(&sem->rss);
22 __init_rwsem(&sem->rw_sem, name, rwsem_key); 22 __init_rwsem(&sem->rw_sem, name, rwsem_key);
23 rcuwait_init(&sem->writer); 23 rcuwait_init(&sem->writer);
24 sem->readers_block = 0; 24 sem->readers_block = 0;
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
deleted file mode 100644
index 0b1f77957240..000000000000
--- a/kernel/locking/rwsem-xadd.c
+++ /dev/null
@@ -1,745 +0,0 @@
1// SPDX-License-Identifier: GPL-2.0
2/* rwsem.c: R/W semaphores: contention handling functions
3 *
4 * Written by David Howells (dhowells@redhat.com).
5 * Derived from arch/i386/kernel/semaphore.c
6 *
7 * Writer lock-stealing by Alex Shi <alex.shi@intel.com>
8 * and Michel Lespinasse <walken@google.com>
9 *
10 * Optimistic spinning by Tim Chen <tim.c.chen@intel.com>
11 * and Davidlohr Bueso <davidlohr@hp.com>. Based on mutexes.
12 */
13#include <linux/rwsem.h>
14#include <linux/init.h>
15#include <linux/export.h>
16#include <linux/sched/signal.h>
17#include <linux/sched/rt.h>
18#include <linux/sched/wake_q.h>
19#include <linux/sched/debug.h>
20#include <linux/osq_lock.h>
21
22#include "rwsem.h"
23
24/*
25 * Guide to the rw_semaphore's count field for common values.
26 * (32-bit case illustrated, similar for 64-bit)
27 *
28 * 0x0000000X (1) X readers active or attempting lock, no writer waiting
29 * X = #active_readers + #readers attempting to lock
30 * (X*ACTIVE_BIAS)
31 *
32 * 0x00000000 rwsem is unlocked, and no one is waiting for the lock or
33 * attempting to read lock or write lock.
34 *
35 * 0xffff000X (1) X readers active or attempting lock, with waiters for lock
36 * X = #active readers + # readers attempting lock
37 * (X*ACTIVE_BIAS + WAITING_BIAS)
38 * (2) 1 writer attempting lock, no waiters for lock
39 * X-1 = #active readers + #readers attempting lock
40 * ((X-1)*ACTIVE_BIAS + ACTIVE_WRITE_BIAS)
41 * (3) 1 writer active, no waiters for lock
42 * X-1 = #active readers + #readers attempting lock
43 * ((X-1)*ACTIVE_BIAS + ACTIVE_WRITE_BIAS)
44 *
45 * 0xffff0001 (1) 1 reader active or attempting lock, waiters for lock
46 * (WAITING_BIAS + ACTIVE_BIAS)
47 * (2) 1 writer active or attempting lock, no waiters for lock
48 * (ACTIVE_WRITE_BIAS)
49 *
50 * 0xffff0000 (1) There are writers or readers queued but none active
51 * or in the process of attempting lock.
52 * (WAITING_BIAS)
53 * Note: writer can attempt to steal lock for this count by adding
54 * ACTIVE_WRITE_BIAS in cmpxchg and checking the old count
55 *
56 * 0xfffe0001 (1) 1 writer active, or attempting lock. Waiters on queue.
57 * (ACTIVE_WRITE_BIAS + WAITING_BIAS)
58 *
59 * Note: Readers attempt to lock by adding ACTIVE_BIAS in down_read and checking
60 * the count becomes more than 0 for successful lock acquisition,
61 * i.e. the case where there are only readers or nobody has lock.
62 * (1st and 2nd case above).
63 *
64 * Writers attempt to lock by adding ACTIVE_WRITE_BIAS in down_write and
65 * checking the count becomes ACTIVE_WRITE_BIAS for successful lock
66 * acquisition (i.e. nobody else has lock or attempts lock). If
67 * unsuccessful, in rwsem_down_write_failed, we'll check to see if there
68 * are only waiters but none active (5th case above), and attempt to
69 * steal the lock.
70 *
71 */
72
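
(Reviewer's aside, not part of the diff: the bias arithmetic in the comment above can be sanity-checked with a small user-space sketch. The 32-bit bias constants below are assumptions reproduced from the old rwsem.h, shown only to illustrate how the example count values are composed.)

#include <stdio.h>

#define RWSEM_ACTIVE_BIAS	0x00000001U	/* assumed 32-bit value */
#define RWSEM_WAITING_BIAS	0xffff0000U	/* assumed 32-bit value */
#define RWSEM_ACTIVE_WRITE_BIAS	(RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)

int main(void)
{
	unsigned int readers = 3;

	/* 0x00000003: readers only, nobody waiting */
	printf("3 readers, no waiters   : 0x%08x\n", readers * RWSEM_ACTIVE_BIAS);
	/* 0xffff0003: readers active plus queued waiters */
	printf("3 readers, with waiters : 0x%08x\n",
	       readers * RWSEM_ACTIVE_BIAS + RWSEM_WAITING_BIAS);
	/* 0xffff0001: one writer holds or is attempting the lock */
	printf("1 writer, no waiters    : 0x%08x\n", RWSEM_ACTIVE_WRITE_BIAS);
	/* 0xffff0000: waiters queued, lock idle (stealable by a writer) */
	printf("waiters only, lock idle : 0x%08x\n", RWSEM_WAITING_BIAS);
	return 0;
}
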
73/*
74 * Initialize an rwsem:
75 */
76void __init_rwsem(struct rw_semaphore *sem, const char *name,
77 struct lock_class_key *key)
78{
79#ifdef CONFIG_DEBUG_LOCK_ALLOC
80 /*
81 * Make sure we are not reinitializing a held semaphore:
82 */
83 debug_check_no_locks_freed((void *)sem, sizeof(*sem));
84 lockdep_init_map(&sem->dep_map, name, key, 0);
85#endif
86 atomic_long_set(&sem->count, RWSEM_UNLOCKED_VALUE);
87 raw_spin_lock_init(&sem->wait_lock);
88 INIT_LIST_HEAD(&sem->wait_list);
89#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
90 sem->owner = NULL;
91 osq_lock_init(&sem->osq);
92#endif
93}
94
95EXPORT_SYMBOL(__init_rwsem);
96
97enum rwsem_waiter_type {
98 RWSEM_WAITING_FOR_WRITE,
99 RWSEM_WAITING_FOR_READ
100};
101
102struct rwsem_waiter {
103 struct list_head list;
104 struct task_struct *task;
105 enum rwsem_waiter_type type;
106};
107
108enum rwsem_wake_type {
109 RWSEM_WAKE_ANY, /* Wake whatever's at head of wait list */
110 RWSEM_WAKE_READERS, /* Wake readers only */
111 RWSEM_WAKE_READ_OWNED /* Waker thread holds the read lock */
112};
113
114/*
115 * handle the lock release when processes blocked on it that can now run
116 * - if we come here from up_xxxx(), then:
117 * - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed)
118 * - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so)
119 * - there must be someone on the queue
120 * - the wait_lock must be held by the caller
121 * - tasks are marked for wakeup, the caller must later invoke wake_up_q()
122 * to actually wakeup the blocked task(s) and drop the reference count,
123 * preferably when the wait_lock is released
124 * - woken process blocks are discarded from the list after having task zeroed
125 * - writers are only marked woken if downgrading is false
126 */
127static void __rwsem_mark_wake(struct rw_semaphore *sem,
128 enum rwsem_wake_type wake_type,
129 struct wake_q_head *wake_q)
130{
131 struct rwsem_waiter *waiter, *tmp;
132 long oldcount, woken = 0, adjustment = 0;
133 struct list_head wlist;
134
135 /*
136 * Take a peek at the queue head waiter such that we can determine
137 * the wakeup(s) to perform.
138 */
139 waiter = list_first_entry(&sem->wait_list, struct rwsem_waiter, list);
140
141 if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
142 if (wake_type == RWSEM_WAKE_ANY) {
143 /*
144 * Mark writer at the front of the queue for wakeup.
 145			 * Until the task is actually awoken later by
146 * the caller, other writers are able to steal it.
147 * Readers, on the other hand, will block as they
148 * will notice the queued writer.
149 */
150 wake_q_add(wake_q, waiter->task);
151 lockevent_inc(rwsem_wake_writer);
152 }
153
154 return;
155 }
156
157 /*
158 * Writers might steal the lock before we grant it to the next reader.
159 * We prefer to do the first reader grant before counting readers
160 * so we can bail out early if a writer stole the lock.
161 */
162 if (wake_type != RWSEM_WAKE_READ_OWNED) {
163 adjustment = RWSEM_ACTIVE_READ_BIAS;
164 try_reader_grant:
165 oldcount = atomic_long_fetch_add(adjustment, &sem->count);
166 if (unlikely(oldcount < RWSEM_WAITING_BIAS)) {
167 /*
168 * If the count is still less than RWSEM_WAITING_BIAS
169 * after removing the adjustment, it is assumed that
170 * a writer has stolen the lock. We have to undo our
171 * reader grant.
172 */
173 if (atomic_long_add_return(-adjustment, &sem->count) <
174 RWSEM_WAITING_BIAS)
175 return;
176
177 /* Last active locker left. Retry waking readers. */
178 goto try_reader_grant;
179 }
180 /*
181 * Set it to reader-owned to give spinners an early
182 * indication that readers now have the lock.
183 */
184 __rwsem_set_reader_owned(sem, waiter->task);
185 }
186
187 /*
188 * Grant an infinite number of read locks to the readers at the front
189 * of the queue. We know that woken will be at least 1 as we accounted
190 * for above. Note we increment the 'active part' of the count by the
191 * number of readers before waking any processes up.
192 *
193 * We have to do wakeup in 2 passes to prevent the possibility that
194 * the reader count may be decremented before it is incremented. It
195 * is because the to-be-woken waiter may not have slept yet. So it
196 * may see waiter->task got cleared, finish its critical section and
197 * do an unlock before the reader count increment.
198 *
199 * 1) Collect the read-waiters in a separate list, count them and
200 * fully increment the reader count in rwsem.
 201	 * 2) For each waiter in the new list, clear waiter->task and
202 * put them into wake_q to be woken up later.
203 */
204 list_for_each_entry(waiter, &sem->wait_list, list) {
205 if (waiter->type == RWSEM_WAITING_FOR_WRITE)
206 break;
207
208 woken++;
209 }
210 list_cut_before(&wlist, &sem->wait_list, &waiter->list);
211
212 adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
213 lockevent_cond_inc(rwsem_wake_reader, woken);
214 if (list_empty(&sem->wait_list)) {
215 /* hit end of list above */
216 adjustment -= RWSEM_WAITING_BIAS;
217 }
218
219 if (adjustment)
220 atomic_long_add(adjustment, &sem->count);
221
222 /* 2nd pass */
223 list_for_each_entry_safe(waiter, tmp, &wlist, list) {
224 struct task_struct *tsk;
225
226 tsk = waiter->task;
227 get_task_struct(tsk);
228
229 /*
230 * Ensure calling get_task_struct() before setting the reader
231 * waiter to nil such that rwsem_down_read_failed() cannot
232 * race with do_exit() by always holding a reference count
233 * to the task to wakeup.
234 */
235 smp_store_release(&waiter->task, NULL);
236 /*
237 * Ensure issuing the wakeup (either by us or someone else)
238 * after setting the reader waiter to nil.
239 */
240 wake_q_add_safe(wake_q, tsk);
241 }
242}
243
244/*
245 * This function must be called with the sem->wait_lock held to prevent
246 * race conditions between checking the rwsem wait list and setting the
247 * sem->count accordingly.
248 */
249static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem)
250{
251 /*
252 * Avoid trying to acquire write lock if count isn't RWSEM_WAITING_BIAS.
253 */
254 if (count != RWSEM_WAITING_BIAS)
255 return false;
256
257 /*
258 * Acquire the lock by trying to set it to ACTIVE_WRITE_BIAS. If there
259 * are other tasks on the wait list, we need to add on WAITING_BIAS.
260 */
261 count = list_is_singular(&sem->wait_list) ?
262 RWSEM_ACTIVE_WRITE_BIAS :
263 RWSEM_ACTIVE_WRITE_BIAS + RWSEM_WAITING_BIAS;
264
265 if (atomic_long_cmpxchg_acquire(&sem->count, RWSEM_WAITING_BIAS, count)
266 == RWSEM_WAITING_BIAS) {
267 rwsem_set_owner(sem);
268 return true;
269 }
270
271 return false;
272}
273
274#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
275/*
276 * Try to acquire write lock before the writer has been put on wait queue.
277 */
278static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
279{
280 long count = atomic_long_read(&sem->count);
281
282 while (!count || count == RWSEM_WAITING_BIAS) {
283 if (atomic_long_try_cmpxchg_acquire(&sem->count, &count,
284 count + RWSEM_ACTIVE_WRITE_BIAS)) {
285 rwsem_set_owner(sem);
286 lockevent_inc(rwsem_opt_wlock);
287 return true;
288 }
289 }
290 return false;
291}
292
293static inline bool owner_on_cpu(struct task_struct *owner)
294{
295 /*
 296	 * Due to lock holder preemption, we skip spinning if the
 297	 * task is not on a CPU or its CPU is preempted.
298 */
299 return owner->on_cpu && !vcpu_is_preempted(task_cpu(owner));
300}
301
302static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
303{
304 struct task_struct *owner;
305 bool ret = true;
306
307 BUILD_BUG_ON(!rwsem_has_anonymous_owner(RWSEM_OWNER_UNKNOWN));
308
309 if (need_resched())
310 return false;
311
312 rcu_read_lock();
313 owner = READ_ONCE(sem->owner);
314 if (owner) {
315 ret = is_rwsem_owner_spinnable(owner) &&
316 owner_on_cpu(owner);
317 }
318 rcu_read_unlock();
319 return ret;
320}
321
322/*
323 * Return true only if we can still spin on the owner field of the rwsem.
324 */
325static noinline bool rwsem_spin_on_owner(struct rw_semaphore *sem)
326{
327 struct task_struct *owner = READ_ONCE(sem->owner);
328
329 if (!is_rwsem_owner_spinnable(owner))
330 return false;
331
332 rcu_read_lock();
333 while (owner && (READ_ONCE(sem->owner) == owner)) {
334 /*
335 * Ensure we emit the owner->on_cpu, dereference _after_
336 * checking sem->owner still matches owner, if that fails,
337 * owner might point to free()d memory, if it still matches,
338 * the rcu_read_lock() ensures the memory stays valid.
339 */
340 barrier();
341
342 /*
343 * abort spinning when need_resched or owner is not running or
344 * owner's cpu is preempted.
345 */
346 if (need_resched() || !owner_on_cpu(owner)) {
347 rcu_read_unlock();
348 return false;
349 }
350
351 cpu_relax();
352 }
353 rcu_read_unlock();
354
355 /*
356 * If there is a new owner or the owner is not set, we continue
357 * spinning.
358 */
359 return is_rwsem_owner_spinnable(READ_ONCE(sem->owner));
360}
361
362static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
363{
364 bool taken = false;
365
366 preempt_disable();
367
368 /* sem->wait_lock should not be held when doing optimistic spinning */
369 if (!rwsem_can_spin_on_owner(sem))
370 goto done;
371
372 if (!osq_lock(&sem->osq))
373 goto done;
374
375 /*
376 * Optimistically spin on the owner field and attempt to acquire the
377 * lock whenever the owner changes. Spinning will be stopped when:
378 * 1) the owning writer isn't running; or
379 * 2) readers own the lock as we can't determine if they are
380 * actively running or not.
381 */
382 while (rwsem_spin_on_owner(sem)) {
383 /*
384 * Try to acquire the lock
385 */
386 if (rwsem_try_write_lock_unqueued(sem)) {
387 taken = true;
388 break;
389 }
390
391 /*
392 * When there's no owner, we might have preempted between the
393 * owner acquiring the lock and setting the owner field. If
394 * we're an RT task that will live-lock because we won't let
395 * the owner complete.
396 */
397 if (!sem->owner && (need_resched() || rt_task(current)))
398 break;
399
400 /*
401 * The cpu_relax() call is a compiler barrier which forces
402 * everything in this loop to be re-loaded. We don't need
403 * memory barriers as we'll eventually observe the right
404 * values at the cost of a few extra spins.
405 */
406 cpu_relax();
407 }
408 osq_unlock(&sem->osq);
409done:
410 preempt_enable();
411 lockevent_cond_inc(rwsem_opt_fail, !taken);
412 return taken;
413}
414
415/*
416 * Return true if the rwsem has active spinner
417 */
418static inline bool rwsem_has_spinner(struct rw_semaphore *sem)
419{
420 return osq_is_locked(&sem->osq);
421}
422
423#else
424static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
425{
426 return false;
427}
428
429static inline bool rwsem_has_spinner(struct rw_semaphore *sem)
430{
431 return false;
432}
433#endif
434
435/*
436 * Wait for the read lock to be granted
437 */
438static inline struct rw_semaphore __sched *
439__rwsem_down_read_failed_common(struct rw_semaphore *sem, int state)
440{
441 long count, adjustment = -RWSEM_ACTIVE_READ_BIAS;
442 struct rwsem_waiter waiter;
443 DEFINE_WAKE_Q(wake_q);
444
445 waiter.task = current;
446 waiter.type = RWSEM_WAITING_FOR_READ;
447
448 raw_spin_lock_irq(&sem->wait_lock);
449 if (list_empty(&sem->wait_list)) {
450 /*
451 * In case the wait queue is empty and the lock isn't owned
452 * by a writer, this reader can exit the slowpath and return
453 * immediately as its RWSEM_ACTIVE_READ_BIAS has already
454 * been set in the count.
455 */
456 if (atomic_long_read(&sem->count) >= 0) {
457 raw_spin_unlock_irq(&sem->wait_lock);
458 rwsem_set_reader_owned(sem);
459 lockevent_inc(rwsem_rlock_fast);
460 return sem;
461 }
462 adjustment += RWSEM_WAITING_BIAS;
463 }
464 list_add_tail(&waiter.list, &sem->wait_list);
465
466 /* we're now waiting on the lock, but no longer actively locking */
467 count = atomic_long_add_return(adjustment, &sem->count);
468
469 /*
470 * If there are no active locks, wake the front queued process(es).
471 *
472 * If there are no writers and we are first in the queue,
473 * wake our own waiter to join the existing active readers !
474 */
475 if (count == RWSEM_WAITING_BIAS ||
476 (count > RWSEM_WAITING_BIAS &&
477 adjustment != -RWSEM_ACTIVE_READ_BIAS))
478 __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
479
480 raw_spin_unlock_irq(&sem->wait_lock);
481 wake_up_q(&wake_q);
482
483 /* wait to be given the lock */
484 while (true) {
485 set_current_state(state);
486 if (!waiter.task)
487 break;
488 if (signal_pending_state(state, current)) {
489 raw_spin_lock_irq(&sem->wait_lock);
490 if (waiter.task)
491 goto out_nolock;
492 raw_spin_unlock_irq(&sem->wait_lock);
493 break;
494 }
495 schedule();
496 lockevent_inc(rwsem_sleep_reader);
497 }
498
499 __set_current_state(TASK_RUNNING);
500 lockevent_inc(rwsem_rlock);
501 return sem;
502out_nolock:
503 list_del(&waiter.list);
504 if (list_empty(&sem->wait_list))
505 atomic_long_add(-RWSEM_WAITING_BIAS, &sem->count);
506 raw_spin_unlock_irq(&sem->wait_lock);
507 __set_current_state(TASK_RUNNING);
508 lockevent_inc(rwsem_rlock_fail);
509 return ERR_PTR(-EINTR);
510}
511
512__visible struct rw_semaphore * __sched
513rwsem_down_read_failed(struct rw_semaphore *sem)
514{
515 return __rwsem_down_read_failed_common(sem, TASK_UNINTERRUPTIBLE);
516}
517EXPORT_SYMBOL(rwsem_down_read_failed);
518
519__visible struct rw_semaphore * __sched
520rwsem_down_read_failed_killable(struct rw_semaphore *sem)
521{
522 return __rwsem_down_read_failed_common(sem, TASK_KILLABLE);
523}
524EXPORT_SYMBOL(rwsem_down_read_failed_killable);
525
526/*
527 * Wait until we successfully acquire the write lock
528 */
529static inline struct rw_semaphore *
530__rwsem_down_write_failed_common(struct rw_semaphore *sem, int state)
531{
532 long count;
533 bool waiting = true; /* any queued threads before us */
534 struct rwsem_waiter waiter;
535 struct rw_semaphore *ret = sem;
536 DEFINE_WAKE_Q(wake_q);
537
538 /* undo write bias from down_write operation, stop active locking */
539 count = atomic_long_sub_return(RWSEM_ACTIVE_WRITE_BIAS, &sem->count);
540
541 /* do optimistic spinning and steal lock if possible */
542 if (rwsem_optimistic_spin(sem))
543 return sem;
544
545 /*
546 * Optimistic spinning failed, proceed to the slowpath
547 * and block until we can acquire the sem.
548 */
549 waiter.task = current;
550 waiter.type = RWSEM_WAITING_FOR_WRITE;
551
552 raw_spin_lock_irq(&sem->wait_lock);
553
554 /* account for this before adding a new element to the list */
555 if (list_empty(&sem->wait_list))
556 waiting = false;
557
558 list_add_tail(&waiter.list, &sem->wait_list);
559
560 /* we're now waiting on the lock, but no longer actively locking */
561 if (waiting) {
562 count = atomic_long_read(&sem->count);
563
564 /*
565 * If there were already threads queued before us and there are
566 * no active writers, the lock must be read owned; so we try to
567 * wake any read locks that were queued ahead of us.
568 */
569 if (count > RWSEM_WAITING_BIAS) {
570 __rwsem_mark_wake(sem, RWSEM_WAKE_READERS, &wake_q);
571 /*
572 * The wakeup is normally called _after_ the wait_lock
573 * is released, but given that we are proactively waking
574 * readers we can deal with the wake_q overhead as it is
575 * similar to releasing and taking the wait_lock again
576 * for attempting rwsem_try_write_lock().
577 */
578 wake_up_q(&wake_q);
579
580 /*
581 * Reinitialize wake_q after use.
582 */
583 wake_q_init(&wake_q);
584 }
585
586 } else
587 count = atomic_long_add_return(RWSEM_WAITING_BIAS, &sem->count);
588
589 /* wait until we successfully acquire the lock */
590 set_current_state(state);
591 while (true) {
592 if (rwsem_try_write_lock(count, sem))
593 break;
594 raw_spin_unlock_irq(&sem->wait_lock);
595
596 /* Block until there are no active lockers. */
597 do {
598 if (signal_pending_state(state, current))
599 goto out_nolock;
600
601 schedule();
602 lockevent_inc(rwsem_sleep_writer);
603 set_current_state(state);
604 } while ((count = atomic_long_read(&sem->count)) & RWSEM_ACTIVE_MASK);
605
606 raw_spin_lock_irq(&sem->wait_lock);
607 }
608 __set_current_state(TASK_RUNNING);
609 list_del(&waiter.list);
610 raw_spin_unlock_irq(&sem->wait_lock);
611 lockevent_inc(rwsem_wlock);
612
613 return ret;
614
615out_nolock:
616 __set_current_state(TASK_RUNNING);
617 raw_spin_lock_irq(&sem->wait_lock);
618 list_del(&waiter.list);
619 if (list_empty(&sem->wait_list))
620 atomic_long_add(-RWSEM_WAITING_BIAS, &sem->count);
621 else
622 __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
623 raw_spin_unlock_irq(&sem->wait_lock);
624 wake_up_q(&wake_q);
625 lockevent_inc(rwsem_wlock_fail);
626
627 return ERR_PTR(-EINTR);
628}
629
630__visible struct rw_semaphore * __sched
631rwsem_down_write_failed(struct rw_semaphore *sem)
632{
633 return __rwsem_down_write_failed_common(sem, TASK_UNINTERRUPTIBLE);
634}
635EXPORT_SYMBOL(rwsem_down_write_failed);
636
637__visible struct rw_semaphore * __sched
638rwsem_down_write_failed_killable(struct rw_semaphore *sem)
639{
640 return __rwsem_down_write_failed_common(sem, TASK_KILLABLE);
641}
642EXPORT_SYMBOL(rwsem_down_write_failed_killable);
643
644/*
645 * handle waking up a waiter on the semaphore
646 * - up_read/up_write has decremented the active part of count if we come here
647 */
648__visible
649struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
650{
651 unsigned long flags;
652 DEFINE_WAKE_Q(wake_q);
653
654 /*
655 * __rwsem_down_write_failed_common(sem)
656 * rwsem_optimistic_spin(sem)
657 * osq_unlock(sem->osq)
658 * ...
659 * atomic_long_add_return(&sem->count)
660 *
661 * - VS -
662 *
663 * __up_write()
664 * if (atomic_long_sub_return_release(&sem->count) < 0)
665 * rwsem_wake(sem)
666 * osq_is_locked(&sem->osq)
667 *
668 * And __up_write() must observe !osq_is_locked() when it observes the
669 * atomic_long_add_return() in order to not miss a wakeup.
670 *
671 * This boils down to:
672 *
673 * [S.rel] X = 1 [RmW] r0 = (Y += 0)
674 * MB RMB
675 * [RmW] Y += 1 [L] r1 = X
676 *
677 * exists (r0=1 /\ r1=0)
678 */
679 smp_rmb();
680
681 /*
682 * If a spinner is present, it is not necessary to do the wakeup.
683 * Try to do wakeup only if the trylock succeeds to minimize
684 * spinlock contention which may introduce too much delay in the
685 * unlock operation.
686 *
687 * spinning writer up_write/up_read caller
688 * --------------- -----------------------
689 * [S] osq_unlock() [L] osq
690 * MB RMB
691 * [RmW] rwsem_try_write_lock() [RmW] spin_trylock(wait_lock)
692 *
693 * Here, it is important to make sure that there won't be a missed
694 * wakeup while the rwsem is free and the only spinning writer goes
695 * to sleep without taking the rwsem. Even when the spinning writer
696 * is just going to break out of the waiting loop, it will still do
697 * a trylock in rwsem_down_write_failed() before sleeping. IOW, if
698 * rwsem_has_spinner() is true, it will guarantee at least one
699 * trylock attempt on the rwsem later on.
700 */
701 if (rwsem_has_spinner(sem)) {
702 /*
703 * The smp_rmb() here is to make sure that the spinner
704 * state is consulted before reading the wait_lock.
705 */
706 smp_rmb();
707 if (!raw_spin_trylock_irqsave(&sem->wait_lock, flags))
708 return sem;
709 goto locked;
710 }
711 raw_spin_lock_irqsave(&sem->wait_lock, flags);
712locked:
713
714 if (!list_empty(&sem->wait_list))
715 __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
716
717 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
718 wake_up_q(&wake_q);
719
720 return sem;
721}
722EXPORT_SYMBOL(rwsem_wake);
723
724/*
725 * downgrade a write lock into a read lock
726 * - caller incremented waiting part of count and discovered it still negative
727 * - just wake up any readers at the front of the queue
728 */
729__visible
730struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
731{
732 unsigned long flags;
733 DEFINE_WAKE_Q(wake_q);
734
735 raw_spin_lock_irqsave(&sem->wait_lock, flags);
736
737 if (!list_empty(&sem->wait_list))
738 __rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED, &wake_q);
739
740 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
741 wake_up_q(&wake_q);
742
743 return sem;
744}
745EXPORT_SYMBOL(rwsem_downgrade_wake);
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index ccbf18f560ff..37524a47f002 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -3,17 +3,1438 @@
3 * 3 *
4 * Written by David Howells (dhowells@redhat.com). 4 * Written by David Howells (dhowells@redhat.com).
5 * Derived from asm-i386/semaphore.h 5 * Derived from asm-i386/semaphore.h
6 *
7 * Writer lock-stealing by Alex Shi <alex.shi@intel.com>
8 * and Michel Lespinasse <walken@google.com>
9 *
10 * Optimistic spinning by Tim Chen <tim.c.chen@intel.com>
11 * and Davidlohr Bueso <davidlohr@hp.com>. Based on mutexes.
12 *
13 * Rwsem count bit fields re-definition and rwsem rearchitecture by
14 * Waiman Long <longman@redhat.com> and
15 * Peter Zijlstra <peterz@infradead.org>.
6 */ 16 */
7 17
8#include <linux/types.h> 18#include <linux/types.h>
9#include <linux/kernel.h> 19#include <linux/kernel.h>
10#include <linux/sched.h> 20#include <linux/sched.h>
21#include <linux/sched/rt.h>
22#include <linux/sched/task.h>
11#include <linux/sched/debug.h> 23#include <linux/sched/debug.h>
24#include <linux/sched/wake_q.h>
25#include <linux/sched/signal.h>
26#include <linux/sched/clock.h>
12#include <linux/export.h> 27#include <linux/export.h>
13#include <linux/rwsem.h> 28#include <linux/rwsem.h>
14#include <linux/atomic.h> 29#include <linux/atomic.h>
15 30
16#include "rwsem.h" 31#include "rwsem.h"
32#include "lock_events.h"
33
34/*
35 * The least significant 3 bits of the owner value has the following
36 * meanings when set.
37 * - Bit 0: RWSEM_READER_OWNED - The rwsem is owned by readers
38 * - Bit 1: RWSEM_RD_NONSPINNABLE - Readers cannot spin on this lock.
39 * - Bit 2: RWSEM_WR_NONSPINNABLE - Writers cannot spin on this lock.
40 *
41 * When the rwsem is either owned by an anonymous writer, or it is
42 * reader-owned, but a spinning writer has timed out, both nonspinnable
43 * bits will be set to disable optimistic spinning by readers and writers.
 44 * In the latter case, the last unlocking reader should then check the
45 * writer nonspinnable bit and clear it only to give writers preference
46 * to acquire the lock via optimistic spinning, but not readers. Similar
47 * action is also done in the reader slowpath.
 48 *
49 * When a writer acquires a rwsem, it puts its task_struct pointer
50 * into the owner field. It is cleared after an unlock.
51 *
 52 * When a reader acquires a rwsem, it will also put its task_struct
53 * pointer into the owner field with the RWSEM_READER_OWNED bit set.
54 * On unlock, the owner field will largely be left untouched. So
55 * for a free or reader-owned rwsem, the owner value may contain
 56 * information about the last reader that acquired the rwsem.
57 *
58 * That information may be helpful in debugging cases where the system
59 * seems to hang on a reader owned rwsem especially if only one reader
60 * is involved. Ideally we would like to track all the readers that own
61 * a rwsem, but the overhead is simply too big.
62 *
63 * Reader optimistic spinning is helpful when the reader critical section
 64 * is short and there aren't that many readers around. It gives readers
 65 * relative preference over writers. When a writer times out spinning
 66 * on a reader-owned lock and sets the nonspinnable bits, there are two main
67 * reasons for that.
68 *
69 * 1) The reader critical section is long, perhaps the task sleeps after
70 * acquiring the read lock.
71 * 2) There are just too many readers contending the lock causing it to
72 * take a while to service all of them.
73 *
 74 * In the former case, a long reader critical section will impede the progress
 75 * of writers, which is usually more important for system performance. In
 76 * the latter case, reader optimistic spinning tends to make the reader
77 * groups that contain readers that acquire the lock together smaller
78 * leading to more of them. That may hurt performance in some cases. In
79 * other words, the setting of nonspinnable bits indicates that reader
80 * optimistic spinning may not be helpful for those workloads that cause
81 * it.
82 *
83 * Therefore, any writers that had observed the setting of the writer
84 * nonspinnable bit for a given rwsem after they fail to acquire the lock
85 * via optimistic spinning will set the reader nonspinnable bit once they
86 * acquire the write lock. Similarly, readers that observe the setting
87 * of reader nonspinnable bit at slowpath entry will set the reader
88 * nonspinnable bits when they acquire the read lock via the wakeup path.
89 *
90 * Once the reader nonspinnable bit is on, it will only be reset when
91 * a writer is able to acquire the rwsem in the fast path or somehow a
 92 * reader or writer in the slowpath doesn't observe the nonspinnable bit.
93 *
 94 * This is to discourage reader optimistic spinning on that particular
95 * rwsem and make writers more preferred. This adaptive disabling of reader
96 * optimistic spinning will alleviate the negative side effect of this
97 * feature.
98 */
99#define RWSEM_READER_OWNED (1UL << 0)
100#define RWSEM_RD_NONSPINNABLE (1UL << 1)
101#define RWSEM_WR_NONSPINNABLE (1UL << 2)
102#define RWSEM_NONSPINNABLE (RWSEM_RD_NONSPINNABLE | RWSEM_WR_NONSPINNABLE)
103#define RWSEM_OWNER_FLAGS_MASK (RWSEM_READER_OWNED | RWSEM_NONSPINNABLE)
104
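
(Illustrative aside, not part of the patch: a user-space sketch of the owner-field encoding described above. The flag macros mirror the ones just defined; the task address is a made-up, suitably aligned value.)

#include <stdio.h>

#define RWSEM_READER_OWNED	(1UL << 0)
#define RWSEM_RD_NONSPINNABLE	(1UL << 1)
#define RWSEM_WR_NONSPINNABLE	(1UL << 2)
#define RWSEM_NONSPINNABLE	(RWSEM_RD_NONSPINNABLE | RWSEM_WR_NONSPINNABLE)
#define RWSEM_OWNER_FLAGS_MASK	(RWSEM_READER_OWNED | RWSEM_NONSPINNABLE)

int main(void)
{
	unsigned long task = 0xffff888012345600UL;	/* fake task_struct address */
	unsigned long owner = task | RWSEM_READER_OWNED | RWSEM_RD_NONSPINNABLE;

	printf("owner word        : 0x%lx\n", owner);
	/* strip the low flag bits to recover the task pointer */
	printf("task pointer part : 0x%lx\n", owner & ~RWSEM_OWNER_FLAGS_MASK);
	printf("flag bits         : 0x%lx\n", owner & RWSEM_OWNER_FLAGS_MASK);
	return 0;
}
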
105#ifdef CONFIG_DEBUG_RWSEMS
106# define DEBUG_RWSEMS_WARN_ON(c, sem) do { \
107 if (!debug_locks_silent && \
108 WARN_ONCE(c, "DEBUG_RWSEMS_WARN_ON(%s): count = 0x%lx, owner = 0x%lx, curr 0x%lx, list %sempty\n",\
109 #c, atomic_long_read(&(sem)->count), \
110 atomic_long_read(&(sem)->owner), (long)current, \
111 list_empty(&(sem)->wait_list) ? "" : "not ")) \
112 debug_locks_off(); \
113 } while (0)
114#else
115# define DEBUG_RWSEMS_WARN_ON(c, sem)
116#endif
117
118/*
119 * On 64-bit architectures, the bit definitions of the count are:
120 *
121 * Bit 0 - writer locked bit
122 * Bit 1 - waiters present bit
123 * Bit 2 - lock handoff bit
124 * Bits 3-7 - reserved
125 * Bits 8-62 - 55-bit reader count
126 * Bit 63 - read fail bit
127 *
128 * On 32-bit architectures, the bit definitions of the count are:
129 *
130 * Bit 0 - writer locked bit
131 * Bit 1 - waiters present bit
132 * Bit 2 - lock handoff bit
133 * Bits 3-7 - reserved
134 * Bits 8-30 - 23-bit reader count
135 * Bit 31 - read fail bit
136 *
137 * It is not likely that the most significant bit (read fail bit) will ever
138 * be set. This guard bit is still checked anyway in the down_read() fastpath
139 * just in case we need to use up more of the reader bits for other purpose
140 * in the future.
141 *
142 * atomic_long_fetch_add() is used to obtain reader lock, whereas
143 * atomic_long_cmpxchg() will be used to obtain writer lock.
144 *
145 * There are three places where the lock handoff bit may be set or cleared.
146 * 1) rwsem_mark_wake() for readers.
147 * 2) rwsem_try_write_lock() for writers.
148 * 3) Error path of rwsem_down_write_slowpath().
149 *
150 * For all the above cases, wait_lock will be held. A writer must also
151 * be the first one in the wait_list to be eligible for setting the handoff
152 * bit. So concurrent setting/clearing of handoff bit is not possible.
153 */
154#define RWSEM_WRITER_LOCKED (1UL << 0)
155#define RWSEM_FLAG_WAITERS (1UL << 1)
156#define RWSEM_FLAG_HANDOFF (1UL << 2)
157#define RWSEM_FLAG_READFAIL (1UL << (BITS_PER_LONG - 1))
158
159#define RWSEM_READER_SHIFT 8
160#define RWSEM_READER_BIAS (1UL << RWSEM_READER_SHIFT)
161#define RWSEM_READER_MASK (~(RWSEM_READER_BIAS - 1))
162#define RWSEM_WRITER_MASK RWSEM_WRITER_LOCKED
163#define RWSEM_LOCK_MASK (RWSEM_WRITER_MASK|RWSEM_READER_MASK)
164#define RWSEM_READ_FAILED_MASK (RWSEM_WRITER_MASK|RWSEM_FLAG_WAITERS|\
165 RWSEM_FLAG_HANDOFF|RWSEM_FLAG_READFAIL)
166
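
(Illustrative aside, not part of the patch: a minimal user-space sketch, assuming a 64-bit long, that mirrors the count-bit macros above and prints a few representative count values.)

#include <stdio.h>

#define RWSEM_WRITER_LOCKED	(1UL << 0)
#define RWSEM_FLAG_WAITERS	(1UL << 1)
#define RWSEM_FLAG_HANDOFF	(1UL << 2)
#define RWSEM_READER_SHIFT	8
#define RWSEM_READER_BIAS	(1UL << RWSEM_READER_SHIFT)

int main(void)
{
	printf("unlocked                   : 0x%lx\n", 0UL);
	printf("1 reader                   : 0x%lx\n", RWSEM_READER_BIAS);
	printf("3 readers + waiters        : 0x%lx\n",
	       (3 * RWSEM_READER_BIAS) | RWSEM_FLAG_WAITERS);
	printf("writer locked              : 0x%lx\n", RWSEM_WRITER_LOCKED);
	printf("writer + waiters + handoff : 0x%lx\n",
	       RWSEM_WRITER_LOCKED | RWSEM_FLAG_WAITERS | RWSEM_FLAG_HANDOFF);
	return 0;
}
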
167/*
168 * All writes to owner are protected by WRITE_ONCE() to make sure that
169 * store tearing can't happen as optimistic spinners may read and use
170 * the owner value concurrently without lock. Read from owner, however,
171 * may not need READ_ONCE() as long as the pointer value is only used
172 * for comparison and isn't being dereferenced.
173 */
174static inline void rwsem_set_owner(struct rw_semaphore *sem)
175{
176 atomic_long_set(&sem->owner, (long)current);
177}
178
179static inline void rwsem_clear_owner(struct rw_semaphore *sem)
180{
181 atomic_long_set(&sem->owner, 0);
182}
183
184/*
185 * Test the flags in the owner field.
186 */
187static inline bool rwsem_test_oflags(struct rw_semaphore *sem, long flags)
188{
189 return atomic_long_read(&sem->owner) & flags;
190}
191
192/*
193 * The task_struct pointer of the last owning reader will be left in
194 * the owner field.
195 *
196 * Note that the owner value just indicates the task has owned the rwsem
 197 * previously; it may not be the real owner or one of the real owners
198 * anymore when that field is examined, so take it with a grain of salt.
199 *
200 * The reader non-spinnable bit is preserved.
201 */
202static inline void __rwsem_set_reader_owned(struct rw_semaphore *sem,
203 struct task_struct *owner)
204{
205 unsigned long val = (unsigned long)owner | RWSEM_READER_OWNED |
206 (atomic_long_read(&sem->owner) & RWSEM_RD_NONSPINNABLE);
207
208 atomic_long_set(&sem->owner, val);
209}
210
211static inline void rwsem_set_reader_owned(struct rw_semaphore *sem)
212{
213 __rwsem_set_reader_owned(sem, current);
214}
215
216/*
217 * Return true if the rwsem is owned by a reader.
218 */
219static inline bool is_rwsem_reader_owned(struct rw_semaphore *sem)
220{
221#ifdef CONFIG_DEBUG_RWSEMS
222 /*
223 * Check the count to see if it is write-locked.
224 */
225 long count = atomic_long_read(&sem->count);
226
227 if (count & RWSEM_WRITER_MASK)
228 return false;
229#endif
230 return rwsem_test_oflags(sem, RWSEM_READER_OWNED);
231}
232
233#ifdef CONFIG_DEBUG_RWSEMS
234/*
235 * With CONFIG_DEBUG_RWSEMS configured, it will make sure that if there
236 * is a task pointer in owner of a reader-owned rwsem, it will be the
237 * real owner or one of the real owners. The only exception is when the
238 * unlock is done by up_read_non_owner().
239 */
240static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
241{
242 unsigned long val = atomic_long_read(&sem->owner);
243
244 while ((val & ~RWSEM_OWNER_FLAGS_MASK) == (unsigned long)current) {
245 if (atomic_long_try_cmpxchg(&sem->owner, &val,
246 val & RWSEM_OWNER_FLAGS_MASK))
247 return;
248 }
249}
250#else
251static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
252{
253}
254#endif
255
256/*
257 * Set the RWSEM_NONSPINNABLE bits if the RWSEM_READER_OWNED flag
258 * remains set. Otherwise, the operation will be aborted.
259 */
260static inline void rwsem_set_nonspinnable(struct rw_semaphore *sem)
261{
262 unsigned long owner = atomic_long_read(&sem->owner);
263
264 do {
265 if (!(owner & RWSEM_READER_OWNED))
266 break;
267 if (owner & RWSEM_NONSPINNABLE)
268 break;
269 } while (!atomic_long_try_cmpxchg(&sem->owner, &owner,
270 owner | RWSEM_NONSPINNABLE));
271}
272
273static inline bool rwsem_read_trylock(struct rw_semaphore *sem)
274{
275 long cnt = atomic_long_add_return_acquire(RWSEM_READER_BIAS, &sem->count);
276 if (WARN_ON_ONCE(cnt < 0))
277 rwsem_set_nonspinnable(sem);
278 return !(cnt & RWSEM_READ_FAILED_MASK);
279}
280
281/*
282 * Return just the real task structure pointer of the owner
283 */
284static inline struct task_struct *rwsem_owner(struct rw_semaphore *sem)
285{
286 return (struct task_struct *)
287 (atomic_long_read(&sem->owner) & ~RWSEM_OWNER_FLAGS_MASK);
288}
289
290/*
291 * Return the real task structure pointer of the owner and the embedded
292 * flags in the owner. pflags must be non-NULL.
293 */
294static inline struct task_struct *
295rwsem_owner_flags(struct rw_semaphore *sem, unsigned long *pflags)
296{
297 unsigned long owner = atomic_long_read(&sem->owner);
298
299 *pflags = owner & RWSEM_OWNER_FLAGS_MASK;
300 return (struct task_struct *)(owner & ~RWSEM_OWNER_FLAGS_MASK);
301}
302
303/*
304 * Guide to the rw_semaphore's count field.
305 *
306 * When the RWSEM_WRITER_LOCKED bit in count is set, the lock is owned
307 * by a writer.
308 *
309 * The lock is owned by readers when
310 * (1) the RWSEM_WRITER_LOCKED isn't set in count,
311 * (2) some of the reader bits are set in count, and
312 * (3) the owner field has RWSEM_READ_OWNED bit set.
313 *
314 * Having some reader bits set is not enough to guarantee a readers owned
315 * lock as the readers may be in the process of backing out from the count
316 * and a writer has just released the lock. So another writer may steal
317 * the lock immediately after that.
318 */
319
320/*
321 * Initialize an rwsem:
322 */
323void __init_rwsem(struct rw_semaphore *sem, const char *name,
324 struct lock_class_key *key)
325{
326#ifdef CONFIG_DEBUG_LOCK_ALLOC
327 /*
328 * Make sure we are not reinitializing a held semaphore:
329 */
330 debug_check_no_locks_freed((void *)sem, sizeof(*sem));
331 lockdep_init_map(&sem->dep_map, name, key, 0);
332#endif
333 atomic_long_set(&sem->count, RWSEM_UNLOCKED_VALUE);
334 raw_spin_lock_init(&sem->wait_lock);
335 INIT_LIST_HEAD(&sem->wait_list);
336 atomic_long_set(&sem->owner, 0L);
337#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
338 osq_lock_init(&sem->osq);
339#endif
340}
341EXPORT_SYMBOL(__init_rwsem);
342
343enum rwsem_waiter_type {
344 RWSEM_WAITING_FOR_WRITE,
345 RWSEM_WAITING_FOR_READ
346};
347
348struct rwsem_waiter {
349 struct list_head list;
350 struct task_struct *task;
351 enum rwsem_waiter_type type;
352 unsigned long timeout;
353 unsigned long last_rowner;
354};
355#define rwsem_first_waiter(sem) \
356 list_first_entry(&sem->wait_list, struct rwsem_waiter, list)
357
358enum rwsem_wake_type {
359 RWSEM_WAKE_ANY, /* Wake whatever's at head of wait list */
360 RWSEM_WAKE_READERS, /* Wake readers only */
361 RWSEM_WAKE_READ_OWNED /* Waker thread holds the read lock */
362};
363
364enum writer_wait_state {
365 WRITER_NOT_FIRST, /* Writer is not first in wait list */
366 WRITER_FIRST, /* Writer is first in wait list */
367 WRITER_HANDOFF /* Writer is first & handoff needed */
368};
369
370/*
371 * The typical HZ value is either 250 or 1000. So set the minimum waiting
372 * time to at least 4ms or 1 jiffy (if it is higher than 4ms) in the wait
373 * queue before initiating the handoff protocol.
374 */
375#define RWSEM_WAIT_TIMEOUT DIV_ROUND_UP(HZ, 250)
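
(Illustrative aside, not part of the patch: what RWSEM_WAIT_TIMEOUT evaluates to for common HZ values. DIV_ROUND_UP is redefined locally the same way include/linux/kernel.h defines it.)

#include <stdio.h>

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	int hz_values[] = { 100, 250, 300, 1000 };

	for (unsigned int i = 0; i < sizeof(hz_values) / sizeof(hz_values[0]); i++) {
		int hz = hz_values[i];
		int timeout = DIV_ROUND_UP(hz, 250);	/* RWSEM_WAIT_TIMEOUT */

		/* one jiffy lasts 1000/HZ milliseconds */
		printf("HZ=%4d -> timeout = %d jiffies (~%d ms)\n",
		       hz, timeout, timeout * 1000 / hz);
	}
	return 0;
}
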
376
377/*
378 * Magic number to batch-wakeup waiting readers, even when writers are
379 * also present in the queue. This both limits the amount of work the
380 * waking thread must do and also prevents any potential counter overflow,
381 * however unlikely.
382 */
383#define MAX_READERS_WAKEUP 0x100
384
385/*
386 * handle the lock release when processes blocked on it that can now run
387 * - if we come here from up_xxxx(), then the RWSEM_FLAG_WAITERS bit must
388 * have been set.
389 * - there must be someone on the queue
390 * - the wait_lock must be held by the caller
391 * - tasks are marked for wakeup, the caller must later invoke wake_up_q()
392 * to actually wakeup the blocked task(s) and drop the reference count,
393 * preferably when the wait_lock is released
394 * - woken process blocks are discarded from the list after having task zeroed
395 * - writers are only marked woken if downgrading is false
396 */
397static void rwsem_mark_wake(struct rw_semaphore *sem,
398 enum rwsem_wake_type wake_type,
399 struct wake_q_head *wake_q)
400{
401 struct rwsem_waiter *waiter, *tmp;
402 long oldcount, woken = 0, adjustment = 0;
403 struct list_head wlist;
404
405 lockdep_assert_held(&sem->wait_lock);
406
407 /*
408 * Take a peek at the queue head waiter such that we can determine
409 * the wakeup(s) to perform.
410 */
411 waiter = rwsem_first_waiter(sem);
412
413 if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
414 if (wake_type == RWSEM_WAKE_ANY) {
415 /*
416 * Mark writer at the front of the queue for wakeup.
 417			 * Until the task is actually awoken later by
418 * the caller, other writers are able to steal it.
419 * Readers, on the other hand, will block as they
420 * will notice the queued writer.
421 */
422 wake_q_add(wake_q, waiter->task);
423 lockevent_inc(rwsem_wake_writer);
424 }
425
426 return;
427 }
428
429 /*
430 * No reader wakeup if there are too many of them already.
431 */
432 if (unlikely(atomic_long_read(&sem->count) < 0))
433 return;
434
435 /*
436 * Writers might steal the lock before we grant it to the next reader.
437 * We prefer to do the first reader grant before counting readers
438 * so we can bail out early if a writer stole the lock.
439 */
440 if (wake_type != RWSEM_WAKE_READ_OWNED) {
441 struct task_struct *owner;
442
443 adjustment = RWSEM_READER_BIAS;
444 oldcount = atomic_long_fetch_add(adjustment, &sem->count);
445 if (unlikely(oldcount & RWSEM_WRITER_MASK)) {
446 /*
447 * When we've been waiting "too" long (for writers
448 * to give up the lock), request a HANDOFF to
449 * force the issue.
450 */
451 if (!(oldcount & RWSEM_FLAG_HANDOFF) &&
452 time_after(jiffies, waiter->timeout)) {
453 adjustment -= RWSEM_FLAG_HANDOFF;
454 lockevent_inc(rwsem_rlock_handoff);
455 }
456
457 atomic_long_add(-adjustment, &sem->count);
458 return;
459 }
460 /*
461 * Set it to reader-owned to give spinners an early
462 * indication that readers now have the lock.
463 * The reader nonspinnable bit seen at slowpath entry of
464 * the reader is copied over.
465 */
466 owner = waiter->task;
467 if (waiter->last_rowner & RWSEM_RD_NONSPINNABLE) {
468 owner = (void *)((unsigned long)owner | RWSEM_RD_NONSPINNABLE);
469 lockevent_inc(rwsem_opt_norspin);
470 }
471 __rwsem_set_reader_owned(sem, owner);
472 }
473
474 /*
475 * Grant up to MAX_READERS_WAKEUP read locks to all the readers in the
 476	 * queue. We know that woken will be at least 1 as we accounted
477 * for above. Note we increment the 'active part' of the count by the
478 * number of readers before waking any processes up.
479 *
480 * This is an adaptation of the phase-fair R/W locks where at the
481 * reader phase (first waiter is a reader), all readers are eligible
482 * to acquire the lock at the same time irrespective of their order
483 * in the queue. The writers acquire the lock according to their
484 * order in the queue.
485 *
486 * We have to do wakeup in 2 passes to prevent the possibility that
487 * the reader count may be decremented before it is incremented. It
488 * is because the to-be-woken waiter may not have slept yet. So it
489 * may see waiter->task got cleared, finish its critical section and
490 * do an unlock before the reader count increment.
491 *
492 * 1) Collect the read-waiters in a separate list, count them and
493 * fully increment the reader count in rwsem.
 494	 * 2) For each waiter in the new list, clear waiter->task and
495 * put them into wake_q to be woken up later.
496 */
497 INIT_LIST_HEAD(&wlist);
498 list_for_each_entry_safe(waiter, tmp, &sem->wait_list, list) {
499 if (waiter->type == RWSEM_WAITING_FOR_WRITE)
500 continue;
501
502 woken++;
503 list_move_tail(&waiter->list, &wlist);
504
505 /*
506 * Limit # of readers that can be woken up per wakeup call.
507 */
508 if (woken >= MAX_READERS_WAKEUP)
509 break;
510 }
511
512 adjustment = woken * RWSEM_READER_BIAS - adjustment;
513 lockevent_cond_inc(rwsem_wake_reader, woken);
514 if (list_empty(&sem->wait_list)) {
515 /* hit end of list above */
516 adjustment -= RWSEM_FLAG_WAITERS;
517 }
518
519 /*
520 * When we've woken a reader, we no longer need to force writers
521 * to give up the lock and we can clear HANDOFF.
522 */
523 if (woken && (atomic_long_read(&sem->count) & RWSEM_FLAG_HANDOFF))
524 adjustment -= RWSEM_FLAG_HANDOFF;
525
526 if (adjustment)
527 atomic_long_add(adjustment, &sem->count);
528
529 /* 2nd pass */
530 list_for_each_entry_safe(waiter, tmp, &wlist, list) {
531 struct task_struct *tsk;
532
533 tsk = waiter->task;
534 get_task_struct(tsk);
535
536 /*
537 * Ensure calling get_task_struct() before setting the reader
538 * waiter to nil such that rwsem_down_read_slowpath() cannot
539 * race with do_exit() by always holding a reference count
540 * to the task to wakeup.
541 */
542 smp_store_release(&waiter->task, NULL);
543 /*
544 * Ensure issuing the wakeup (either by us or someone else)
545 * after setting the reader waiter to nil.
546 */
547 wake_q_add_safe(wake_q, tsk);
548 }
549}
550
551/*
552 * This function must be called with the sem->wait_lock held to prevent
553 * race conditions between checking the rwsem wait list and setting the
554 * sem->count accordingly.
555 *
556 * If wstate is WRITER_HANDOFF, it will make sure that either the handoff
557 * bit is set or the lock is acquired with handoff bit cleared.
558 */
559static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
560 enum writer_wait_state wstate)
561{
562 long count, new;
563
564 lockdep_assert_held(&sem->wait_lock);
565
566 count = atomic_long_read(&sem->count);
567 do {
568 bool has_handoff = !!(count & RWSEM_FLAG_HANDOFF);
569
570 if (has_handoff && wstate == WRITER_NOT_FIRST)
571 return false;
572
573 new = count;
574
575 if (count & RWSEM_LOCK_MASK) {
576 if (has_handoff || (wstate != WRITER_HANDOFF))
577 return false;
578
579 new |= RWSEM_FLAG_HANDOFF;
580 } else {
581 new |= RWSEM_WRITER_LOCKED;
582 new &= ~RWSEM_FLAG_HANDOFF;
583
584 if (list_is_singular(&sem->wait_list))
585 new &= ~RWSEM_FLAG_WAITERS;
586 }
587 } while (!atomic_long_try_cmpxchg_acquire(&sem->count, &count, new));
588
589 /*
590 * We have either acquired the lock with handoff bit cleared or
591 * set the handoff bit.
592 */
593 if (new & RWSEM_FLAG_HANDOFF)
594 return false;
595
596 rwsem_set_owner(sem);
597 return true;
598}
599
600#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
601/*
602 * Try to acquire read lock before the reader is put on wait queue.
603 * Lock acquisition isn't allowed if the rwsem is locked or a writer handoff
604 * is ongoing.
605 */
606static inline bool rwsem_try_read_lock_unqueued(struct rw_semaphore *sem)
607{
608 long count = atomic_long_read(&sem->count);
609
610 if (count & (RWSEM_WRITER_MASK | RWSEM_FLAG_HANDOFF))
611 return false;
612
613 count = atomic_long_fetch_add_acquire(RWSEM_READER_BIAS, &sem->count);
614 if (!(count & (RWSEM_WRITER_MASK | RWSEM_FLAG_HANDOFF))) {
615 rwsem_set_reader_owned(sem);
616 lockevent_inc(rwsem_opt_rlock);
617 return true;
618 }
619
620 /* Back out the change */
621 atomic_long_add(-RWSEM_READER_BIAS, &sem->count);
622 return false;
623}
624
625/*
626 * Try to acquire write lock before the writer has been put on wait queue.
627 */
628static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
629{
630 long count = atomic_long_read(&sem->count);
631
632 while (!(count & (RWSEM_LOCK_MASK|RWSEM_FLAG_HANDOFF))) {
633 if (atomic_long_try_cmpxchg_acquire(&sem->count, &count,
634 count | RWSEM_WRITER_LOCKED)) {
635 rwsem_set_owner(sem);
636 lockevent_inc(rwsem_opt_wlock);
637 return true;
638 }
639 }
640 return false;
641}
642
643static inline bool owner_on_cpu(struct task_struct *owner)
644{
645 /*
 646	 * Due to lock holder preemption, we skip spinning if the
 647	 * task is not on a CPU or its CPU is preempted.
648 */
649 return owner->on_cpu && !vcpu_is_preempted(task_cpu(owner));
650}
651
652static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem,
653 unsigned long nonspinnable)
654{
655 struct task_struct *owner;
656 unsigned long flags;
657 bool ret = true;
658
659 BUILD_BUG_ON(!(RWSEM_OWNER_UNKNOWN & RWSEM_NONSPINNABLE));
660
661 if (need_resched()) {
662 lockevent_inc(rwsem_opt_fail);
663 return false;
664 }
665
666 preempt_disable();
667 rcu_read_lock();
668 owner = rwsem_owner_flags(sem, &flags);
669 if ((flags & nonspinnable) || (owner && !owner_on_cpu(owner)))
670 ret = false;
671 rcu_read_unlock();
672 preempt_enable();
673
674 lockevent_cond_inc(rwsem_opt_fail, !ret);
675 return ret;
676}
677
678/*
 679 * The rwsem_spin_on_owner() function returns the following 4 values
680 * depending on the lock owner state.
681 * OWNER_NULL : owner is currently NULL
682 * OWNER_WRITER: when owner changes and is a writer
683 * OWNER_READER: when owner changes and the new owner may be a reader.
684 * OWNER_NONSPINNABLE:
685 * when optimistic spinning has to stop because either the
686 * owner stops running, is unknown, or its timeslice has
687 * been used up.
688 */
689enum owner_state {
690 OWNER_NULL = 1 << 0,
691 OWNER_WRITER = 1 << 1,
692 OWNER_READER = 1 << 2,
693 OWNER_NONSPINNABLE = 1 << 3,
694};
695#define OWNER_SPINNABLE (OWNER_NULL | OWNER_WRITER | OWNER_READER)
696
697static inline enum owner_state
698rwsem_owner_state(struct task_struct *owner, unsigned long flags, unsigned long nonspinnable)
699{
700 if (flags & nonspinnable)
701 return OWNER_NONSPINNABLE;
702
703 if (flags & RWSEM_READER_OWNED)
704 return OWNER_READER;
705
706 return owner ? OWNER_WRITER : OWNER_NULL;
707}
708
709static noinline enum owner_state
710rwsem_spin_on_owner(struct rw_semaphore *sem, unsigned long nonspinnable)
711{
712 struct task_struct *new, *owner;
713 unsigned long flags, new_flags;
714 enum owner_state state;
715
716 owner = rwsem_owner_flags(sem, &flags);
717 state = rwsem_owner_state(owner, flags, nonspinnable);
718 if (state != OWNER_WRITER)
719 return state;
720
721 rcu_read_lock();
722 for (;;) {
723 if (atomic_long_read(&sem->count) & RWSEM_FLAG_HANDOFF) {
724 state = OWNER_NONSPINNABLE;
725 break;
726 }
727
728 new = rwsem_owner_flags(sem, &new_flags);
729 if ((new != owner) || (new_flags != flags)) {
730 state = rwsem_owner_state(new, new_flags, nonspinnable);
731 break;
732 }
733
734 /*
735 * Ensure we emit the owner->on_cpu, dereference _after_
736 * checking sem->owner still matches owner, if that fails,
737 * owner might point to free()d memory, if it still matches,
738 * the rcu_read_lock() ensures the memory stays valid.
739 */
740 barrier();
741
742 if (need_resched() || !owner_on_cpu(owner)) {
743 state = OWNER_NONSPINNABLE;
744 break;
745 }
746
747 cpu_relax();
748 }
749 rcu_read_unlock();
750
751 return state;
752}
753
754/*
755 * Calculate reader-owned rwsem spinning threshold for writer
756 *
757 * The more readers own the rwsem, the longer it will take for them to
758 * wind down and free the rwsem. So the empirical formula used to
759 * determine the actual spinning time limit here is:
760 *
761 * Spinning threshold = (10 + nr_readers/2)us
762 *
763 * The limit is capped to a maximum of 25us (30 readers). This is just
 764 * a heuristic and is subject to change in the future.
765 */
766static inline u64 rwsem_rspin_threshold(struct rw_semaphore *sem)
767{
768 long count = atomic_long_read(&sem->count);
769 int readers = count >> RWSEM_READER_SHIFT;
770 u64 delta;
771
772 if (readers > 30)
773 readers = 30;
774 delta = (20 + readers) * NSEC_PER_USEC / 2;
775
776 return sched_clock() + delta;
777}
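
(Illustrative aside, not part of the patch: a stand-alone sketch that reproduces the rwsem_rspin_threshold() arithmetic above so the heuristic can be checked by hand. sched_clock() is omitted so only the delta is shown; the 30-reader cap follows the code.)

#include <stdio.h>

#define NSEC_PER_USEC	1000ULL

static unsigned long long rspin_delta(int readers)
{
	if (readers > 30)
		readers = 30;	/* cap: 30 readers -> 25us */
	return (20 + readers) * NSEC_PER_USEC / 2;
}

int main(void)
{
	int counts[] = { 1, 10, 30, 100 };

	for (unsigned int i = 0; i < sizeof(counts) / sizeof(counts[0]); i++)
		printf("%3d readers -> spin threshold %llu ns (%.1f us)\n",
		       counts[i], rspin_delta(counts[i]),
		       rspin_delta(counts[i]) / 1000.0);
	return 0;
}
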
778
779static bool rwsem_optimistic_spin(struct rw_semaphore *sem, bool wlock)
780{
781 bool taken = false;
782 int prev_owner_state = OWNER_NULL;
783 int loop = 0;
784 u64 rspin_threshold = 0;
785 unsigned long nonspinnable = wlock ? RWSEM_WR_NONSPINNABLE
786 : RWSEM_RD_NONSPINNABLE;
787
788 preempt_disable();
789
790 /* sem->wait_lock should not be held when doing optimistic spinning */
791 if (!osq_lock(&sem->osq))
792 goto done;
793
794 /*
795 * Optimistically spin on the owner field and attempt to acquire the
796 * lock whenever the owner changes. Spinning will be stopped when:
797 * 1) the owning writer isn't running; or
798 * 2) readers own the lock and spinning time has exceeded limit.
799 */
800 for (;;) {
801 enum owner_state owner_state;
802
803 owner_state = rwsem_spin_on_owner(sem, nonspinnable);
804 if (!(owner_state & OWNER_SPINNABLE))
805 break;
806
807 /*
808 * Try to acquire the lock
809 */
810 taken = wlock ? rwsem_try_write_lock_unqueued(sem)
811 : rwsem_try_read_lock_unqueued(sem);
812
813 if (taken)
814 break;
815
816 /*
817 * Time-based reader-owned rwsem optimistic spinning
818 */
819 if (wlock && (owner_state == OWNER_READER)) {
820 /*
821 * Re-initialize rspin_threshold every time when
822 * the owner state changes from non-reader to reader.
823 * This allows a writer to steal the lock in between
824 * 2 reader phases and have the threshold reset at
825 * the beginning of the 2nd reader phase.
826 */
827 if (prev_owner_state != OWNER_READER) {
828 if (rwsem_test_oflags(sem, nonspinnable))
829 break;
830 rspin_threshold = rwsem_rspin_threshold(sem);
831 loop = 0;
832 }
833
834 /*
835 * Check time threshold once every 16 iterations to
836 * avoid calling sched_clock() too frequently so
837 * as to reduce the average latency between the times
838 * when the lock becomes free and when the spinner
839 * is ready to do a trylock.
840 */
841 else if (!(++loop & 0xf) && (sched_clock() > rspin_threshold)) {
842 rwsem_set_nonspinnable(sem);
843 lockevent_inc(rwsem_opt_nospin);
844 break;
845 }
846 }
847
848 /*
849 * An RT task cannot do optimistic spinning if it cannot
850 * be sure the lock holder is running or live-lock may
851 * happen if the current task and the lock holder happen
852 * to run in the same CPU. However, aborting optimistic
853 * spinning while a NULL owner is detected may miss some
854 * opportunity where spinning can continue without causing
855 * problem.
856 *
857 * There are 2 possible cases where an RT task may be able
858 * to continue spinning.
859 *
860 * 1) The lock owner is in the process of releasing the
861 * lock, sem->owner is cleared but the lock has not
862 * been released yet.
863 * 2) The lock was free and owner cleared, but another
 864	 *    task just comes in and acquires the lock before
865 * we try to get it. The new owner may be a spinnable
866 * writer.
867 *
 868	 * To take advantage of the two scenarios listed above, the RT
869 * task is made to retry one more time to see if it can
870 * acquire the lock or continue spinning on the new owning
871 * writer. Of course, if the time lag is long enough or the
872 * new owner is not a writer or spinnable, the RT task will
873 * quit spinning.
874 *
875 * If the owner is a writer, the need_resched() check is
876 * done inside rwsem_spin_on_owner(). If the owner is not
877 * a writer, need_resched() check needs to be done here.
878 */
879 if (owner_state != OWNER_WRITER) {
880 if (need_resched())
881 break;
882 if (rt_task(current) &&
883 (prev_owner_state != OWNER_WRITER))
884 break;
885 }
886 prev_owner_state = owner_state;
887
888 /*
889 * The cpu_relax() call is a compiler barrier which forces
890 * everything in this loop to be re-loaded. We don't need
891 * memory barriers as we'll eventually observe the right
892 * values at the cost of a few extra spins.
893 */
894 cpu_relax();
895 }
896 osq_unlock(&sem->osq);
897done:
898 preempt_enable();
899 lockevent_cond_inc(rwsem_opt_fail, !taken);
900 return taken;
901}
902
903/*
904 * Clear the owner's RWSEM_WR_NONSPINNABLE bit if it is set. This should
905 * only be called when the reader count reaches 0.
906 *
 907 * This gives writers a better chance to acquire the rwsem before
 908 * readers when the rwsem has been held by readers for a relatively long
 909 * period of time. A race can happen where an optimistic spinner may have
910 * just stolen the rwsem and set the owner, but just clearing the
911 * RWSEM_WR_NONSPINNABLE bit will do no harm anyway.
912 */
913static inline void clear_wr_nonspinnable(struct rw_semaphore *sem)
914{
915 if (rwsem_test_oflags(sem, RWSEM_WR_NONSPINNABLE))
916 atomic_long_andnot(RWSEM_WR_NONSPINNABLE, &sem->owner);
917}
918
919/*
920 * This function is called when the reader fails to acquire the lock via
921 * optimistic spinning. In this case we will still attempt to do a trylock
922 * when comparing the rwsem state right now with the state when entering
923 * the slowpath indicates that the reader is still in a valid reader phase.
924 * This happens when the following conditions are true:
925 *
926 * 1) The lock is currently reader owned, and
 927 * 2) The lock was previously not reader-owned or the last read owner changed.
928 *
929 * In the former case, we have transitioned from a writer phase to a
930 * reader-phase while spinning. In the latter case, it means the reader
931 * phase hasn't ended when we entered the optimistic spinning loop. In
932 * both cases, the reader is eligible to acquire the lock. This is the
933 * secondary path where a read lock is acquired optimistically.
934 *
 935 * The reader non-spinnable bit wasn't set at the time of entry, or this
 936 * path would not be taken at all.
937 */
938static inline bool rwsem_reader_phase_trylock(struct rw_semaphore *sem,
939 unsigned long last_rowner)
940{
941 unsigned long owner = atomic_long_read(&sem->owner);
942
943 if (!(owner & RWSEM_READER_OWNED))
944 return false;
945
946 if (((owner ^ last_rowner) & ~RWSEM_OWNER_FLAGS_MASK) &&
947 rwsem_try_read_lock_unqueued(sem)) {
948 lockevent_inc(rwsem_opt_rlock2);
949 lockevent_add(rwsem_opt_fail, -1);
950 return true;
951 }
952 return false;
953}
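
The trylock above keys off an XOR-and-mask comparison of the saved and current
owner words; restated in isolation (helper name and parameters invented for
illustration, assuming a mask covering the low-order flag bits such as
RWSEM_OWNER_FLAGS_MASK):

	/* Illustration only: has the owning task changed, ignoring flag bits? */
	static inline bool owner_task_changed(unsigned long prev, unsigned long cur,
					      unsigned long flags_mask)
	{
		return ((prev ^ cur) & ~flags_mask) != 0;
	}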
954#else
955static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem,
956 unsigned long nonspinnable)
957{
958 return false;
959}
960
961static inline bool rwsem_optimistic_spin(struct rw_semaphore *sem, bool wlock)
962{
963 return false;
964}
965
966static inline void clear_wr_nonspinnable(struct rw_semaphore *sem) { }
967
968static inline bool rwsem_reader_phase_trylock(struct rw_semaphore *sem,
969 unsigned long last_rowner)
970{
971 return false;
972}
973#endif
974
975/*
976 * Wait for the read lock to be granted
977 */
978static struct rw_semaphore __sched *
979rwsem_down_read_slowpath(struct rw_semaphore *sem, int state)
980{
981 long count, adjustment = -RWSEM_READER_BIAS;
982 struct rwsem_waiter waiter;
983 DEFINE_WAKE_Q(wake_q);
984 bool wake = false;
985
986 /*
987 * Save the current read-owner of rwsem, if available, and the
988 * reader nonspinnable bit.
989 */
990 waiter.last_rowner = atomic_long_read(&sem->owner);
991 if (!(waiter.last_rowner & RWSEM_READER_OWNED))
992 waiter.last_rowner &= RWSEM_RD_NONSPINNABLE;
993
994 if (!rwsem_can_spin_on_owner(sem, RWSEM_RD_NONSPINNABLE))
995 goto queue;
996
997 /*
998 * Undo read bias from down_read() and do optimistic spinning.
999 */
1000 atomic_long_add(-RWSEM_READER_BIAS, &sem->count);
1001 adjustment = 0;
1002 if (rwsem_optimistic_spin(sem, false)) {
1003 /*
1004 * Wake up other readers in the wait list if the front
1005 * waiter is a reader.
1006 */
1007 if ((atomic_long_read(&sem->count) & RWSEM_FLAG_WAITERS)) {
1008 raw_spin_lock_irq(&sem->wait_lock);
1009 if (!list_empty(&sem->wait_list))
1010 rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED,
1011 &wake_q);
1012 raw_spin_unlock_irq(&sem->wait_lock);
1013 wake_up_q(&wake_q);
1014 }
1015 return sem;
1016 } else if (rwsem_reader_phase_trylock(sem, waiter.last_rowner)) {
1017 return sem;
1018 }
1019
1020queue:
1021 waiter.task = current;
1022 waiter.type = RWSEM_WAITING_FOR_READ;
1023 waiter.timeout = jiffies + RWSEM_WAIT_TIMEOUT;
1024
1025 raw_spin_lock_irq(&sem->wait_lock);
1026 if (list_empty(&sem->wait_list)) {
1027 /*
1028		 * In case the wait queue is empty, the lock isn't owned by
1029		 * a writer and the handoff bit isn't set, this reader can
1030 * exit the slowpath and return immediately as its
1031 * RWSEM_READER_BIAS has already been set in the count.
1032 */
1033 if (adjustment && !(atomic_long_read(&sem->count) &
1034 (RWSEM_WRITER_MASK | RWSEM_FLAG_HANDOFF))) {
1035 raw_spin_unlock_irq(&sem->wait_lock);
1036 rwsem_set_reader_owned(sem);
1037 lockevent_inc(rwsem_rlock_fast);
1038 return sem;
1039 }
1040 adjustment += RWSEM_FLAG_WAITERS;
1041 }
1042 list_add_tail(&waiter.list, &sem->wait_list);
1043
1044 /* we're now waiting on the lock, but no longer actively locking */
1045 if (adjustment)
1046 count = atomic_long_add_return(adjustment, &sem->count);
1047 else
1048 count = atomic_long_read(&sem->count);
1049
1050 /*
1051 * If there are no active locks, wake the front queued process(es).
1052 *
1053 * If there are no writers and we are first in the queue,
1054	 * wake our own waiter to join the existing active readers!
1055 */
1056 if (!(count & RWSEM_LOCK_MASK)) {
1057 clear_wr_nonspinnable(sem);
1058 wake = true;
1059 }
1060 if (wake || (!(count & RWSEM_WRITER_MASK) &&
1061 (adjustment & RWSEM_FLAG_WAITERS)))
1062 rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
1063
1064 raw_spin_unlock_irq(&sem->wait_lock);
1065 wake_up_q(&wake_q);
1066
1067 /* wait to be given the lock */
1068 while (true) {
1069 set_current_state(state);
1070 if (!waiter.task)
1071 break;
1072 if (signal_pending_state(state, current)) {
1073 raw_spin_lock_irq(&sem->wait_lock);
1074 if (waiter.task)
1075 goto out_nolock;
1076 raw_spin_unlock_irq(&sem->wait_lock);
1077 break;
1078 }
1079 schedule();
1080 lockevent_inc(rwsem_sleep_reader);
1081 }
1082
1083 __set_current_state(TASK_RUNNING);
1084 lockevent_inc(rwsem_rlock);
1085 return sem;
1086out_nolock:
1087 list_del(&waiter.list);
1088 if (list_empty(&sem->wait_list)) {
1089 atomic_long_andnot(RWSEM_FLAG_WAITERS|RWSEM_FLAG_HANDOFF,
1090 &sem->count);
1091 }
1092 raw_spin_unlock_irq(&sem->wait_lock);
1093 __set_current_state(TASK_RUNNING);
1094 lockevent_inc(rwsem_rlock_fail);
1095 return ERR_PTR(-EINTR);
1096}
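
For context, the slowpath above sits behind the ordinary reader-side API; a
minimal usage sketch (structure and function names invented for illustration)
looks like:

	#include <linux/rwsem.h>
	#include <linux/list.h>

	struct cache {
		struct rw_semaphore lock;
		struct list_head entries;
	};

	static bool cache_contains(struct cache *c, struct list_head *e)
	{
		struct list_head *pos;
		bool found = false;

		down_read(&c->lock);	/* may fall into rwsem_down_read_slowpath() */
		list_for_each(pos, &c->entries)
			if (pos == e) {
				found = true;
				break;
			}
		up_read(&c->lock);
		return found;
	}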
1097
1098/*
1099 * This function is called by a write lock owner, so the owner value
1100 * won't get changed by others.
1101 */
1102static inline void rwsem_disable_reader_optspin(struct rw_semaphore *sem,
1103 bool disable)
1104{
1105 if (unlikely(disable)) {
1106 atomic_long_or(RWSEM_RD_NONSPINNABLE, &sem->owner);
1107 lockevent_inc(rwsem_opt_norspin);
1108 }
1109}
1110
1111/*
1112 * Wait until we successfully acquire the write lock
1113 */
1114static struct rw_semaphore *
1115rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
1116{
1117 long count;
1118 bool disable_rspin;
1119 enum writer_wait_state wstate;
1120 struct rwsem_waiter waiter;
1121 struct rw_semaphore *ret = sem;
1122 DEFINE_WAKE_Q(wake_q);
1123
1124 /* do optimistic spinning and steal lock if possible */
1125 if (rwsem_can_spin_on_owner(sem, RWSEM_WR_NONSPINNABLE) &&
1126 rwsem_optimistic_spin(sem, true))
1127 return sem;
1128
1129 /*
1130 * Disable reader optimistic spinning for this rwsem after
1131	 * acquiring the write lock if the setting of the nonspinnable
1132	 * bits is observed.
1133 */
1134 disable_rspin = atomic_long_read(&sem->owner) & RWSEM_NONSPINNABLE;
1135
1136 /*
1137 * Optimistic spinning failed, proceed to the slowpath
1138 * and block until we can acquire the sem.
1139 */
1140 waiter.task = current;
1141 waiter.type = RWSEM_WAITING_FOR_WRITE;
1142 waiter.timeout = jiffies + RWSEM_WAIT_TIMEOUT;
1143
1144 raw_spin_lock_irq(&sem->wait_lock);
1145
1146 /* account for this before adding a new element to the list */
1147 wstate = list_empty(&sem->wait_list) ? WRITER_FIRST : WRITER_NOT_FIRST;
1148
1149 list_add_tail(&waiter.list, &sem->wait_list);
1150
1151 /* we're now waiting on the lock */
1152 if (wstate == WRITER_NOT_FIRST) {
1153 count = atomic_long_read(&sem->count);
1154
1155 /*
1156 * If there were already threads queued before us and:
1157		 *  1) there are no active locks, wake the front
1158 * queued process(es) as the handoff bit might be set.
1159 * 2) there are no active writers and some readers, the lock
1160 * must be read owned; so we try to wake any read lock
1161 * waiters that were queued ahead of us.
1162 */
1163 if (count & RWSEM_WRITER_MASK)
1164 goto wait;
1165
1166 rwsem_mark_wake(sem, (count & RWSEM_READER_MASK)
1167 ? RWSEM_WAKE_READERS
1168 : RWSEM_WAKE_ANY, &wake_q);
1169
1170 if (!wake_q_empty(&wake_q)) {
1171 /*
1172 * We want to minimize wait_lock hold time especially
1173 * when a large number of readers are to be woken up.
1174 */
1175 raw_spin_unlock_irq(&sem->wait_lock);
1176 wake_up_q(&wake_q);
1177 wake_q_init(&wake_q); /* Used again, reinit */
1178 raw_spin_lock_irq(&sem->wait_lock);
1179 }
1180 } else {
1181 atomic_long_or(RWSEM_FLAG_WAITERS, &sem->count);
1182 }
1183
1184wait:
1185 /* wait until we successfully acquire the lock */
1186 set_current_state(state);
1187 while (true) {
1188 if (rwsem_try_write_lock(sem, wstate))
1189 break;
1190
1191 raw_spin_unlock_irq(&sem->wait_lock);
1192
1193 /* Block until there are no active lockers. */
1194 for (;;) {
1195 if (signal_pending_state(state, current))
1196 goto out_nolock;
1197
1198 schedule();
1199 lockevent_inc(rwsem_sleep_writer);
1200 set_current_state(state);
1201 /*
1202 * If HANDOFF bit is set, unconditionally do
1203 * a trylock.
1204 */
1205 if (wstate == WRITER_HANDOFF)
1206 break;
1207
1208 if ((wstate == WRITER_NOT_FIRST) &&
1209 (rwsem_first_waiter(sem) == &waiter))
1210 wstate = WRITER_FIRST;
1211
1212 count = atomic_long_read(&sem->count);
1213 if (!(count & RWSEM_LOCK_MASK))
1214 break;
1215
1216 /*
1217 * The setting of the handoff bit is deferred
1218 * until rwsem_try_write_lock() is called.
1219 */
1220 if ((wstate == WRITER_FIRST) && (rt_task(current) ||
1221 time_after(jiffies, waiter.timeout))) {
1222 wstate = WRITER_HANDOFF;
1223 lockevent_inc(rwsem_wlock_handoff);
1224 break;
1225 }
1226 }
1227
1228 raw_spin_lock_irq(&sem->wait_lock);
1229 }
1230 __set_current_state(TASK_RUNNING);
1231 list_del(&waiter.list);
1232 rwsem_disable_reader_optspin(sem, disable_rspin);
1233 raw_spin_unlock_irq(&sem->wait_lock);
1234 lockevent_inc(rwsem_wlock);
1235
1236 return ret;
1237
1238out_nolock:
1239 __set_current_state(TASK_RUNNING);
1240 raw_spin_lock_irq(&sem->wait_lock);
1241 list_del(&waiter.list);
1242
1243 if (unlikely(wstate == WRITER_HANDOFF))
1244 atomic_long_add(-RWSEM_FLAG_HANDOFF, &sem->count);
1245
1246 if (list_empty(&sem->wait_list))
1247 atomic_long_andnot(RWSEM_FLAG_WAITERS, &sem->count);
1248 else
1249 rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
1250 raw_spin_unlock_irq(&sem->wait_lock);
1251 wake_up_q(&wake_q);
1252 lockevent_inc(rwsem_wlock_fail);
1253
1254 return ERR_PTR(-EINTR);
1255}
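
The write slowpath above is likewise where a sleeping down_write_killable()
caller ends up; a hedged usage sketch (names invented) handling the -EINTR
return:

	#include <linux/rwsem.h>

	/* Illustration only: a writer that may be interrupted by a fatal signal. */
	static int reset_state(struct rw_semaphore *sem, int *state)
	{
		if (down_write_killable(sem))
			return -EINTR;	/* fatal signal while sleeping in the slowpath */
		*state = 0;
		up_write(sem);
		return 0;
	}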
1256
1257/*
1258 * handle waking up a waiter on the semaphore
1259 * - up_read/up_write has decremented the active part of count if we come here
1260 */
1261static struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem, long count)
1262{
1263 unsigned long flags;
1264 DEFINE_WAKE_Q(wake_q);
1265
1266 raw_spin_lock_irqsave(&sem->wait_lock, flags);
1267
1268 if (!list_empty(&sem->wait_list))
1269 rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
1270
1271 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
1272 wake_up_q(&wake_q);
1273
1274 return sem;
1275}
1276
1277/*
1278 * downgrade a write lock into a read lock
1279 * - caller incremented waiting part of count and discovered it still negative
1280 * - just wake up any readers at the front of the queue
1281 */
1282static struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
1283{
1284 unsigned long flags;
1285 DEFINE_WAKE_Q(wake_q);
1286
1287 raw_spin_lock_irqsave(&sem->wait_lock, flags);
1288
1289 if (!list_empty(&sem->wait_list))
1290 rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED, &wake_q);
1291
1292 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
1293 wake_up_q(&wake_q);
1294
1295 return sem;
1296}
1297
1298/*
1299 * lock for reading
1300 */
1301inline void __down_read(struct rw_semaphore *sem)
1302{
1303 if (!rwsem_read_trylock(sem)) {
1304 rwsem_down_read_slowpath(sem, TASK_UNINTERRUPTIBLE);
1305 DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
1306 } else {
1307 rwsem_set_reader_owned(sem);
1308 }
1309}
1310
1311static inline int __down_read_killable(struct rw_semaphore *sem)
1312{
1313 if (!rwsem_read_trylock(sem)) {
1314 if (IS_ERR(rwsem_down_read_slowpath(sem, TASK_KILLABLE)))
1315 return -EINTR;
1316 DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
1317 } else {
1318 rwsem_set_reader_owned(sem);
1319 }
1320 return 0;
1321}
1322
1323static inline int __down_read_trylock(struct rw_semaphore *sem)
1324{
1325 /*
1326 * Optimize for the case when the rwsem is not locked at all.
1327 */
1328 long tmp = RWSEM_UNLOCKED_VALUE;
1329
1330 do {
1331 if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp,
1332 tmp + RWSEM_READER_BIAS)) {
1333 rwsem_set_reader_owned(sem);
1334 return 1;
1335 }
1336 } while (!(tmp & RWSEM_READ_FAILED_MASK));
1337 return 0;
1338}
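
down_read_trylock() builds on the helper above; a short usage sketch (names
invented) for a caller that must not sleep:

	#include <linux/rwsem.h>
	#include <linux/errno.h>

	/* Illustration only: opportunistic read that backs off on contention. */
	static int peek_state(struct rw_semaphore *sem, const int *state, int *out)
	{
		if (!down_read_trylock(sem))
			return -EBUSY;	/* contended, let the caller retry later */
		*out = *state;
		up_read(sem);
		return 0;
	}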
1339
1340/*
1341 * lock for writing
1342 */
1343static inline void __down_write(struct rw_semaphore *sem)
1344{
1345 long tmp = RWSEM_UNLOCKED_VALUE;
1346
1347 if (unlikely(!atomic_long_try_cmpxchg_acquire(&sem->count, &tmp,
1348 RWSEM_WRITER_LOCKED)))
1349 rwsem_down_write_slowpath(sem, TASK_UNINTERRUPTIBLE);
1350 else
1351 rwsem_set_owner(sem);
1352}
1353
1354static inline int __down_write_killable(struct rw_semaphore *sem)
1355{
1356 long tmp = RWSEM_UNLOCKED_VALUE;
1357
1358 if (unlikely(!atomic_long_try_cmpxchg_acquire(&sem->count, &tmp,
1359 RWSEM_WRITER_LOCKED))) {
1360 if (IS_ERR(rwsem_down_write_slowpath(sem, TASK_KILLABLE)))
1361 return -EINTR;
1362 } else {
1363 rwsem_set_owner(sem);
1364 }
1365 return 0;
1366}
1367
1368static inline int __down_write_trylock(struct rw_semaphore *sem)
1369{
1370 long tmp = RWSEM_UNLOCKED_VALUE;
1371
1372 if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp,
1373 RWSEM_WRITER_LOCKED)) {
1374 rwsem_set_owner(sem);
1375 return true;
1376 }
1377 return false;
1378}
1379
1380/*
1381 * unlock after reading
1382 */
1383inline void __up_read(struct rw_semaphore *sem)
1384{
1385 long tmp;
1386
1387 DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
1388 rwsem_clear_reader_owned(sem);
1389 tmp = atomic_long_add_return_release(-RWSEM_READER_BIAS, &sem->count);
1390 DEBUG_RWSEMS_WARN_ON(tmp < 0, sem);
1391 if (unlikely((tmp & (RWSEM_LOCK_MASK|RWSEM_FLAG_WAITERS)) ==
1392 RWSEM_FLAG_WAITERS)) {
1393 clear_wr_nonspinnable(sem);
1394 rwsem_wake(sem, tmp);
1395 }
1396}
1397
1398/*
1399 * unlock after writing
1400 */
1401static inline void __up_write(struct rw_semaphore *sem)
1402{
1403 long tmp;
1404
1405 /*
1406 * sem->owner may differ from current if the ownership is transferred
1407 * to an anonymous writer by setting the RWSEM_NONSPINNABLE bits.
1408 */
1409 DEBUG_RWSEMS_WARN_ON((rwsem_owner(sem) != current) &&
1410 !rwsem_test_oflags(sem, RWSEM_NONSPINNABLE), sem);
1411 rwsem_clear_owner(sem);
1412 tmp = atomic_long_fetch_add_release(-RWSEM_WRITER_LOCKED, &sem->count);
1413 if (unlikely(tmp & RWSEM_FLAG_WAITERS))
1414 rwsem_wake(sem, tmp);
1415}
1416
1417/*
1418 * downgrade write lock to read lock
1419 */
1420static inline void __downgrade_write(struct rw_semaphore *sem)
1421{
1422 long tmp;
1423
1424 /*
1425 * When downgrading from exclusive to shared ownership,
1426 * anything inside the write-locked region cannot leak
1427 * into the read side. In contrast, anything in the
1428 * read-locked region is ok to be re-ordered into the
1429 * write side. As such, rely on RELEASE semantics.
1430 */
1431 DEBUG_RWSEMS_WARN_ON(rwsem_owner(sem) != current, sem);
1432 tmp = atomic_long_fetch_add_release(
1433 -RWSEM_WRITER_LOCKED+RWSEM_READER_BIAS, &sem->count);
1434 rwsem_set_reader_owned(sem);
1435 if (tmp & RWSEM_FLAG_WAITERS)
1436 rwsem_downgrade_wake(sem);
1437}
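
A typical use of the downgrade path above is to publish an update exclusively
and then keep reading alongside other readers; a hedged sketch (names
invented):

	#include <linux/rwsem.h>

	/* Illustration only: exclusive update followed by shared access. */
	static void publish_and_read(struct rw_semaphore *sem, int *state, int val)
	{
		down_write(sem);
		*state = val;			/* exclusive update */
		downgrade_write(sem);		/* lets waiting readers proceed */
		/* ... continue using *state under the read lock ... */
		up_read(sem);			/* release the downgraded lock */
	}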
17 1438
18/* 1439/*
19 * lock for reading 1440 * lock for reading
@@ -25,7 +1446,6 @@ void __sched down_read(struct rw_semaphore *sem)
25 1446
26 LOCK_CONTENDED(sem, __down_read_trylock, __down_read); 1447 LOCK_CONTENDED(sem, __down_read_trylock, __down_read);
27} 1448}
28
29EXPORT_SYMBOL(down_read); 1449EXPORT_SYMBOL(down_read);
30 1450
31int __sched down_read_killable(struct rw_semaphore *sem) 1451int __sched down_read_killable(struct rw_semaphore *sem)
@@ -40,7 +1460,6 @@ int __sched down_read_killable(struct rw_semaphore *sem)
40 1460
41 return 0; 1461 return 0;
42} 1462}
43
44EXPORT_SYMBOL(down_read_killable); 1463EXPORT_SYMBOL(down_read_killable);
45 1464
46/* 1465/*
@@ -54,7 +1473,6 @@ int down_read_trylock(struct rw_semaphore *sem)
54 rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_); 1473 rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_);
55 return ret; 1474 return ret;
56} 1475}
57
58EXPORT_SYMBOL(down_read_trylock); 1476EXPORT_SYMBOL(down_read_trylock);
59 1477
60/* 1478/*
@@ -64,10 +1482,8 @@ void __sched down_write(struct rw_semaphore *sem)
64{ 1482{
65 might_sleep(); 1483 might_sleep();
66 rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_); 1484 rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
67
68 LOCK_CONTENDED(sem, __down_write_trylock, __down_write); 1485 LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
69} 1486}
70
71EXPORT_SYMBOL(down_write); 1487EXPORT_SYMBOL(down_write);
72 1488
73/* 1489/*
@@ -78,14 +1494,14 @@ int __sched down_write_killable(struct rw_semaphore *sem)
78 might_sleep(); 1494 might_sleep();
79 rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_); 1495 rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
80 1496
81 if (LOCK_CONTENDED_RETURN(sem, __down_write_trylock, __down_write_killable)) { 1497 if (LOCK_CONTENDED_RETURN(sem, __down_write_trylock,
1498 __down_write_killable)) {
82 rwsem_release(&sem->dep_map, 1, _RET_IP_); 1499 rwsem_release(&sem->dep_map, 1, _RET_IP_);
83 return -EINTR; 1500 return -EINTR;
84 } 1501 }
85 1502
86 return 0; 1503 return 0;
87} 1504}
88
89EXPORT_SYMBOL(down_write_killable); 1505EXPORT_SYMBOL(down_write_killable);
90 1506
91/* 1507/*
@@ -100,7 +1516,6 @@ int down_write_trylock(struct rw_semaphore *sem)
100 1516
101 return ret; 1517 return ret;
102} 1518}
103
104EXPORT_SYMBOL(down_write_trylock); 1519EXPORT_SYMBOL(down_write_trylock);
105 1520
106/* 1521/*
@@ -109,10 +1524,8 @@ EXPORT_SYMBOL(down_write_trylock);
109void up_read(struct rw_semaphore *sem) 1524void up_read(struct rw_semaphore *sem)
110{ 1525{
111 rwsem_release(&sem->dep_map, 1, _RET_IP_); 1526 rwsem_release(&sem->dep_map, 1, _RET_IP_);
112
113 __up_read(sem); 1527 __up_read(sem);
114} 1528}
115
116EXPORT_SYMBOL(up_read); 1529EXPORT_SYMBOL(up_read);
117 1530
118/* 1531/*
@@ -121,10 +1534,8 @@ EXPORT_SYMBOL(up_read);
121void up_write(struct rw_semaphore *sem) 1534void up_write(struct rw_semaphore *sem)
122{ 1535{
123 rwsem_release(&sem->dep_map, 1, _RET_IP_); 1536 rwsem_release(&sem->dep_map, 1, _RET_IP_);
124
125 __up_write(sem); 1537 __up_write(sem);
126} 1538}
127
128EXPORT_SYMBOL(up_write); 1539EXPORT_SYMBOL(up_write);
129 1540
130/* 1541/*
@@ -133,10 +1544,8 @@ EXPORT_SYMBOL(up_write);
133void downgrade_write(struct rw_semaphore *sem) 1544void downgrade_write(struct rw_semaphore *sem)
134{ 1545{
135 lock_downgrade(&sem->dep_map, _RET_IP_); 1546 lock_downgrade(&sem->dep_map, _RET_IP_);
136
137 __downgrade_write(sem); 1547 __downgrade_write(sem);
138} 1548}
139
140EXPORT_SYMBOL(downgrade_write); 1549EXPORT_SYMBOL(downgrade_write);
141 1550
142#ifdef CONFIG_DEBUG_LOCK_ALLOC 1551#ifdef CONFIG_DEBUG_LOCK_ALLOC
@@ -145,40 +1554,32 @@ void down_read_nested(struct rw_semaphore *sem, int subclass)
145{ 1554{
146 might_sleep(); 1555 might_sleep();
147 rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_); 1556 rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_);
148
149 LOCK_CONTENDED(sem, __down_read_trylock, __down_read); 1557 LOCK_CONTENDED(sem, __down_read_trylock, __down_read);
150} 1558}
151
152EXPORT_SYMBOL(down_read_nested); 1559EXPORT_SYMBOL(down_read_nested);
153 1560
154void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest) 1561void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest)
155{ 1562{
156 might_sleep(); 1563 might_sleep();
157 rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_); 1564 rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_);
158
159 LOCK_CONTENDED(sem, __down_write_trylock, __down_write); 1565 LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
160} 1566}
161
162EXPORT_SYMBOL(_down_write_nest_lock); 1567EXPORT_SYMBOL(_down_write_nest_lock);
163 1568
164void down_read_non_owner(struct rw_semaphore *sem) 1569void down_read_non_owner(struct rw_semaphore *sem)
165{ 1570{
166 might_sleep(); 1571 might_sleep();
167
168 __down_read(sem); 1572 __down_read(sem);
169 __rwsem_set_reader_owned(sem, NULL); 1573 __rwsem_set_reader_owned(sem, NULL);
170} 1574}
171
172EXPORT_SYMBOL(down_read_non_owner); 1575EXPORT_SYMBOL(down_read_non_owner);
173 1576
174void down_write_nested(struct rw_semaphore *sem, int subclass) 1577void down_write_nested(struct rw_semaphore *sem, int subclass)
175{ 1578{
176 might_sleep(); 1579 might_sleep();
177 rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_); 1580 rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_);
178
179 LOCK_CONTENDED(sem, __down_write_trylock, __down_write); 1581 LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
180} 1582}
181
182EXPORT_SYMBOL(down_write_nested); 1583EXPORT_SYMBOL(down_write_nested);
183 1584
184int __sched down_write_killable_nested(struct rw_semaphore *sem, int subclass) 1585int __sched down_write_killable_nested(struct rw_semaphore *sem, int subclass)
@@ -186,23 +1587,21 @@ int __sched down_write_killable_nested(struct rw_semaphore *sem, int subclass)
186 might_sleep(); 1587 might_sleep();
187 rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_); 1588 rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_);
188 1589
189 if (LOCK_CONTENDED_RETURN(sem, __down_write_trylock, __down_write_killable)) { 1590 if (LOCK_CONTENDED_RETURN(sem, __down_write_trylock,
1591 __down_write_killable)) {
190 rwsem_release(&sem->dep_map, 1, _RET_IP_); 1592 rwsem_release(&sem->dep_map, 1, _RET_IP_);
191 return -EINTR; 1593 return -EINTR;
192 } 1594 }
193 1595
194 return 0; 1596 return 0;
195} 1597}
196
197EXPORT_SYMBOL(down_write_killable_nested); 1598EXPORT_SYMBOL(down_write_killable_nested);
198 1599
199void up_read_non_owner(struct rw_semaphore *sem) 1600void up_read_non_owner(struct rw_semaphore *sem)
200{ 1601{
201 DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED), 1602 DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
202 sem);
203 __up_read(sem); 1603 __up_read(sem);
204} 1604}
205
206EXPORT_SYMBOL(up_read_non_owner); 1605EXPORT_SYMBOL(up_read_non_owner);
207 1606
208#endif 1607#endif
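
For completeness, the public API exercised by the hunks above is set up in the
usual two ways; a minimal sketch (names invented):

	#include <linux/rwsem.h>

	static DECLARE_RWSEM(global_sem);	/* statically initialized rwsem */

	struct foo {
		struct rw_semaphore sem;
	};

	static void foo_init(struct foo *f)
	{
		init_rwsem(&f->sem);		/* dynamically initialized rwsem */
	}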
diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h
index 64877f5294e3..2534ce49f648 100644
--- a/kernel/locking/rwsem.h
+++ b/kernel/locking/rwsem.h
@@ -1,304 +1,10 @@
1/* SPDX-License-Identifier: GPL-2.0 */ 1/* SPDX-License-Identifier: GPL-2.0 */
2/*
3 * The least significant 2 bits of the owner value has the following
4 * meanings when set.
5 * - RWSEM_READER_OWNED (bit 0): The rwsem is owned by readers
6 * - RWSEM_ANONYMOUSLY_OWNED (bit 1): The rwsem is anonymously owned,
7 * i.e. the owner(s) cannot be readily determined. It can be reader
8 * owned or the owning writer is indeterminate.
9 *
10 * When a writer acquires a rwsem, it puts its task_struct pointer
11 * into the owner field. It is cleared after an unlock.
12 *
13 * When a reader acquires a rwsem, it will also puts its task_struct
14 * pointer into the owner field with both the RWSEM_READER_OWNED and
15 * RWSEM_ANONYMOUSLY_OWNED bits set. On unlock, the owner field will
16 * largely be left untouched. So for a free or reader-owned rwsem,
17 * the owner value may contain information about the last reader that
18 * acquires the rwsem. The anonymous bit is set because that particular
19 * reader may or may not still own the lock.
20 *
21 * That information may be helpful in debugging cases where the system
22 * seems to hang on a reader owned rwsem especially if only one reader
23 * is involved. Ideally we would like to track all the readers that own
24 * a rwsem, but the overhead is simply too big.
25 */
26#include "lock_events.h"
27 2
28#define RWSEM_READER_OWNED (1UL << 0) 3#ifndef __INTERNAL_RWSEM_H
29#define RWSEM_ANONYMOUSLY_OWNED (1UL << 1) 4#define __INTERNAL_RWSEM_H
5#include <linux/rwsem.h>
30 6
31#ifdef CONFIG_DEBUG_RWSEMS 7extern void __down_read(struct rw_semaphore *sem);
32# define DEBUG_RWSEMS_WARN_ON(c, sem) do { \ 8extern void __up_read(struct rw_semaphore *sem);
33 if (!debug_locks_silent && \
34 WARN_ONCE(c, "DEBUG_RWSEMS_WARN_ON(%s): count = 0x%lx, owner = 0x%lx, curr 0x%lx, list %sempty\n",\
35 #c, atomic_long_read(&(sem)->count), \
36 (long)((sem)->owner), (long)current, \
37 list_empty(&(sem)->wait_list) ? "" : "not ")) \
38 debug_locks_off(); \
39 } while (0)
40#else
41# define DEBUG_RWSEMS_WARN_ON(c, sem)
42#endif
43 9
44/* 10#endif /* __INTERNAL_RWSEM_H */
45 * R/W semaphores originally for PPC using the stuff in lib/rwsem.c.
46 * Adapted largely from include/asm-i386/rwsem.h
47 * by Paul Mackerras <paulus@samba.org>.
48 */
49
50/*
51 * the semaphore definition
52 */
53#ifdef CONFIG_64BIT
54# define RWSEM_ACTIVE_MASK 0xffffffffL
55#else
56# define RWSEM_ACTIVE_MASK 0x0000ffffL
57#endif
58
59#define RWSEM_ACTIVE_BIAS 0x00000001L
60#define RWSEM_WAITING_BIAS (-RWSEM_ACTIVE_MASK-1)
61#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
62#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
63
64#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
65/*
66 * All writes to owner are protected by WRITE_ONCE() to make sure that
67 * store tearing can't happen as optimistic spinners may read and use
68 * the owner value concurrently without lock. Read from owner, however,
69 * may not need READ_ONCE() as long as the pointer value is only used
70 * for comparison and isn't being dereferenced.
71 */
72static inline void rwsem_set_owner(struct rw_semaphore *sem)
73{
74 WRITE_ONCE(sem->owner, current);
75}
76
77static inline void rwsem_clear_owner(struct rw_semaphore *sem)
78{
79 WRITE_ONCE(sem->owner, NULL);
80}
81
82/*
83 * The task_struct pointer of the last owning reader will be left in
84 * the owner field.
85 *
86 * Note that the owner value just indicates the task has owned the rwsem
87 * previously, it may not be the real owner or one of the real owners
88 * anymore when that field is examined, so take it with a grain of salt.
89 */
90static inline void __rwsem_set_reader_owned(struct rw_semaphore *sem,
91 struct task_struct *owner)
92{
93 unsigned long val = (unsigned long)owner | RWSEM_READER_OWNED
94 | RWSEM_ANONYMOUSLY_OWNED;
95
96 WRITE_ONCE(sem->owner, (struct task_struct *)val);
97}
98
99static inline void rwsem_set_reader_owned(struct rw_semaphore *sem)
100{
101 __rwsem_set_reader_owned(sem, current);
102}
103
104/*
105 * Return true if the a rwsem waiter can spin on the rwsem's owner
106 * and steal the lock, i.e. the lock is not anonymously owned.
107 * N.B. !owner is considered spinnable.
108 */
109static inline bool is_rwsem_owner_spinnable(struct task_struct *owner)
110{
111 return !((unsigned long)owner & RWSEM_ANONYMOUSLY_OWNED);
112}
113
114/*
115 * Return true if rwsem is owned by an anonymous writer or readers.
116 */
117static inline bool rwsem_has_anonymous_owner(struct task_struct *owner)
118{
119 return (unsigned long)owner & RWSEM_ANONYMOUSLY_OWNED;
120}
121
122#ifdef CONFIG_DEBUG_RWSEMS
123/*
124 * With CONFIG_DEBUG_RWSEMS configured, it will make sure that if there
125 * is a task pointer in owner of a reader-owned rwsem, it will be the
126 * real owner or one of the real owners. The only exception is when the
127 * unlock is done by up_read_non_owner().
128 */
129#define rwsem_clear_reader_owned rwsem_clear_reader_owned
130static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
131{
132 unsigned long val = (unsigned long)current | RWSEM_READER_OWNED
133 | RWSEM_ANONYMOUSLY_OWNED;
134 if (READ_ONCE(sem->owner) == (struct task_struct *)val)
135 cmpxchg_relaxed((unsigned long *)&sem->owner, val,
136 RWSEM_READER_OWNED | RWSEM_ANONYMOUSLY_OWNED);
137}
138#endif
139
140#else
141static inline void rwsem_set_owner(struct rw_semaphore *sem)
142{
143}
144
145static inline void rwsem_clear_owner(struct rw_semaphore *sem)
146{
147}
148
149static inline void __rwsem_set_reader_owned(struct rw_semaphore *sem,
150 struct task_struct *owner)
151{
152}
153
154static inline void rwsem_set_reader_owned(struct rw_semaphore *sem)
155{
156}
157#endif
158
159#ifndef rwsem_clear_reader_owned
160static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
161{
162}
163#endif
164
165extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
166extern struct rw_semaphore *rwsem_down_read_failed_killable(struct rw_semaphore *sem);
167extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
168extern struct rw_semaphore *rwsem_down_write_failed_killable(struct rw_semaphore *sem);
169extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem);
170extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
171
172/*
173 * lock for reading
174 */
175static inline void __down_read(struct rw_semaphore *sem)
176{
177 if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0)) {
178 rwsem_down_read_failed(sem);
179 DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner &
180 RWSEM_READER_OWNED), sem);
181 } else {
182 rwsem_set_reader_owned(sem);
183 }
184}
185
186static inline int __down_read_killable(struct rw_semaphore *sem)
187{
188 if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0)) {
189 if (IS_ERR(rwsem_down_read_failed_killable(sem)))
190 return -EINTR;
191 DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner &
192 RWSEM_READER_OWNED), sem);
193 } else {
194 rwsem_set_reader_owned(sem);
195 }
196 return 0;
197}
198
199static inline int __down_read_trylock(struct rw_semaphore *sem)
200{
201 /*
202 * Optimize for the case when the rwsem is not locked at all.
203 */
204 long tmp = RWSEM_UNLOCKED_VALUE;
205
206 lockevent_inc(rwsem_rtrylock);
207 do {
208 if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp,
209 tmp + RWSEM_ACTIVE_READ_BIAS)) {
210 rwsem_set_reader_owned(sem);
211 return 1;
212 }
213 } while (tmp >= 0);
214 return 0;
215}
216
217/*
218 * lock for writing
219 */
220static inline void __down_write(struct rw_semaphore *sem)
221{
222 long tmp;
223
224 tmp = atomic_long_add_return_acquire(RWSEM_ACTIVE_WRITE_BIAS,
225 &sem->count);
226 if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS))
227 rwsem_down_write_failed(sem);
228 rwsem_set_owner(sem);
229}
230
231static inline int __down_write_killable(struct rw_semaphore *sem)
232{
233 long tmp;
234
235 tmp = atomic_long_add_return_acquire(RWSEM_ACTIVE_WRITE_BIAS,
236 &sem->count);
237 if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS))
238 if (IS_ERR(rwsem_down_write_failed_killable(sem)))
239 return -EINTR;
240 rwsem_set_owner(sem);
241 return 0;
242}
243
244static inline int __down_write_trylock(struct rw_semaphore *sem)
245{
246 long tmp;
247
248 lockevent_inc(rwsem_wtrylock);
249 tmp = atomic_long_cmpxchg_acquire(&sem->count, RWSEM_UNLOCKED_VALUE,
250 RWSEM_ACTIVE_WRITE_BIAS);
251 if (tmp == RWSEM_UNLOCKED_VALUE) {
252 rwsem_set_owner(sem);
253 return true;
254 }
255 return false;
256}
257
258/*
259 * unlock after reading
260 */
261static inline void __up_read(struct rw_semaphore *sem)
262{
263 long tmp;
264
265 DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED),
266 sem);
267 rwsem_clear_reader_owned(sem);
268 tmp = atomic_long_dec_return_release(&sem->count);
269 if (unlikely(tmp < -1 && (tmp & RWSEM_ACTIVE_MASK) == 0))
270 rwsem_wake(sem);
271}
272
273/*
274 * unlock after writing
275 */
276static inline void __up_write(struct rw_semaphore *sem)
277{
278 DEBUG_RWSEMS_WARN_ON(sem->owner != current, sem);
279 rwsem_clear_owner(sem);
280 if (unlikely(atomic_long_sub_return_release(RWSEM_ACTIVE_WRITE_BIAS,
281 &sem->count) < 0))
282 rwsem_wake(sem);
283}
284
285/*
286 * downgrade write lock to read lock
287 */
288static inline void __downgrade_write(struct rw_semaphore *sem)
289{
290 long tmp;
291
292 /*
293 * When downgrading from exclusive to shared ownership,
294 * anything inside the write-locked region cannot leak
295 * into the read side. In contrast, anything in the
296 * read-locked region is ok to be re-ordered into the
297 * write side. As such, rely on RELEASE semantics.
298 */
299 DEBUG_RWSEMS_WARN_ON(sem->owner != current, sem);
300 tmp = atomic_long_add_return_release(-RWSEM_WAITING_BIAS, &sem->count);
301 rwsem_set_reader_owned(sem);
302 if (tmp < 0)
303 rwsem_downgrade_wake(sem);
304}
diff --git a/kernel/module.c b/kernel/module.c
index 80c7c09584cf..a2cee14a83f3 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -3083,6 +3083,11 @@ static int find_module_sections(struct module *mod, struct load_info *info)
3083 sizeof(*mod->tracepoints_ptrs), 3083 sizeof(*mod->tracepoints_ptrs),
3084 &mod->num_tracepoints); 3084 &mod->num_tracepoints);
3085#endif 3085#endif
3086#ifdef CONFIG_TREE_SRCU
3087 mod->srcu_struct_ptrs = section_objs(info, "___srcu_struct_ptrs",
3088 sizeof(*mod->srcu_struct_ptrs),
3089 &mod->num_srcu_structs);
3090#endif
3086#ifdef CONFIG_BPF_EVENTS 3091#ifdef CONFIG_BPF_EVENTS
3087 mod->bpf_raw_events = section_objs(info, "__bpf_raw_tp_map", 3092 mod->bpf_raw_events = section_objs(info, "__bpf_raw_tp_map",
3088 sizeof(*mod->bpf_raw_events), 3093 sizeof(*mod->bpf_raw_events),
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 9505101ed2bc..096211299c07 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -493,6 +493,9 @@ int suspend_devices_and_enter(suspend_state_t state)
493 493
494 pm_suspend_target_state = state; 494 pm_suspend_target_state = state;
495 495
496 if (state == PM_SUSPEND_TO_IDLE)
497 pm_set_suspend_no_platform();
498
496 error = platform_suspend_begin(state); 499 error = platform_suspend_begin(state);
497 if (error) 500 if (error)
498 goto Close; 501 goto Close;
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 8456b6e2205f..83a531cea2f3 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -79,9 +79,7 @@ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent,
79 */ 79 */
80static void ptrace_link(struct task_struct *child, struct task_struct *new_parent) 80static void ptrace_link(struct task_struct *child, struct task_struct *new_parent)
81{ 81{
82 rcu_read_lock(); 82 __ptrace_link(child, new_parent, current_cred());
83 __ptrace_link(child, new_parent, __task_cred(new_parent));
84 rcu_read_unlock();
85} 83}
86 84
87/** 85/**
@@ -118,6 +116,9 @@ void __ptrace_unlink(struct task_struct *child)
118 BUG_ON(!child->ptrace); 116 BUG_ON(!child->ptrace);
119 117
120 clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); 118 clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
119#ifdef TIF_SYSCALL_EMU
120 clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
121#endif
121 122
122 child->parent = child->real_parent; 123 child->parent = child->real_parent;
123 list_del_init(&child->ptrace_entry); 124 list_del_init(&child->ptrace_entry);
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 390aab20115e..5290b01de534 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -446,6 +446,7 @@ void rcu_request_urgent_qs_task(struct task_struct *t);
446enum rcutorture_type { 446enum rcutorture_type {
447 RCU_FLAVOR, 447 RCU_FLAVOR,
448 RCU_TASKS_FLAVOR, 448 RCU_TASKS_FLAVOR,
449 RCU_TRIVIAL_FLAVOR,
449 SRCU_FLAVOR, 450 SRCU_FLAVOR,
450 INVALID_RCU_FLAVOR 451 INVALID_RCU_FLAVOR
451}; 452};
@@ -479,6 +480,10 @@ void do_trace_rcu_torture_read(const char *rcutorturename,
479#endif 480#endif
480#endif 481#endif
481 482
483#if IS_ENABLED(CONFIG_RCU_TORTURE_TEST) || IS_MODULE(CONFIG_RCU_TORTURE_TEST)
484long rcutorture_sched_setaffinity(pid_t pid, const struct cpumask *in_mask);
485#endif
486
482#ifdef CONFIG_TINY_SRCU 487#ifdef CONFIG_TINY_SRCU
483 488
484static inline void srcutorture_get_gp_data(enum rcutorture_type test_type, 489static inline void srcutorture_get_gp_data(enum rcutorture_type test_type,
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index efaa5b3f4d3f..fce4e7e6f502 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -299,6 +299,7 @@ struct rcu_torture_ops {
299 int irq_capable; 299 int irq_capable;
300 int can_boost; 300 int can_boost;
301 int extendables; 301 int extendables;
302 int slow_gps;
302 const char *name; 303 const char *name;
303}; 304};
304 305
@@ -667,9 +668,51 @@ static struct rcu_torture_ops tasks_ops = {
667 .fqs = NULL, 668 .fqs = NULL,
668 .stats = NULL, 669 .stats = NULL,
669 .irq_capable = 1, 670 .irq_capable = 1,
671 .slow_gps = 1,
670 .name = "tasks" 672 .name = "tasks"
671}; 673};
672 674
675/*
676 * Definitions for trivial CONFIG_PREEMPT=n-only torture testing.
677 * This implementation does not necessarily work well with CPU hotplug.
678 */
679
680static void synchronize_rcu_trivial(void)
681{
682 int cpu;
683
684 for_each_online_cpu(cpu) {
685 rcutorture_sched_setaffinity(current->pid, cpumask_of(cpu));
686 WARN_ON_ONCE(raw_smp_processor_id() != cpu);
687 }
688}
689
690static int rcu_torture_read_lock_trivial(void) __acquires(RCU)
691{
692 preempt_disable();
693 return 0;
694}
695
696static void rcu_torture_read_unlock_trivial(int idx) __releases(RCU)
697{
698 preempt_enable();
699}
700
701static struct rcu_torture_ops trivial_ops = {
702 .ttype = RCU_TRIVIAL_FLAVOR,
703 .init = rcu_sync_torture_init,
704 .readlock = rcu_torture_read_lock_trivial,
705 .read_delay = rcu_read_delay, /* just reuse rcu's version. */
706 .readunlock = rcu_torture_read_unlock_trivial,
707 .get_gp_seq = rcu_no_completed,
708 .sync = synchronize_rcu_trivial,
709 .exp_sync = synchronize_rcu_trivial,
710 .fqs = NULL,
711 .stats = NULL,
712 .irq_capable = 1,
713 .name = "trivial"
714};
715
673static unsigned long rcutorture_seq_diff(unsigned long new, unsigned long old) 716static unsigned long rcutorture_seq_diff(unsigned long new, unsigned long old)
674{ 717{
675 if (!cur_ops->gp_diff) 718 if (!cur_ops->gp_diff)
@@ -1010,10 +1053,17 @@ rcu_torture_writer(void *arg)
1010 !rcu_gp_is_normal(); 1053 !rcu_gp_is_normal();
1011 } 1054 }
1012 rcu_torture_writer_state = RTWS_STUTTER; 1055 rcu_torture_writer_state = RTWS_STUTTER;
1013 if (stutter_wait("rcu_torture_writer")) 1056 if (stutter_wait("rcu_torture_writer") &&
1057 !READ_ONCE(rcu_fwd_cb_nodelay) &&
1058 !cur_ops->slow_gps &&
1059 !torture_must_stop())
1014 for (i = 0; i < ARRAY_SIZE(rcu_tortures); i++) 1060 for (i = 0; i < ARRAY_SIZE(rcu_tortures); i++)
1015 if (list_empty(&rcu_tortures[i].rtort_free)) 1061 if (list_empty(&rcu_tortures[i].rtort_free) &&
1016 WARN_ON_ONCE(1); 1062 rcu_access_pointer(rcu_torture_current) !=
1063 &rcu_tortures[i]) {
1064 rcu_ftrace_dump(DUMP_ALL);
1065 WARN(1, "%s: rtort_pipe_count: %d\n", __func__, rcu_tortures[i].rtort_pipe_count);
1066 }
1017 } while (!torture_must_stop()); 1067 } while (!torture_must_stop());
1018 /* Reset expediting back to unexpedited. */ 1068 /* Reset expediting back to unexpedited. */
1019 if (expediting > 0) 1069 if (expediting > 0)
@@ -1358,8 +1408,9 @@ rcu_torture_stats_print(void)
1358 } 1408 }
1359 1409
1360 pr_alert("%s%s ", torture_type, TORTURE_FLAG); 1410 pr_alert("%s%s ", torture_type, TORTURE_FLAG);
1361 pr_cont("rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d ", 1411 pr_cont("rtc: %p %s: %lu tfle: %d rta: %d rtaf: %d rtf: %d ",
1362 rcu_torture_current, 1412 rcu_torture_current,
1413 rcu_torture_current ? "ver" : "VER",
1363 rcu_torture_current_version, 1414 rcu_torture_current_version,
1364 list_empty(&rcu_torture_freelist), 1415 list_empty(&rcu_torture_freelist),
1365 atomic_read(&n_rcu_torture_alloc), 1416 atomic_read(&n_rcu_torture_alloc),
@@ -1661,6 +1712,17 @@ static void rcu_torture_fwd_cb_cr(struct rcu_head *rhp)
1661 spin_unlock_irqrestore(&rcu_fwd_lock, flags); 1712 spin_unlock_irqrestore(&rcu_fwd_lock, flags);
1662} 1713}
1663 1714
1715// Give the scheduler a chance, even on nohz_full CPUs.
1716static void rcu_torture_fwd_prog_cond_resched(void)
1717{
1718 if (IS_ENABLED(CONFIG_PREEMPT) && IS_ENABLED(CONFIG_NO_HZ_FULL)) {
1719 if (need_resched())
1720 schedule();
1721 } else {
1722 cond_resched();
1723 }
1724}
1725
1664/* 1726/*
1665 * Free all callbacks on the rcu_fwd_cb_head list, either because the 1727 * Free all callbacks on the rcu_fwd_cb_head list, either because the
1666 * test is over or because we hit an OOM event. 1728 * test is over or because we hit an OOM event.
@@ -1674,16 +1736,18 @@ static unsigned long rcu_torture_fwd_prog_cbfree(void)
1674 for (;;) { 1736 for (;;) {
1675 spin_lock_irqsave(&rcu_fwd_lock, flags); 1737 spin_lock_irqsave(&rcu_fwd_lock, flags);
1676 rfcp = rcu_fwd_cb_head; 1738 rfcp = rcu_fwd_cb_head;
1677 if (!rfcp) 1739 if (!rfcp) {
1740 spin_unlock_irqrestore(&rcu_fwd_lock, flags);
1678 break; 1741 break;
1742 }
1679 rcu_fwd_cb_head = rfcp->rfc_next; 1743 rcu_fwd_cb_head = rfcp->rfc_next;
1680 if (!rcu_fwd_cb_head) 1744 if (!rcu_fwd_cb_head)
1681 rcu_fwd_cb_tail = &rcu_fwd_cb_head; 1745 rcu_fwd_cb_tail = &rcu_fwd_cb_head;
1682 spin_unlock_irqrestore(&rcu_fwd_lock, flags); 1746 spin_unlock_irqrestore(&rcu_fwd_lock, flags);
1683 kfree(rfcp); 1747 kfree(rfcp);
1684 freed++; 1748 freed++;
1749 rcu_torture_fwd_prog_cond_resched();
1685 } 1750 }
1686 spin_unlock_irqrestore(&rcu_fwd_lock, flags);
1687 return freed; 1751 return freed;
1688} 1752}
1689 1753
@@ -1707,6 +1771,8 @@ static void rcu_torture_fwd_prog_nr(int *tested, int *tested_tries)
1707 } 1771 }
1708 1772
1709 /* Tight loop containing cond_resched(). */ 1773 /* Tight loop containing cond_resched(). */
1774 WRITE_ONCE(rcu_fwd_cb_nodelay, true);
1775 cur_ops->sync(); /* Later readers see above write. */
1710 if (selfpropcb) { 1776 if (selfpropcb) {
1711 WRITE_ONCE(fcs.stop, 0); 1777 WRITE_ONCE(fcs.stop, 0);
1712 cur_ops->call(&fcs.rh, rcu_torture_fwd_prog_cb); 1778 cur_ops->call(&fcs.rh, rcu_torture_fwd_prog_cb);
@@ -1724,7 +1790,7 @@ static void rcu_torture_fwd_prog_nr(int *tested, int *tested_tries)
1724 udelay(10); 1790 udelay(10);
1725 cur_ops->readunlock(idx); 1791 cur_ops->readunlock(idx);
1726 if (!fwd_progress_need_resched || need_resched()) 1792 if (!fwd_progress_need_resched || need_resched())
1727 cond_resched(); 1793 rcu_torture_fwd_prog_cond_resched();
1728 } 1794 }
1729 (*tested_tries)++; 1795 (*tested_tries)++;
1730 if (!time_before(jiffies, stopat) && 1796 if (!time_before(jiffies, stopat) &&
@@ -1745,6 +1811,8 @@ static void rcu_torture_fwd_prog_nr(int *tested, int *tested_tries)
1745 WARN_ON(READ_ONCE(fcs.stop) != 2); 1811 WARN_ON(READ_ONCE(fcs.stop) != 2);
1746 destroy_rcu_head_on_stack(&fcs.rh); 1812 destroy_rcu_head_on_stack(&fcs.rh);
1747 } 1813 }
1814 schedule_timeout_uninterruptible(HZ / 10); /* Let kthreads recover. */
1815 WRITE_ONCE(rcu_fwd_cb_nodelay, false);
1748} 1816}
1749 1817
1750/* Carry out call_rcu() forward-progress testing. */ 1818/* Carry out call_rcu() forward-progress testing. */
@@ -1765,6 +1833,8 @@ static void rcu_torture_fwd_prog_cr(void)
1765 1833
1766 if (READ_ONCE(rcu_fwd_emergency_stop)) 1834 if (READ_ONCE(rcu_fwd_emergency_stop))
1767 return; /* Get out of the way quickly, no GP wait! */ 1835 return; /* Get out of the way quickly, no GP wait! */
1836 if (!cur_ops->call)
1837 return; /* Can't do call_rcu() fwd prog without ->call. */
1768 1838
1769 /* Loop continuously posting RCU callbacks. */ 1839 /* Loop continuously posting RCU callbacks. */
1770 WRITE_ONCE(rcu_fwd_cb_nodelay, true); 1840 WRITE_ONCE(rcu_fwd_cb_nodelay, true);
@@ -1805,7 +1875,7 @@ static void rcu_torture_fwd_prog_cr(void)
1805 rfcp->rfc_gps = 0; 1875 rfcp->rfc_gps = 0;
1806 } 1876 }
1807 cur_ops->call(&rfcp->rh, rcu_torture_fwd_cb_cr); 1877 cur_ops->call(&rfcp->rh, rcu_torture_fwd_cb_cr);
1808 cond_resched(); 1878 rcu_torture_fwd_prog_cond_resched();
1809 } 1879 }
1810 stoppedat = jiffies; 1880 stoppedat = jiffies;
1811 n_launders_cb_snap = READ_ONCE(n_launders_cb); 1881 n_launders_cb_snap = READ_ONCE(n_launders_cb);
@@ -1814,7 +1884,6 @@ static void rcu_torture_fwd_prog_cr(void)
1814 cur_ops->cb_barrier(); /* Wait for callbacks to be invoked. */ 1884 cur_ops->cb_barrier(); /* Wait for callbacks to be invoked. */
1815 (void)rcu_torture_fwd_prog_cbfree(); 1885 (void)rcu_torture_fwd_prog_cbfree();
1816 1886
1817 WRITE_ONCE(rcu_fwd_cb_nodelay, false);
1818 if (!torture_must_stop() && !READ_ONCE(rcu_fwd_emergency_stop)) { 1887 if (!torture_must_stop() && !READ_ONCE(rcu_fwd_emergency_stop)) {
1819 WARN_ON(n_max_gps < MIN_FWD_CBS_LAUNDERED); 1888 WARN_ON(n_max_gps < MIN_FWD_CBS_LAUNDERED);
1820 pr_alert("%s Duration %lu barrier: %lu pending %ld n_launders: %ld n_launders_sa: %ld n_max_gps: %ld n_max_cbs: %ld cver %ld gps %ld\n", 1889 pr_alert("%s Duration %lu barrier: %lu pending %ld n_launders: %ld n_launders_sa: %ld n_max_gps: %ld n_max_cbs: %ld cver %ld gps %ld\n",
@@ -1825,6 +1894,8 @@ static void rcu_torture_fwd_prog_cr(void)
1825 n_max_gps, n_max_cbs, cver, gps); 1894 n_max_gps, n_max_cbs, cver, gps);
1826 rcu_torture_fwd_cb_hist(); 1895 rcu_torture_fwd_cb_hist();
1827 } 1896 }
1897 schedule_timeout_uninterruptible(HZ); /* Let CBs drain. */
1898 WRITE_ONCE(rcu_fwd_cb_nodelay, false);
1828} 1899}
1829 1900
1830 1901
@@ -2240,7 +2311,7 @@ rcu_torture_init(void)
2240 int firsterr = 0; 2311 int firsterr = 0;
2241 static struct rcu_torture_ops *torture_ops[] = { 2312 static struct rcu_torture_ops *torture_ops[] = {
2242 &rcu_ops, &rcu_busted_ops, &srcu_ops, &srcud_ops, 2313 &rcu_ops, &rcu_busted_ops, &srcu_ops, &srcud_ops,
2243 &busted_srcud_ops, &tasks_ops, 2314 &busted_srcud_ops, &tasks_ops, &trivial_ops,
2244 }; 2315 };
2245 2316
2246 if (!torture_init_begin(torture_type, verbose)) 2317 if (!torture_init_begin(torture_type, verbose))
@@ -2363,7 +2434,10 @@ rcu_torture_init(void)
2363 if (stutter < 0) 2434 if (stutter < 0)
2364 stutter = 0; 2435 stutter = 0;
2365 if (stutter) { 2436 if (stutter) {
2366 firsterr = torture_stutter_init(stutter * HZ); 2437 int t;
2438
2439 t = cur_ops->stall_dur ? cur_ops->stall_dur() : stutter * HZ;
2440 firsterr = torture_stutter_init(stutter * HZ, t);
2367 if (firsterr) 2441 if (firsterr)
2368 goto unwind; 2442 goto unwind;
2369 } 2443 }
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 9b761e546de8..cf0e886314f2 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -831,8 +831,8 @@ static void srcu_leak_callback(struct rcu_head *rhp)
831 * srcu_read_lock(), and srcu_read_unlock() that are all passed the same 831 * srcu_read_lock(), and srcu_read_unlock() that are all passed the same
832 * srcu_struct structure. 832 * srcu_struct structure.
833 */ 833 */
834void __call_srcu(struct srcu_struct *ssp, struct rcu_head *rhp, 834static void __call_srcu(struct srcu_struct *ssp, struct rcu_head *rhp,
835 rcu_callback_t func, bool do_norm) 835 rcu_callback_t func, bool do_norm)
836{ 836{
837 unsigned long flags; 837 unsigned long flags;
838 int idx; 838 int idx;
@@ -1310,3 +1310,68 @@ void __init srcu_init(void)
1310 queue_work(rcu_gp_wq, &ssp->work.work); 1310 queue_work(rcu_gp_wq, &ssp->work.work);
1311 } 1311 }
1312} 1312}
1313
1314#ifdef CONFIG_MODULES
1315
1316/* Initialize any global-scope srcu_struct structures used by this module. */
1317static int srcu_module_coming(struct module *mod)
1318{
1319 int i;
1320 struct srcu_struct **sspp = mod->srcu_struct_ptrs;
1321 int ret;
1322
1323 for (i = 0; i < mod->num_srcu_structs; i++) {
1324 ret = init_srcu_struct(*(sspp++));
1325 if (WARN_ON_ONCE(ret))
1326 return ret;
1327 }
1328 return 0;
1329}
1330
1331/* Clean up any global-scope srcu_struct structures used by this module. */
1332static void srcu_module_going(struct module *mod)
1333{
1334 int i;
1335 struct srcu_struct **sspp = mod->srcu_struct_ptrs;
1336
1337 for (i = 0; i < mod->num_srcu_structs; i++)
1338 cleanup_srcu_struct(*(sspp++));
1339}
1340
1341/* Handle one module, either coming or going. */
1342static int srcu_module_notify(struct notifier_block *self,
1343 unsigned long val, void *data)
1344{
1345 struct module *mod = data;
1346 int ret = 0;
1347
1348 switch (val) {
1349 case MODULE_STATE_COMING:
1350 ret = srcu_module_coming(mod);
1351 break;
1352 case MODULE_STATE_GOING:
1353 srcu_module_going(mod);
1354 break;
1355 default:
1356 break;
1357 }
1358 return ret;
1359}
1360
1361static struct notifier_block srcu_module_nb = {
1362 .notifier_call = srcu_module_notify,
1363 .priority = 0,
1364};
1365
1366static __init int init_srcu_module_notifier(void)
1367{
1368 int ret;
1369
1370 ret = register_module_notifier(&srcu_module_nb);
1371 if (ret)
1372 pr_warn("Failed to register srcu module notifier\n");
1373 return ret;
1374}
1375late_initcall(init_srcu_module_notifier);
1376
1377#endif /* #ifdef CONFIG_MODULES */
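
The module notifier added above exists so that module-global srcu_struct
instances are initialized on MODULE_STATE_COMING and cleaned up on
MODULE_STATE_GOING; a hedged sketch of the usage this enables (module-side
names invented):

	#include <linux/srcu.h>

	DEFINE_STATIC_SRCU(my_srcu);	/* module-global, handled by the notifier */

	static int read_side(void)
	{
		int idx;

		idx = srcu_read_lock(&my_srcu);
		/* ... SRCU read-side critical section ... */
		srcu_read_unlock(&my_srcu, idx);
		return 0;
	}

	static void update_side(void)
	{
		synchronize_srcu(&my_srcu);	/* wait for pre-existing readers */
	}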
diff --git a/kernel/rcu/sync.c b/kernel/rcu/sync.c
index a8304d90573f..d4558ab7a07d 100644
--- a/kernel/rcu/sync.c
+++ b/kernel/rcu/sync.c
@@ -10,65 +10,18 @@
10#include <linux/rcu_sync.h> 10#include <linux/rcu_sync.h>
11#include <linux/sched.h> 11#include <linux/sched.h>
12 12
13#ifdef CONFIG_PROVE_RCU 13enum { GP_IDLE = 0, GP_ENTER, GP_PASSED, GP_EXIT, GP_REPLAY };
14#define __INIT_HELD(func) .held = func,
15#else
16#define __INIT_HELD(func)
17#endif
18
19static const struct {
20 void (*sync)(void);
21 void (*call)(struct rcu_head *, void (*)(struct rcu_head *));
22 void (*wait)(void);
23#ifdef CONFIG_PROVE_RCU
24 int (*held)(void);
25#endif
26} gp_ops[] = {
27 [RCU_SYNC] = {
28 .sync = synchronize_rcu,
29 .call = call_rcu,
30 .wait = rcu_barrier,
31 __INIT_HELD(rcu_read_lock_held)
32 },
33 [RCU_SCHED_SYNC] = {
34 .sync = synchronize_rcu,
35 .call = call_rcu,
36 .wait = rcu_barrier,
37 __INIT_HELD(rcu_read_lock_sched_held)
38 },
39 [RCU_BH_SYNC] = {
40 .sync = synchronize_rcu,
41 .call = call_rcu,
42 .wait = rcu_barrier,
43 __INIT_HELD(rcu_read_lock_bh_held)
44 },
45};
46
47enum { GP_IDLE = 0, GP_PENDING, GP_PASSED };
48enum { CB_IDLE = 0, CB_PENDING, CB_REPLAY };
49 14
50#define rss_lock gp_wait.lock 15#define rss_lock gp_wait.lock
51 16
52#ifdef CONFIG_PROVE_RCU
53void rcu_sync_lockdep_assert(struct rcu_sync *rsp)
54{
55 RCU_LOCKDEP_WARN(!gp_ops[rsp->gp_type].held(),
56 "suspicious rcu_sync_is_idle() usage");
57}
58
59EXPORT_SYMBOL_GPL(rcu_sync_lockdep_assert);
60#endif
61
62/** 17/**
63 * rcu_sync_init() - Initialize an rcu_sync structure 18 * rcu_sync_init() - Initialize an rcu_sync structure
64 * @rsp: Pointer to rcu_sync structure to be initialized 19 * @rsp: Pointer to rcu_sync structure to be initialized
65 * @type: Flavor of RCU with which to synchronize rcu_sync structure
66 */ 20 */
67void rcu_sync_init(struct rcu_sync *rsp, enum rcu_sync_type type) 21void rcu_sync_init(struct rcu_sync *rsp)
68{ 22{
69 memset(rsp, 0, sizeof(*rsp)); 23 memset(rsp, 0, sizeof(*rsp));
70 init_waitqueue_head(&rsp->gp_wait); 24 init_waitqueue_head(&rsp->gp_wait);
71 rsp->gp_type = type;
72} 25}
73 26
74/** 27/**
@@ -86,56 +39,26 @@ void rcu_sync_enter_start(struct rcu_sync *rsp)
86 rsp->gp_state = GP_PASSED; 39 rsp->gp_state = GP_PASSED;
87} 40}
88 41
89/**
90 * rcu_sync_enter() - Force readers onto slowpath
91 * @rsp: Pointer to rcu_sync structure to use for synchronization
92 *
93 * This function is used by updaters who need readers to make use of
94 * a slowpath during the update. After this function returns, all
95 * subsequent calls to rcu_sync_is_idle() will return false, which
96 * tells readers to stay off their fastpaths. A later call to
97 * rcu_sync_exit() re-enables reader slowpaths.
98 *
99 * When called in isolation, rcu_sync_enter() must wait for a grace
100 * period, however, closely spaced calls to rcu_sync_enter() can
101 * optimize away the grace-period wait via a state machine implemented
102 * by rcu_sync_enter(), rcu_sync_exit(), and rcu_sync_func().
103 */
104void rcu_sync_enter(struct rcu_sync *rsp)
105{
106 bool need_wait, need_sync;
107 42
108 spin_lock_irq(&rsp->rss_lock); 43static void rcu_sync_func(struct rcu_head *rhp);
109 need_wait = rsp->gp_count++;
110 need_sync = rsp->gp_state == GP_IDLE;
111 if (need_sync)
112 rsp->gp_state = GP_PENDING;
113 spin_unlock_irq(&rsp->rss_lock);
114 44
115 WARN_ON_ONCE(need_wait && need_sync); 45static void rcu_sync_call(struct rcu_sync *rsp)
116 if (need_sync) { 46{
117 gp_ops[rsp->gp_type].sync(); 47 call_rcu(&rsp->cb_head, rcu_sync_func);
118 rsp->gp_state = GP_PASSED;
119 wake_up_all(&rsp->gp_wait);
120 } else if (need_wait) {
121 wait_event(rsp->gp_wait, rsp->gp_state == GP_PASSED);
122 } else {
123 /*
124 * Possible when there's a pending CB from a rcu_sync_exit().
125 * Nobody has yet been allowed the 'fast' path and thus we can
126 * avoid doing any sync(). The callback will get 'dropped'.
127 */
128 WARN_ON_ONCE(rsp->gp_state != GP_PASSED);
129 }
130} 48}
131 49
132/** 50/**
133 * rcu_sync_func() - Callback function managing reader access to fastpath 51 * rcu_sync_func() - Callback function managing reader access to fastpath
134 * @rhp: Pointer to rcu_head in rcu_sync structure to use for synchronization 52 * @rhp: Pointer to rcu_head in rcu_sync structure to use for synchronization
135 * 53 *
136 * This function is passed to one of the call_rcu() functions by 54 * This function is passed to call_rcu() by rcu_sync_enter() and
137 * rcu_sync_exit(), so that it is invoked after a grace period following the 55 * rcu_sync_exit(), so that it is invoked after a grace period following the
138 * that invocation of rcu_sync_exit(). It takes action based on events that 56 * invocation of enter/exit.
57 *
58 * If it is called by rcu_sync_enter() it signals that all the readers were
59 * switched onto slow path.
60 *
61 * If it is called by rcu_sync_exit() it takes action based on events that
139 * have taken place in the meantime, so that closely spaced rcu_sync_enter() 62 * have taken place in the meantime, so that closely spaced rcu_sync_enter()
140 * and rcu_sync_exit() pairs need not wait for a grace period. 63 * and rcu_sync_exit() pairs need not wait for a grace period.
141 * 64 *
@@ -152,35 +75,88 @@ static void rcu_sync_func(struct rcu_head *rhp)
152 struct rcu_sync *rsp = container_of(rhp, struct rcu_sync, cb_head); 75 struct rcu_sync *rsp = container_of(rhp, struct rcu_sync, cb_head);
153 unsigned long flags; 76 unsigned long flags;
154 77
155 WARN_ON_ONCE(rsp->gp_state != GP_PASSED); 78 WARN_ON_ONCE(READ_ONCE(rsp->gp_state) == GP_IDLE);
156 WARN_ON_ONCE(rsp->cb_state == CB_IDLE); 79 WARN_ON_ONCE(READ_ONCE(rsp->gp_state) == GP_PASSED);
157 80
158 spin_lock_irqsave(&rsp->rss_lock, flags); 81 spin_lock_irqsave(&rsp->rss_lock, flags);
159 if (rsp->gp_count) { 82 if (rsp->gp_count) {
160 /* 83 /*
161 * A new rcu_sync_begin() has happened; drop the callback. 84 * We're at least a GP after the GP_IDLE->GP_ENTER transition.
162 */ 85 */
163 rsp->cb_state = CB_IDLE; 86 WRITE_ONCE(rsp->gp_state, GP_PASSED);
164 } else if (rsp->cb_state == CB_REPLAY) { 87 wake_up_locked(&rsp->gp_wait);
88 } else if (rsp->gp_state == GP_REPLAY) {
165 /* 89 /*
166 * A new rcu_sync_exit() has happened; requeue the callback 90 * A new rcu_sync_exit() has happened; requeue the callback to
167 * to catch a later GP. 91 * catch a later GP.
168 */ 92 */
169 rsp->cb_state = CB_PENDING; 93 WRITE_ONCE(rsp->gp_state, GP_EXIT);
170 gp_ops[rsp->gp_type].call(&rsp->cb_head, rcu_sync_func); 94 rcu_sync_call(rsp);
171 } else { 95 } else {
172 /* 96 /*
173 * We're at least a GP after rcu_sync_exit(); eveybody will now 97 * We're at least a GP after the last rcu_sync_exit(); everybody
174 * have observed the write side critical section. Let 'em rip!. 98 * will now have observed the write side critical section.
99 * Let 'em rip!
175 */ 100 */
176 rsp->cb_state = CB_IDLE; 101 WRITE_ONCE(rsp->gp_state, GP_IDLE);
177 rsp->gp_state = GP_IDLE;
178 } 102 }
179 spin_unlock_irqrestore(&rsp->rss_lock, flags); 103 spin_unlock_irqrestore(&rsp->rss_lock, flags);
180} 104}
181 105
182/** 106/**
183 * rcu_sync_exit() - Allow readers back onto fast patch after grace period 107 * rcu_sync_enter() - Force readers onto slowpath
108 * @rsp: Pointer to rcu_sync structure to use for synchronization
109 *
110 * This function is used by updaters who need readers to make use of
111 * a slowpath during the update. After this function returns, all
112 * subsequent calls to rcu_sync_is_idle() will return false, which
113 * tells readers to stay off their fastpaths. A later call to
114 * rcu_sync_exit() re-enables reader slowpaths.
115 *
116 * When called in isolation, rcu_sync_enter() must wait for a grace
117 * period, however, closely spaced calls to rcu_sync_enter() can
118 * optimize away the grace-period wait via a state machine implemented
119 * by rcu_sync_enter(), rcu_sync_exit(), and rcu_sync_func().
120 */
121void rcu_sync_enter(struct rcu_sync *rsp)
122{
123 int gp_state;
124
125 spin_lock_irq(&rsp->rss_lock);
126 gp_state = rsp->gp_state;
127 if (gp_state == GP_IDLE) {
128 WRITE_ONCE(rsp->gp_state, GP_ENTER);
129 WARN_ON_ONCE(rsp->gp_count);
130 /*
131 * Note that we could simply do rcu_sync_call(rsp) here and
132 * avoid the "if (gp_state == GP_IDLE)" block below.
133 *
134 * However, synchronize_rcu() can be faster if rcu_expedited
135 * or rcu_blocking_is_gp() is true.
136 *
137 * Another reason is that we can't wait for an rcu callback if
138 * we are called at early boot time, but this shouldn't happen.
139 */
140 }
141 rsp->gp_count++;
142 spin_unlock_irq(&rsp->rss_lock);
143
144 if (gp_state == GP_IDLE) {
145 /*
146 * See the comment above, this simply does the "synchronous"
147 * call_rcu(rcu_sync_func) which does GP_ENTER -> GP_PASSED.
148 */
149 synchronize_rcu();
150 rcu_sync_func(&rsp->cb_head);
151 /* Not really needed, wait_event() would see GP_PASSED. */
152 return;
153 }
154
155 wait_event(rsp->gp_wait, READ_ONCE(rsp->gp_state) >= GP_PASSED);
156}
157
158/**
159 * rcu_sync_exit() - Allow readers back onto fast path after grace period
184 * @rsp: Pointer to rcu_sync structure to use for synchronization 160 * @rsp: Pointer to rcu_sync structure to use for synchronization
185 * 161 *
186 * This function is used by updaters who have completed, and can therefore 162 * This function is used by updaters who have completed, and can therefore
@@ -191,13 +167,16 @@ static void rcu_sync_func(struct rcu_head *rhp)
191 */ 167 */
192void rcu_sync_exit(struct rcu_sync *rsp) 168void rcu_sync_exit(struct rcu_sync *rsp)
193{ 169{
170 WARN_ON_ONCE(READ_ONCE(rsp->gp_state) == GP_IDLE);
171 WARN_ON_ONCE(READ_ONCE(rsp->gp_count) == 0);
172
194 spin_lock_irq(&rsp->rss_lock); 173 spin_lock_irq(&rsp->rss_lock);
195 if (!--rsp->gp_count) { 174 if (!--rsp->gp_count) {
196 if (rsp->cb_state == CB_IDLE) { 175 if (rsp->gp_state == GP_PASSED) {
197 rsp->cb_state = CB_PENDING; 176 WRITE_ONCE(rsp->gp_state, GP_EXIT);
198 gp_ops[rsp->gp_type].call(&rsp->cb_head, rcu_sync_func); 177 rcu_sync_call(rsp);
199 } else if (rsp->cb_state == CB_PENDING) { 178 } else if (rsp->gp_state == GP_EXIT) {
200 rsp->cb_state = CB_REPLAY; 179 WRITE_ONCE(rsp->gp_state, GP_REPLAY);
201 } 180 }
202 } 181 }
203 spin_unlock_irq(&rsp->rss_lock); 182 spin_unlock_irq(&rsp->rss_lock);
@@ -209,18 +188,19 @@ void rcu_sync_exit(struct rcu_sync *rsp)
209 */ 188 */
210void rcu_sync_dtor(struct rcu_sync *rsp) 189void rcu_sync_dtor(struct rcu_sync *rsp)
211{ 190{
212 int cb_state; 191 int gp_state;
213 192
214 WARN_ON_ONCE(rsp->gp_count); 193 WARN_ON_ONCE(READ_ONCE(rsp->gp_count));
194 WARN_ON_ONCE(READ_ONCE(rsp->gp_state) == GP_PASSED);
215 195
216 spin_lock_irq(&rsp->rss_lock); 196 spin_lock_irq(&rsp->rss_lock);
217 if (rsp->cb_state == CB_REPLAY) 197 if (rsp->gp_state == GP_REPLAY)
218 rsp->cb_state = CB_PENDING; 198 WRITE_ONCE(rsp->gp_state, GP_EXIT);
219 cb_state = rsp->cb_state; 199 gp_state = rsp->gp_state;
220 spin_unlock_irq(&rsp->rss_lock); 200 spin_unlock_irq(&rsp->rss_lock);
221 201
222 if (cb_state != CB_IDLE) { 202 if (gp_state != GP_IDLE) {
223 gp_ops[rsp->gp_type].wait(); 203 rcu_barrier();
224 WARN_ON_ONCE(rsp->cb_state != CB_IDLE); 204 WARN_ON_ONCE(rsp->gp_state != GP_IDLE);
225 } 205 }
226} 206}
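The rewrite above collapses the old CB_IDLE/CB_PENDING/CB_REPLAY callback states and the separate gp_state into a single gp_state machine, but the updater-facing API is unchanged. A minimal usage sketch, assuming the single-argument rcu_sync_init() from this series (my_rss, my_writer() and reader_may_use_fastpath() are illustrative names, not from the patch):

	#include <linux/rcu_sync.h>

	static struct rcu_sync my_rss;		/* hypothetical instance */

	static bool reader_may_use_fastpath(void)
	{
		return rcu_sync_is_idle(&my_rss);	/* no writer active or pending */
	}

	static void my_writer(void)
	{
		rcu_sync_enter(&my_rss);	/* force readers onto the slowpath */
		/* ... update the data that readers also touch ... */
		rcu_sync_exit(&my_rss);		/* let readers back onto the fast path */
	}

	static int __init my_setup(void)
	{
		rcu_sync_init(&my_rss);
		return 0;
	}

This is the pattern percpu-rwsem relies on: closely spaced writers can reuse a still-pending grace period instead of each waiting for a new one.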
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 980ca3ca643f..a14e5fbbea46 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -51,6 +51,12 @@
51#include <linux/tick.h> 51#include <linux/tick.h>
52#include <linux/sysrq.h> 52#include <linux/sysrq.h>
53#include <linux/kprobes.h> 53#include <linux/kprobes.h>
54#include <linux/gfp.h>
55#include <linux/oom.h>
56#include <linux/smpboot.h>
57#include <linux/jiffies.h>
58#include <linux/sched/isolation.h>
59#include "../time/tick-internal.h"
54 60
55#include "tree.h" 61#include "tree.h"
56#include "rcu.h" 62#include "rcu.h"
@@ -92,6 +98,9 @@ struct rcu_state rcu_state = {
92/* Dump rcu_node combining tree at boot to verify correct setup. */ 98/* Dump rcu_node combining tree at boot to verify correct setup. */
93static bool dump_tree; 99static bool dump_tree;
94module_param(dump_tree, bool, 0444); 100module_param(dump_tree, bool, 0444);
101/* By default, use RCU_SOFTIRQ instead of rcuc kthreads. */
102static bool use_softirq = 1;
103module_param(use_softirq, bool, 0444);
95/* Control rcu_node-tree auto-balancing at boot time. */ 104/* Control rcu_node-tree auto-balancing at boot time. */
96static bool rcu_fanout_exact; 105static bool rcu_fanout_exact;
97module_param(rcu_fanout_exact, bool, 0444); 106module_param(rcu_fanout_exact, bool, 0444);
@@ -138,7 +147,6 @@ static void rcu_init_new_rnp(struct rcu_node *rnp_leaf);
138static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf); 147static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf);
139static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu); 148static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu);
140static void invoke_rcu_core(void); 149static void invoke_rcu_core(void);
141static void invoke_rcu_callbacks(struct rcu_data *rdp);
142static void rcu_report_exp_rdp(struct rcu_data *rdp); 150static void rcu_report_exp_rdp(struct rcu_data *rdp);
143static void sync_sched_exp_online_cleanup(int cpu); 151static void sync_sched_exp_online_cleanup(int cpu);
144 152
@@ -368,19 +376,33 @@ static void __maybe_unused rcu_momentary_dyntick_idle(void)
368} 376}
369 377
370/** 378/**
371 * rcu_is_cpu_rrupt_from_idle - see if idle or immediately interrupted from idle 379 * rcu_is_cpu_rrupt_from_idle - see if interrupted from idle
372 * 380 *
373 * If the current CPU is idle or running at a first-level (not nested) 381 * If the current CPU is idle and running at a first-level (not nested)
374 * interrupt from idle, return true. The caller must have at least 382 * interrupt from idle, return true. The caller must have at least
375 * disabled preemption. 383 * disabled preemption.
376 */ 384 */
377static int rcu_is_cpu_rrupt_from_idle(void) 385static int rcu_is_cpu_rrupt_from_idle(void)
378{ 386{
379 return __this_cpu_read(rcu_data.dynticks_nesting) <= 0 && 387 /* Called only from within the scheduling-clock interrupt */
380 __this_cpu_read(rcu_data.dynticks_nmi_nesting) <= 1; 388 lockdep_assert_in_irq();
389
390 /* Check for counter underflows */
391 RCU_LOCKDEP_WARN(__this_cpu_read(rcu_data.dynticks_nesting) < 0,
392 "RCU dynticks_nesting counter underflow!");
393 RCU_LOCKDEP_WARN(__this_cpu_read(rcu_data.dynticks_nmi_nesting) <= 0,
394 "RCU dynticks_nmi_nesting counter underflow/zero!");
395
396 /* Are we at first interrupt nesting level? */
397 if (__this_cpu_read(rcu_data.dynticks_nmi_nesting) != 1)
398 return false;
399
400 /* Does CPU appear to be idle from an RCU standpoint? */
401 return __this_cpu_read(rcu_data.dynticks_nesting) == 0;
381} 402}
382 403
383#define DEFAULT_RCU_BLIMIT 10 /* Maximum callbacks per rcu_do_batch. */ 404#define DEFAULT_RCU_BLIMIT 10 /* Maximum callbacks per rcu_do_batch ... */
405#define DEFAULT_MAX_RCU_BLIMIT 10000 /* ... even during callback flood. */
384static long blimit = DEFAULT_RCU_BLIMIT; 406static long blimit = DEFAULT_RCU_BLIMIT;
385#define DEFAULT_RCU_QHIMARK 10000 /* If this many pending, ignore blimit. */ 407#define DEFAULT_RCU_QHIMARK 10000 /* If this many pending, ignore blimit. */
386static long qhimark = DEFAULT_RCU_QHIMARK; 408static long qhimark = DEFAULT_RCU_QHIMARK;
@@ -2113,7 +2135,7 @@ static void rcu_do_batch(struct rcu_data *rdp)
2113 2135
2114 /* Reinstate batch limit if we have worked down the excess. */ 2136 /* Reinstate batch limit if we have worked down the excess. */
2115 count = rcu_segcblist_n_cbs(&rdp->cblist); 2137 count = rcu_segcblist_n_cbs(&rdp->cblist);
2116 if (rdp->blimit == LONG_MAX && count <= qlowmark) 2138 if (rdp->blimit >= DEFAULT_MAX_RCU_BLIMIT && count <= qlowmark)
2117 rdp->blimit = blimit; 2139 rdp->blimit = blimit;
2118 2140
2119 /* Reset ->qlen_last_fqs_check trigger if enough CBs have drained. */ 2141 /* Reset ->qlen_last_fqs_check trigger if enough CBs have drained. */
@@ -2253,7 +2275,7 @@ void rcu_force_quiescent_state(void)
2253EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); 2275EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
2254 2276
2255/* Perform RCU core processing work for the current CPU. */ 2277/* Perform RCU core processing work for the current CPU. */
2256static __latent_entropy void rcu_core(struct softirq_action *unused) 2278static __latent_entropy void rcu_core(void)
2257{ 2279{
2258 unsigned long flags; 2280 unsigned long flags;
2259 struct rcu_data *rdp = raw_cpu_ptr(&rcu_data); 2281 struct rcu_data *rdp = raw_cpu_ptr(&rcu_data);
@@ -2287,37 +2309,126 @@ static __latent_entropy void rcu_core(struct softirq_action *unused)
2287 rcu_check_gp_start_stall(rnp, rdp, rcu_jiffies_till_stall_check()); 2309 rcu_check_gp_start_stall(rnp, rdp, rcu_jiffies_till_stall_check());
2288 2310
2289 /* If there are callbacks ready, invoke them. */ 2311 /* If there are callbacks ready, invoke them. */
2290 if (rcu_segcblist_ready_cbs(&rdp->cblist)) 2312 if (rcu_segcblist_ready_cbs(&rdp->cblist) &&
2291 invoke_rcu_callbacks(rdp); 2313 likely(READ_ONCE(rcu_scheduler_fully_active)))
2314 rcu_do_batch(rdp);
2292 2315
2293 /* Do any needed deferred wakeups of rcuo kthreads. */ 2316 /* Do any needed deferred wakeups of rcuo kthreads. */
2294 do_nocb_deferred_wakeup(rdp); 2317 do_nocb_deferred_wakeup(rdp);
2295 trace_rcu_utilization(TPS("End RCU core")); 2318 trace_rcu_utilization(TPS("End RCU core"));
2296} 2319}
2297 2320
2321static void rcu_core_si(struct softirq_action *h)
2322{
2323 rcu_core();
2324}
2325
2326static void rcu_wake_cond(struct task_struct *t, int status)
2327{
2328 /*
2329 * If the thread is yielding, only wake it when this
2330 * is invoked from idle
2331 */
2332 if (t && (status != RCU_KTHREAD_YIELDING || is_idle_task(current)))
2333 wake_up_process(t);
2334}
2335
2336static void invoke_rcu_core_kthread(void)
2337{
2338 struct task_struct *t;
2339 unsigned long flags;
2340
2341 local_irq_save(flags);
2342 __this_cpu_write(rcu_data.rcu_cpu_has_work, 1);
2343 t = __this_cpu_read(rcu_data.rcu_cpu_kthread_task);
2344 if (t != NULL && t != current)
2345 rcu_wake_cond(t, __this_cpu_read(rcu_data.rcu_cpu_kthread_status));
2346 local_irq_restore(flags);
2347}
2348
2298/* 2349/*
2299 * Schedule RCU callback invocation. If the running implementation of RCU 2350 * Wake up this CPU's rcuc kthread to do RCU core processing.
2300 * does not support RCU priority boosting, just do a direct call, otherwise
2301 * wake up the per-CPU kernel kthread. Note that because we are running
2302 * on the current CPU with softirqs disabled, the rcu_cpu_kthread_task
2303 * cannot disappear out from under us.
2304 */ 2351 */
2305static void invoke_rcu_callbacks(struct rcu_data *rdp) 2352static void invoke_rcu_core(void)
2306{ 2353{
2307 if (unlikely(!READ_ONCE(rcu_scheduler_fully_active))) 2354 if (!cpu_online(smp_processor_id()))
2308 return;
2309 if (likely(!rcu_state.boost)) {
2310 rcu_do_batch(rdp);
2311 return; 2355 return;
2356 if (use_softirq)
2357 raise_softirq(RCU_SOFTIRQ);
2358 else
2359 invoke_rcu_core_kthread();
2360}
2361
2362static void rcu_cpu_kthread_park(unsigned int cpu)
2363{
2364 per_cpu(rcu_data.rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
2365}
2366
2367static int rcu_cpu_kthread_should_run(unsigned int cpu)
2368{
2369 return __this_cpu_read(rcu_data.rcu_cpu_has_work);
2370}
2371
2372/*
2373 * Per-CPU kernel thread that invokes RCU callbacks. This replaces
2374 * the RCU softirq used in configurations of RCU that do not support RCU
2375 * priority boosting.
2376 */
2377static void rcu_cpu_kthread(unsigned int cpu)
2378{
2379 unsigned int *statusp = this_cpu_ptr(&rcu_data.rcu_cpu_kthread_status);
2380 char work, *workp = this_cpu_ptr(&rcu_data.rcu_cpu_has_work);
2381 int spincnt;
2382
2383 for (spincnt = 0; spincnt < 10; spincnt++) {
2384 trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
2385 local_bh_disable();
2386 *statusp = RCU_KTHREAD_RUNNING;
2387 local_irq_disable();
2388 work = *workp;
2389 *workp = 0;
2390 local_irq_enable();
2391 if (work)
2392 rcu_core();
2393 local_bh_enable();
2394 if (*workp == 0) {
2395 trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
2396 *statusp = RCU_KTHREAD_WAITING;
2397 return;
2398 }
2312 } 2399 }
2313 invoke_rcu_callbacks_kthread(); 2400 *statusp = RCU_KTHREAD_YIELDING;
2401 trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
2402 schedule_timeout_interruptible(2);
2403 trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
2404 *statusp = RCU_KTHREAD_WAITING;
2314} 2405}
2315 2406
2316static void invoke_rcu_core(void) 2407static struct smp_hotplug_thread rcu_cpu_thread_spec = {
2408 .store = &rcu_data.rcu_cpu_kthread_task,
2409 .thread_should_run = rcu_cpu_kthread_should_run,
2410 .thread_fn = rcu_cpu_kthread,
2411 .thread_comm = "rcuc/%u",
2412 .setup = rcu_cpu_kthread_setup,
2413 .park = rcu_cpu_kthread_park,
2414};
2415
2416/*
2417 * Spawn per-CPU RCU core processing kthreads.
2418 */
2419static int __init rcu_spawn_core_kthreads(void)
2317{ 2420{
2318 if (cpu_online(smp_processor_id())) 2421 int cpu;
2319 raise_softirq(RCU_SOFTIRQ); 2422
2423 for_each_possible_cpu(cpu)
2424 per_cpu(rcu_data.rcu_cpu_has_work, cpu) = 0;
2425 if (!IS_ENABLED(CONFIG_RCU_BOOST) && use_softirq)
2426 return 0;
2427 WARN_ONCE(smpboot_register_percpu_thread(&rcu_cpu_thread_spec),
2428 "%s: Could not start rcuc kthread, OOM is now expected behavior\n", __func__);
2429 return 0;
2320} 2430}
2431early_initcall(rcu_spawn_core_kthreads);
2321 2432
2322/* 2433/*
2323 * Handle any core-RCU processing required by a call_rcu() invocation. 2434 * Handle any core-RCU processing required by a call_rcu() invocation.
@@ -2354,7 +2465,7 @@ static void __call_rcu_core(struct rcu_data *rdp, struct rcu_head *head,
2354 rcu_accelerate_cbs_unlocked(rdp->mynode, rdp); 2465 rcu_accelerate_cbs_unlocked(rdp->mynode, rdp);
2355 } else { 2466 } else {
2356 /* Give the grace period a kick. */ 2467 /* Give the grace period a kick. */
2357 rdp->blimit = LONG_MAX; 2468 rdp->blimit = DEFAULT_MAX_RCU_BLIMIT;
2358 if (rcu_state.n_force_qs == rdp->n_force_qs_snap && 2469 if (rcu_state.n_force_qs == rdp->n_force_qs_snap &&
2359 rcu_segcblist_first_pend_cb(&rdp->cblist) != head) 2470 rcu_segcblist_first_pend_cb(&rdp->cblist) != head)
2360 rcu_force_quiescent_state(); 2471 rcu_force_quiescent_state();
@@ -3355,7 +3466,8 @@ void __init rcu_init(void)
3355 rcu_init_one(); 3466 rcu_init_one();
3356 if (dump_tree) 3467 if (dump_tree)
3357 rcu_dump_rcu_node_tree(); 3468 rcu_dump_rcu_node_tree();
3358 open_softirq(RCU_SOFTIRQ, rcu_core); 3469 if (use_softirq)
3470 open_softirq(RCU_SOFTIRQ, rcu_core_si);
3359 3471
3360 /* 3472 /*
3361 * We don't need protection against CPU-hotplug here because 3473 * We don't need protection against CPU-hotplug here because
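The new use_softirq switch above is a read-only (0444) module parameter, so it can only be chosen at boot. Assuming the usual rcutree. prefix for these parameters, moving RCU core processing off RCU_SOFTIRQ and onto the per-CPU rcuc kthreads would look like:

	# kernel command line (boot-time only, the parameter is 0444)
	rcutree.use_softirq=0

With use_softirq left at its default of 1, open_softirq() still registers rcu_core_si() and the rcuc kthreads are spawned only when CONFIG_RCU_BOOST needs them.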
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index e253d11af3c4..7acaf3a62d39 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -154,13 +154,15 @@ struct rcu_data {
154 bool core_needs_qs; /* Core waits for quiesc state. */ 154 bool core_needs_qs; /* Core waits for quiesc state. */
155 bool beenonline; /* CPU online at least once. */ 155 bool beenonline; /* CPU online at least once. */
156 bool gpwrap; /* Possible ->gp_seq wrap. */ 156 bool gpwrap; /* Possible ->gp_seq wrap. */
157 bool deferred_qs; /* This CPU awaiting a deferred QS? */ 157 bool exp_deferred_qs; /* This CPU awaiting a deferred QS? */
158 struct rcu_node *mynode; /* This CPU's leaf of hierarchy */ 158 struct rcu_node *mynode; /* This CPU's leaf of hierarchy */
159 unsigned long grpmask; /* Mask to apply to leaf qsmask. */ 159 unsigned long grpmask; /* Mask to apply to leaf qsmask. */
160 unsigned long ticks_this_gp; /* The number of scheduling-clock */ 160 unsigned long ticks_this_gp; /* The number of scheduling-clock */
161 /* ticks this CPU has handled */ 161 /* ticks this CPU has handled */
162 /* during and after the last grace */ 162 /* during and after the last grace */
163 /* period it is aware of. */ 163 /* period it is aware of. */
164 struct irq_work defer_qs_iw; /* Obtain later scheduler attention. */
165 bool defer_qs_iw_pending; /* Scheduler attention pending? */
164 166
165 /* 2) batch handling */ 167 /* 2) batch handling */
166 struct rcu_segcblist cblist; /* Segmented callback list, with */ 168 struct rcu_segcblist cblist; /* Segmented callback list, with */
@@ -407,8 +409,8 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func);
407static void dump_blkd_tasks(struct rcu_node *rnp, int ncheck); 409static void dump_blkd_tasks(struct rcu_node *rnp, int ncheck);
408static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); 410static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
409static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); 411static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
410static void invoke_rcu_callbacks_kthread(void);
411static bool rcu_is_callbacks_kthread(void); 412static bool rcu_is_callbacks_kthread(void);
413static void rcu_cpu_kthread_setup(unsigned int cpu);
412static void __init rcu_spawn_boost_kthreads(void); 414static void __init rcu_spawn_boost_kthreads(void);
413static void rcu_prepare_kthreads(int cpu); 415static void rcu_prepare_kthreads(int cpu);
414static void rcu_cleanup_after_idle(void); 416static void rcu_cleanup_after_idle(void);
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index 9c990df880d1..af7e7b9c86af 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -250,7 +250,7 @@ static void rcu_report_exp_cpu_mult(struct rcu_node *rnp,
250 */ 250 */
251static void rcu_report_exp_rdp(struct rcu_data *rdp) 251static void rcu_report_exp_rdp(struct rcu_data *rdp)
252{ 252{
253 WRITE_ONCE(rdp->deferred_qs, false); 253 WRITE_ONCE(rdp->exp_deferred_qs, false);
254 rcu_report_exp_cpu_mult(rdp->mynode, rdp->grpmask, true); 254 rcu_report_exp_cpu_mult(rdp->mynode, rdp->grpmask, true);
255} 255}
256 256
@@ -259,8 +259,7 @@ static bool sync_exp_work_done(unsigned long s)
259{ 259{
260 if (rcu_exp_gp_seq_done(s)) { 260 if (rcu_exp_gp_seq_done(s)) {
261 trace_rcu_exp_grace_period(rcu_state.name, s, TPS("done")); 261 trace_rcu_exp_grace_period(rcu_state.name, s, TPS("done"));
262 /* Ensure test happens before caller kfree(). */ 262 smp_mb(); /* Ensure test happens before caller kfree(). */
263 smp_mb__before_atomic(); /* ^^^ */
264 return true; 263 return true;
265 } 264 }
266 return false; 265 return false;
@@ -384,7 +383,12 @@ retry_ipi:
384 mask_ofl_test |= mask; 383 mask_ofl_test |= mask;
385 continue; 384 continue;
386 } 385 }
386 if (get_cpu() == cpu) {
387 put_cpu();
388 continue;
389 }
387 ret = smp_call_function_single(cpu, rcu_exp_handler, NULL, 0); 390 ret = smp_call_function_single(cpu, rcu_exp_handler, NULL, 0);
391 put_cpu();
388 if (!ret) { 392 if (!ret) {
389 mask_ofl_ipi &= ~mask; 393 mask_ofl_ipi &= ~mask;
390 continue; 394 continue;
@@ -611,7 +615,7 @@ static void rcu_exp_handler(void *unused)
611 rcu_dynticks_curr_cpu_in_eqs()) { 615 rcu_dynticks_curr_cpu_in_eqs()) {
612 rcu_report_exp_rdp(rdp); 616 rcu_report_exp_rdp(rdp);
613 } else { 617 } else {
614 rdp->deferred_qs = true; 618 rdp->exp_deferred_qs = true;
615 set_tsk_need_resched(t); 619 set_tsk_need_resched(t);
616 set_preempt_need_resched(); 620 set_preempt_need_resched();
617 } 621 }
@@ -633,7 +637,7 @@ static void rcu_exp_handler(void *unused)
633 if (t->rcu_read_lock_nesting > 0) { 637 if (t->rcu_read_lock_nesting > 0) {
634 raw_spin_lock_irqsave_rcu_node(rnp, flags); 638 raw_spin_lock_irqsave_rcu_node(rnp, flags);
635 if (rnp->expmask & rdp->grpmask) { 639 if (rnp->expmask & rdp->grpmask) {
636 rdp->deferred_qs = true; 640 rdp->exp_deferred_qs = true;
637 t->rcu_read_unlock_special.b.exp_hint = true; 641 t->rcu_read_unlock_special.b.exp_hint = true;
638 } 642 }
639 raw_spin_unlock_irqrestore_rcu_node(rnp, flags); 643 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
@@ -656,7 +660,7 @@ static void rcu_exp_handler(void *unused)
656 * 660 *
657 * Otherwise, force a context switch after the CPU enables everything. 661 * Otherwise, force a context switch after the CPU enables everything.
658 */ 662 */
659 rdp->deferred_qs = true; 663 rdp->exp_deferred_qs = true;
660 if (!(preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK)) || 664 if (!(preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK)) ||
661 WARN_ON_ONCE(rcu_dynticks_curr_cpu_in_eqs())) { 665 WARN_ON_ONCE(rcu_dynticks_curr_cpu_in_eqs())) {
662 rcu_preempt_deferred_qs(t); 666 rcu_preempt_deferred_qs(t);
@@ -694,6 +698,16 @@ static int rcu_print_task_exp_stall(struct rcu_node *rnp)
694 698
695#else /* #ifdef CONFIG_PREEMPT_RCU */ 699#else /* #ifdef CONFIG_PREEMPT_RCU */
696 700
701/* Request an expedited quiescent state. */
702static void rcu_exp_need_qs(void)
703{
704 __this_cpu_write(rcu_data.cpu_no_qs.b.exp, true);
705 /* Store .exp before .rcu_urgent_qs. */
706 smp_store_release(this_cpu_ptr(&rcu_data.rcu_urgent_qs), true);
707 set_tsk_need_resched(current);
708 set_preempt_need_resched();
709}
710
697/* Invoked on each online non-idle CPU for expedited quiescent state. */ 711/* Invoked on each online non-idle CPU for expedited quiescent state. */
698static void rcu_exp_handler(void *unused) 712static void rcu_exp_handler(void *unused)
699{ 713{
@@ -709,25 +723,38 @@ static void rcu_exp_handler(void *unused)
709 rcu_report_exp_rdp(this_cpu_ptr(&rcu_data)); 723 rcu_report_exp_rdp(this_cpu_ptr(&rcu_data));
710 return; 724 return;
711 } 725 }
712 __this_cpu_write(rcu_data.cpu_no_qs.b.exp, true); 726 rcu_exp_need_qs();
713 /* Store .exp before .rcu_urgent_qs. */
714 smp_store_release(this_cpu_ptr(&rcu_data.rcu_urgent_qs), true);
715 set_tsk_need_resched(current);
716 set_preempt_need_resched();
717} 727}
718 728
719/* Send IPI for expedited cleanup if needed at end of CPU-hotplug operation. */ 729/* Send IPI for expedited cleanup if needed at end of CPU-hotplug operation. */
720static void sync_sched_exp_online_cleanup(int cpu) 730static void sync_sched_exp_online_cleanup(int cpu)
721{ 731{
732 unsigned long flags;
733 int my_cpu;
722 struct rcu_data *rdp; 734 struct rcu_data *rdp;
723 int ret; 735 int ret;
724 struct rcu_node *rnp; 736 struct rcu_node *rnp;
725 737
726 rdp = per_cpu_ptr(&rcu_data, cpu); 738 rdp = per_cpu_ptr(&rcu_data, cpu);
727 rnp = rdp->mynode; 739 rnp = rdp->mynode;
728 if (!(READ_ONCE(rnp->expmask) & rdp->grpmask)) 740 my_cpu = get_cpu();
741 /* Quiescent state either not needed or already requested, leave. */
742 if (!(READ_ONCE(rnp->expmask) & rdp->grpmask) ||
743 __this_cpu_read(rcu_data.cpu_no_qs.b.exp)) {
744 put_cpu();
745 return;
746 }
747 /* Quiescent state needed on current CPU, so set it up locally. */
748 if (my_cpu == cpu) {
749 local_irq_save(flags);
750 rcu_exp_need_qs();
751 local_irq_restore(flags);
752 put_cpu();
729 return; 753 return;
754 }
755 /* Quiescent state needed on some other CPU, send IPI. */
730 ret = smp_call_function_single(cpu, rcu_exp_handler, NULL, 0); 756 ret = smp_call_function_single(cpu, rcu_exp_handler, NULL, 0);
757 put_cpu();
731 WARN_ON_ONCE(ret); 758 WARN_ON_ONCE(ret);
732} 759}
733 760
@@ -765,7 +792,6 @@ static int rcu_print_task_exp_stall(struct rcu_node *rnp)
765 */ 792 */
766void synchronize_rcu_expedited(void) 793void synchronize_rcu_expedited(void)
767{ 794{
768 struct rcu_data *rdp;
769 struct rcu_exp_work rew; 795 struct rcu_exp_work rew;
770 struct rcu_node *rnp; 796 struct rcu_node *rnp;
771 unsigned long s; 797 unsigned long s;
@@ -802,7 +828,6 @@ void synchronize_rcu_expedited(void)
802 } 828 }
803 829
804 /* Wait for expedited grace period to complete. */ 830 /* Wait for expedited grace period to complete. */
805 rdp = per_cpu_ptr(&rcu_data, raw_smp_processor_id());
806 rnp = rcu_get_root(); 831 rnp = rcu_get_root();
807 wait_event(rnp->exp_wq[rcu_seq_ctr(s) & 0x3], 832 wait_event(rnp->exp_wq[rcu_seq_ctr(s) & 0x3],
808 sync_exp_work_done(s)); 833 sync_exp_work_done(s));
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 1102765f91fd..acb225023ed1 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -11,29 +11,7 @@
11 * Paul E. McKenney <paulmck@linux.ibm.com> 11 * Paul E. McKenney <paulmck@linux.ibm.com>
12 */ 12 */
13 13
14#include <linux/delay.h>
15#include <linux/gfp.h>
16#include <linux/oom.h>
17#include <linux/sched/debug.h>
18#include <linux/smpboot.h>
19#include <linux/sched/isolation.h>
20#include <uapi/linux/sched/types.h>
21#include "../time/tick-internal.h"
22
23#ifdef CONFIG_RCU_BOOST
24#include "../locking/rtmutex_common.h" 14#include "../locking/rtmutex_common.h"
25#else /* #ifdef CONFIG_RCU_BOOST */
26
27/*
28 * Some architectures do not define rt_mutexes, but if !CONFIG_RCU_BOOST,
29 * all uses are in dead code. Provide a definition to keep the compiler
30 * happy, but add WARN_ON_ONCE() to complain if used in the wrong place.
31 * This probably needs to be excluded from -rt builds.
32 */
33#define rt_mutex_owner(a) ({ WARN_ON_ONCE(1); NULL; })
34#define rt_mutex_futex_unlock(x) WARN_ON_ONCE(1)
35
36#endif /* #else #ifdef CONFIG_RCU_BOOST */
37 15
38#ifdef CONFIG_RCU_NOCB_CPU 16#ifdef CONFIG_RCU_NOCB_CPU
39static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */ 17static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */
@@ -94,6 +72,8 @@ static void __init rcu_bootup_announce_oddness(void)
94 pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_init_delay); 72 pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_init_delay);
95 if (gp_cleanup_delay) 73 if (gp_cleanup_delay)
96 pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_cleanup_delay); 74 pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_cleanup_delay);
75 if (!use_softirq)
76 pr_info("\tRCU_SOFTIRQ processing moved to rcuc kthreads.\n");
97 if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG)) 77 if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG))
98 pr_info("\tRCU debug extended QS entry/exit.\n"); 78 pr_info("\tRCU debug extended QS entry/exit.\n");
99 rcupdate_announce_bootup_oddness(); 79 rcupdate_announce_bootup_oddness();
@@ -257,10 +237,10 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
257 * no need to check for a subsequent expedited GP. (Though we are 237 * no need to check for a subsequent expedited GP. (Though we are
258 * still in a quiescent state in any case.) 238 * still in a quiescent state in any case.)
259 */ 239 */
260 if (blkd_state & RCU_EXP_BLKD && rdp->deferred_qs) 240 if (blkd_state & RCU_EXP_BLKD && rdp->exp_deferred_qs)
261 rcu_report_exp_rdp(rdp); 241 rcu_report_exp_rdp(rdp);
262 else 242 else
263 WARN_ON_ONCE(rdp->deferred_qs); 243 WARN_ON_ONCE(rdp->exp_deferred_qs);
264} 244}
265 245
266/* 246/*
@@ -357,7 +337,7 @@ void rcu_note_context_switch(bool preempt)
357 * means that we continue to block the current grace period. 337 * means that we continue to block the current grace period.
358 */ 338 */
359 rcu_qs(); 339 rcu_qs();
360 if (rdp->deferred_qs) 340 if (rdp->exp_deferred_qs)
361 rcu_report_exp_rdp(rdp); 341 rcu_report_exp_rdp(rdp);
362 trace_rcu_utilization(TPS("End context switch")); 342 trace_rcu_utilization(TPS("End context switch"));
363 barrier(); /* Avoid RCU read-side critical sections leaking up. */ 343 barrier(); /* Avoid RCU read-side critical sections leaking up. */
@@ -471,14 +451,15 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
471 */ 451 */
472 special = t->rcu_read_unlock_special; 452 special = t->rcu_read_unlock_special;
473 rdp = this_cpu_ptr(&rcu_data); 453 rdp = this_cpu_ptr(&rcu_data);
474 if (!special.s && !rdp->deferred_qs) { 454 if (!special.s && !rdp->exp_deferred_qs) {
475 local_irq_restore(flags); 455 local_irq_restore(flags);
476 return; 456 return;
477 } 457 }
458 t->rcu_read_unlock_special.b.deferred_qs = false;
478 if (special.b.need_qs) { 459 if (special.b.need_qs) {
479 rcu_qs(); 460 rcu_qs();
480 t->rcu_read_unlock_special.b.need_qs = false; 461 t->rcu_read_unlock_special.b.need_qs = false;
481 if (!t->rcu_read_unlock_special.s && !rdp->deferred_qs) { 462 if (!t->rcu_read_unlock_special.s && !rdp->exp_deferred_qs) {
482 local_irq_restore(flags); 463 local_irq_restore(flags);
483 return; 464 return;
484 } 465 }
@@ -490,7 +471,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
490 * tasks are handled when removing the task from the 471 * tasks are handled when removing the task from the
491 * blocked-tasks list below. 472 * blocked-tasks list below.
492 */ 473 */
493 if (rdp->deferred_qs) { 474 if (rdp->exp_deferred_qs) {
494 rcu_report_exp_rdp(rdp); 475 rcu_report_exp_rdp(rdp);
495 if (!t->rcu_read_unlock_special.s) { 476 if (!t->rcu_read_unlock_special.s) {
496 local_irq_restore(flags); 477 local_irq_restore(flags);
@@ -579,7 +560,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
579 */ 560 */
580static bool rcu_preempt_need_deferred_qs(struct task_struct *t) 561static bool rcu_preempt_need_deferred_qs(struct task_struct *t)
581{ 562{
582 return (__this_cpu_read(rcu_data.deferred_qs) || 563 return (__this_cpu_read(rcu_data.exp_deferred_qs) ||
583 READ_ONCE(t->rcu_read_unlock_special.s)) && 564 READ_ONCE(t->rcu_read_unlock_special.s)) &&
584 t->rcu_read_lock_nesting <= 0; 565 t->rcu_read_lock_nesting <= 0;
585} 566}
@@ -607,6 +588,17 @@ static void rcu_preempt_deferred_qs(struct task_struct *t)
607} 588}
608 589
609/* 590/*
591 * Minimal handler to give the scheduler a chance to re-evaluate.
592 */
593static void rcu_preempt_deferred_qs_handler(struct irq_work *iwp)
594{
595 struct rcu_data *rdp;
596
597 rdp = container_of(iwp, struct rcu_data, defer_qs_iw);
598 rdp->defer_qs_iw_pending = false;
599}
600
601/*
610 * Handle special cases during rcu_read_unlock(), such as needing to 602 * Handle special cases during rcu_read_unlock(), such as needing to
611 * notify RCU core processing or task having blocked during the RCU 603 * notify RCU core processing or task having blocked during the RCU
612 * read-side critical section. 604 * read-side critical section.
@@ -625,16 +617,41 @@ static void rcu_read_unlock_special(struct task_struct *t)
625 local_irq_save(flags); 617 local_irq_save(flags);
626 irqs_were_disabled = irqs_disabled_flags(flags); 618 irqs_were_disabled = irqs_disabled_flags(flags);
627 if (preempt_bh_were_disabled || irqs_were_disabled) { 619 if (preempt_bh_were_disabled || irqs_were_disabled) {
628 WRITE_ONCE(t->rcu_read_unlock_special.b.exp_hint, false); 620 bool exp;
629 /* Need to defer quiescent state until everything is enabled. */ 621 struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
630 if (irqs_were_disabled) { 622 struct rcu_node *rnp = rdp->mynode;
631 /* Enabling irqs does not reschedule, so... */ 623
624 t->rcu_read_unlock_special.b.exp_hint = false;
625 exp = (t->rcu_blocked_node && t->rcu_blocked_node->exp_tasks) ||
626 (rdp->grpmask & rnp->expmask) ||
627 tick_nohz_full_cpu(rdp->cpu);
628 // Need to defer quiescent state until everything is enabled.
629 if ((exp || in_irq()) && irqs_were_disabled && use_softirq &&
630 (in_irq() || !t->rcu_read_unlock_special.b.deferred_qs)) {
631 // Using softirq, safe to awaken, and we get
632 // no help from enabling irqs, unlike bh/preempt.
632 raise_softirq_irqoff(RCU_SOFTIRQ); 633 raise_softirq_irqoff(RCU_SOFTIRQ);
634 } else if (exp && irqs_were_disabled && !use_softirq &&
635 !t->rcu_read_unlock_special.b.deferred_qs) {
636 // Safe to awaken and we get no help from enabling
637 // irqs, unlike bh/preempt.
638 invoke_rcu_core();
633 } else { 639 } else {
634 /* Enabling BH or preempt does reschedule, so... */ 640 // Enabling BH or preempt does reschedule, so...
641 // Also if no expediting or NO_HZ_FULL, slow is OK.
635 set_tsk_need_resched(current); 642 set_tsk_need_resched(current);
636 set_preempt_need_resched(); 643 set_preempt_need_resched();
644 if (IS_ENABLED(CONFIG_IRQ_WORK) &&
645 !rdp->defer_qs_iw_pending && exp) {
646 // Get scheduler to re-evaluate and call hooks.
647 // If !IRQ_WORK, FQS scan will eventually IPI.
648 init_irq_work(&rdp->defer_qs_iw,
649 rcu_preempt_deferred_qs_handler);
650 rdp->defer_qs_iw_pending = true;
651 irq_work_queue_on(&rdp->defer_qs_iw, rdp->cpu);
652 }
637 } 653 }
654 t->rcu_read_unlock_special.b.deferred_qs = true;
638 local_irq_restore(flags); 655 local_irq_restore(flags);
639 return; 656 return;
640 } 657 }
@@ -760,7 +777,7 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
760 i = 0; 777 i = 0;
761 list_for_each(lhp, &rnp->blkd_tasks) { 778 list_for_each(lhp, &rnp->blkd_tasks) {
762 pr_cont(" %p", lhp); 779 pr_cont(" %p", lhp);
763 if (++i >= 10) 780 if (++i >= ncheck)
764 break; 781 break;
765 } 782 }
766 pr_cont("\n"); 783 pr_cont("\n");
@@ -944,18 +961,21 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
944 961
945#endif /* #else #ifdef CONFIG_PREEMPT_RCU */ 962#endif /* #else #ifdef CONFIG_PREEMPT_RCU */
946 963
964/*
965 * If boosting, set rcuc kthreads to realtime priority.
966 */
967static void rcu_cpu_kthread_setup(unsigned int cpu)
968{
947#ifdef CONFIG_RCU_BOOST 969#ifdef CONFIG_RCU_BOOST
970 struct sched_param sp;
948 971
949static void rcu_wake_cond(struct task_struct *t, int status) 972 sp.sched_priority = kthread_prio;
950{ 973 sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
951 /* 974#endif /* #ifdef CONFIG_RCU_BOOST */
952 * If the thread is yielding, only wake it when this
953 * is invoked from idle
954 */
955 if (status != RCU_KTHREAD_YIELDING || is_idle_task(current))
956 wake_up_process(t);
957} 975}
958 976
977#ifdef CONFIG_RCU_BOOST
978
959/* 979/*
960 * Carry out RCU priority boosting on the task indicated by ->exp_tasks 980 * Carry out RCU priority boosting on the task indicated by ->exp_tasks
961 * or ->boost_tasks, advancing the pointer to the next task in the 981 * or ->boost_tasks, advancing the pointer to the next task in the
@@ -1091,23 +1111,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
1091} 1111}
1092 1112
1093/* 1113/*
1094 * Wake up the per-CPU kthread to invoke RCU callbacks.
1095 */
1096static void invoke_rcu_callbacks_kthread(void)
1097{
1098 unsigned long flags;
1099
1100 local_irq_save(flags);
1101 __this_cpu_write(rcu_data.rcu_cpu_has_work, 1);
1102 if (__this_cpu_read(rcu_data.rcu_cpu_kthread_task) != NULL &&
1103 current != __this_cpu_read(rcu_data.rcu_cpu_kthread_task)) {
1104 rcu_wake_cond(__this_cpu_read(rcu_data.rcu_cpu_kthread_task),
1105 __this_cpu_read(rcu_data.rcu_cpu_kthread_status));
1106 }
1107 local_irq_restore(flags);
1108}
1109
1110/*
1111 * Is the current CPU running the RCU-callbacks kthread? 1114 * Is the current CPU running the RCU-callbacks kthread?
1112 * Caller must have preemption disabled. 1115 * Caller must have preemption disabled.
1113 */ 1116 */
@@ -1160,59 +1163,6 @@ static int rcu_spawn_one_boost_kthread(struct rcu_node *rnp)
1160 return 0; 1163 return 0;
1161} 1164}
1162 1165
1163static void rcu_cpu_kthread_setup(unsigned int cpu)
1164{
1165 struct sched_param sp;
1166
1167 sp.sched_priority = kthread_prio;
1168 sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
1169}
1170
1171static void rcu_cpu_kthread_park(unsigned int cpu)
1172{
1173 per_cpu(rcu_data.rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
1174}
1175
1176static int rcu_cpu_kthread_should_run(unsigned int cpu)
1177{
1178 return __this_cpu_read(rcu_data.rcu_cpu_has_work);
1179}
1180
1181/*
1182 * Per-CPU kernel thread that invokes RCU callbacks. This replaces
1183 * the RCU softirq used in configurations of RCU that do not support RCU
1184 * priority boosting.
1185 */
1186static void rcu_cpu_kthread(unsigned int cpu)
1187{
1188 unsigned int *statusp = this_cpu_ptr(&rcu_data.rcu_cpu_kthread_status);
1189 char work, *workp = this_cpu_ptr(&rcu_data.rcu_cpu_has_work);
1190 int spincnt;
1191
1192 for (spincnt = 0; spincnt < 10; spincnt++) {
1193 trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
1194 local_bh_disable();
1195 *statusp = RCU_KTHREAD_RUNNING;
1196 local_irq_disable();
1197 work = *workp;
1198 *workp = 0;
1199 local_irq_enable();
1200 if (work)
1201 rcu_do_batch(this_cpu_ptr(&rcu_data));
1202 local_bh_enable();
1203 if (*workp == 0) {
1204 trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
1205 *statusp = RCU_KTHREAD_WAITING;
1206 return;
1207 }
1208 }
1209 *statusp = RCU_KTHREAD_YIELDING;
1210 trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
1211 schedule_timeout_interruptible(2);
1212 trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
1213 *statusp = RCU_KTHREAD_WAITING;
1214}
1215
1216/* 1166/*
1217 * Set the per-rcu_node kthread's affinity to cover all CPUs that are 1167 * Set the per-rcu_node kthread's affinity to cover all CPUs that are
1218 * served by the rcu_node in question. The CPU hotplug lock is still 1168 * served by the rcu_node in question. The CPU hotplug lock is still
@@ -1243,27 +1193,13 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
1243 free_cpumask_var(cm); 1193 free_cpumask_var(cm);
1244} 1194}
1245 1195
1246static struct smp_hotplug_thread rcu_cpu_thread_spec = {
1247 .store = &rcu_data.rcu_cpu_kthread_task,
1248 .thread_should_run = rcu_cpu_kthread_should_run,
1249 .thread_fn = rcu_cpu_kthread,
1250 .thread_comm = "rcuc/%u",
1251 .setup = rcu_cpu_kthread_setup,
1252 .park = rcu_cpu_kthread_park,
1253};
1254
1255/* 1196/*
1256 * Spawn boost kthreads -- called as soon as the scheduler is running. 1197 * Spawn boost kthreads -- called as soon as the scheduler is running.
1257 */ 1198 */
1258static void __init rcu_spawn_boost_kthreads(void) 1199static void __init rcu_spawn_boost_kthreads(void)
1259{ 1200{
1260 struct rcu_node *rnp; 1201 struct rcu_node *rnp;
1261 int cpu;
1262 1202
1263 for_each_possible_cpu(cpu)
1264 per_cpu(rcu_data.rcu_cpu_has_work, cpu) = 0;
1265 if (WARN_ONCE(smpboot_register_percpu_thread(&rcu_cpu_thread_spec), "%s: Could not start rcub kthread, OOM is now expected behavior\n", __func__))
1266 return;
1267 rcu_for_each_leaf_node(rnp) 1203 rcu_for_each_leaf_node(rnp)
1268 (void)rcu_spawn_one_boost_kthread(rnp); 1204 (void)rcu_spawn_one_boost_kthread(rnp);
1269} 1205}
@@ -1286,11 +1222,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
1286 raw_spin_unlock_irqrestore_rcu_node(rnp, flags); 1222 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
1287} 1223}
1288 1224
1289static void invoke_rcu_callbacks_kthread(void)
1290{
1291 WARN_ON_ONCE(1);
1292}
1293
1294static bool rcu_is_callbacks_kthread(void) 1225static bool rcu_is_callbacks_kthread(void)
1295{ 1226{
1296 return false; 1227 return false;
diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
index f65a73a97323..065183391f75 100644
--- a/kernel/rcu/tree_stall.h
+++ b/kernel/rcu/tree_stall.h
@@ -630,7 +630,9 @@ static void rcu_check_gp_start_stall(struct rcu_node *rnp, struct rcu_data *rdp,
630 time_before(j, rcu_state.gp_req_activity + gpssdelay) || 630 time_before(j, rcu_state.gp_req_activity + gpssdelay) ||
631 time_before(j, rcu_state.gp_activity + gpssdelay) || 631 time_before(j, rcu_state.gp_activity + gpssdelay) ||
632 atomic_xchg(&warned, 1)) { 632 atomic_xchg(&warned, 1)) {
633 raw_spin_unlock_rcu_node(rnp_root); /* irqs remain disabled. */ 633 if (rnp_root != rnp)
634 /* irqs remain disabled. */
635 raw_spin_unlock_rcu_node(rnp_root);
634 raw_spin_unlock_irqrestore_rcu_node(rnp, flags); 636 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
635 return; 637 return;
636 } 638 }
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index c3bf44ba42e5..61df2bf08563 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -423,6 +423,19 @@ EXPORT_SYMBOL_GPL(do_trace_rcu_torture_read);
423 do { } while (0) 423 do { } while (0)
424#endif 424#endif
425 425
426#if IS_ENABLED(CONFIG_RCU_TORTURE_TEST) || IS_MODULE(CONFIG_RCU_TORTURE_TEST)
427/* Get rcutorture access to sched_setaffinity(). */
428long rcutorture_sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
429{
430 int ret;
431
432 ret = sched_setaffinity(pid, in_mask);
433 WARN_ONCE(ret, "%s: sched_setaffinity() returned %d\n", __func__, ret);
434 return ret;
435}
436EXPORT_SYMBOL_GPL(rcutorture_sched_setaffinity);
437#endif
438
426#ifdef CONFIG_RCU_STALL_COMMON 439#ifdef CONFIG_RCU_STALL_COMMON
427int rcu_cpu_stall_suppress __read_mostly; /* 1 = suppress stall warnings. */ 440int rcu_cpu_stall_suppress __read_mostly; /* 1 = suppress stall warnings. */
428EXPORT_SYMBOL_GPL(rcu_cpu_stall_suppress); 441EXPORT_SYMBOL_GPL(rcu_cpu_stall_suppress);
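The wrapper is exported so that the rcutorture module can reach sched_setaffinity(), which is not itself exported to modules. A sketch of a caller (the function and names here are illustrative, not taken from rcutorture):

	/* Bind the calling torture kthread to a single CPU. */
	static void torture_bind_current(int cpu)
	{
		long err;

		err = rcutorture_sched_setaffinity(current->pid, cpumask_of(cpu));
		if (err)
			pr_warn("%s: CPU %d affinity failed: %ld\n", __func__, cpu, err);
	}

Note that the wrapper already WARNs on failure, so callers can usually just pass the mask through.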
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index b798fe7ff7cd..036be95a87e9 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5922,6 +5922,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
5922 u64 time, cost; 5922 u64 time, cost;
5923 s64 delta; 5923 s64 delta;
5924 int cpu, nr = INT_MAX; 5924 int cpu, nr = INT_MAX;
5925 int this = smp_processor_id();
5925 5926
5926 this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc)); 5927 this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
5927 if (!this_sd) 5928 if (!this_sd)
@@ -5945,7 +5946,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
5945 nr = 4; 5946 nr = 4;
5946 } 5947 }
5947 5948
5948 time = local_clock(); 5949 time = cpu_clock(this);
5949 5950
5950 for_each_cpu_wrap(cpu, sched_domain_span(sd), target) { 5951 for_each_cpu_wrap(cpu, sched_domain_span(sd), target) {
5951 if (!--nr) 5952 if (!--nr)
@@ -5956,7 +5957,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
5956 break; 5957 break;
5957 } 5958 }
5958 5959
5959 time = local_clock() - time; 5960 time = cpu_clock(this) - time;
5960 cost = this_sd->avg_scan_cost; 5961 cost = this_sd->avg_scan_cost;
5961 delta = (s64)(time - cost) / 8; 5962 delta = (s64)(time - cost) / 8;
5962 this_sd->avg_scan_cost += delta; 5963 this_sd->avg_scan_cost += delta;
diff --git a/kernel/signal.c b/kernel/signal.c
index d622eac9d169..edf8915ddd54 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2912,7 +2912,8 @@ EXPORT_SYMBOL(set_compat_user_sigmask);
2912 * This is useful for syscalls such as ppoll, pselect, io_pgetevents and 2912 * This is useful for syscalls such as ppoll, pselect, io_pgetevents and
2913 * epoll_pwait where a new sigmask is passed in from userland for the syscalls. 2913 * epoll_pwait where a new sigmask is passed in from userland for the syscalls.
2914 */ 2914 */
2915void restore_user_sigmask(const void __user *usigmask, sigset_t *sigsaved) 2915void restore_user_sigmask(const void __user *usigmask, sigset_t *sigsaved,
2916 bool interrupted)
2916{ 2917{
2917 2918
2918 if (!usigmask) 2919 if (!usigmask)
@@ -2922,7 +2923,7 @@ void restore_user_sigmask(const void __user *usigmask, sigset_t *sigsaved)
2922 * Restoring sigmask here can lead to delivering signals that the above 2923 * Restoring sigmask here can lead to delivering signals that the above
2923 * syscalls are intended to block because of the sigmask passed in. 2924 * syscalls are intended to block because of the sigmask passed in.
2924 */ 2925 */
2925 if (signal_pending(current)) { 2926 if (interrupted) {
2926 current->saved_sigmask = *sigsaved; 2927 current->saved_sigmask = *sigsaved;
2927 set_restore_sigmask(); 2928 set_restore_sigmask();
2928 return; 2929 return;
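restore_user_sigmask() now takes an explicit interrupted flag, so each syscall reports whether it actually returned early instead of having the helper guess from signal_pending(). A sketch of the updated call pattern in a ppoll-style caller (the surrounding code is illustrative, not part of this hunk):

	ret = do_sys_poll(ufds, nfds, to);

	/* Pass the syscall's own outcome rather than re-testing signal_pending(). */
	restore_user_sigmask(sigmask, &sigsaved, ret == -EINTR);
	if (ret == -EINTR)
		ret = -ERESTARTNOHAND;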
diff --git a/kernel/smp.c b/kernel/smp.c
index d155374632eb..616d4d114847 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -34,7 +34,7 @@ struct call_function_data {
34 cpumask_var_t cpumask_ipi; 34 cpumask_var_t cpumask_ipi;
35}; 35};
36 36
37static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_function_data, cfd_data); 37static DEFINE_PER_CPU_ALIGNED(struct call_function_data, cfd_data);
38 38
39static DEFINE_PER_CPU_SHARED_ALIGNED(struct llist_head, call_single_queue); 39static DEFINE_PER_CPU_SHARED_ALIGNED(struct llist_head, call_single_queue);
40 40
@@ -487,13 +487,11 @@ EXPORT_SYMBOL(smp_call_function_many);
487 * You must not call this function with disabled interrupts or from a 487 * You must not call this function with disabled interrupts or from a
488 * hardware interrupt handler or from a bottom half handler. 488 * hardware interrupt handler or from a bottom half handler.
489 */ 489 */
490int smp_call_function(smp_call_func_t func, void *info, int wait) 490void smp_call_function(smp_call_func_t func, void *info, int wait)
491{ 491{
492 preempt_disable(); 492 preempt_disable();
493 smp_call_function_many(cpu_online_mask, func, info, wait); 493 smp_call_function_many(cpu_online_mask, func, info, wait);
494 preempt_enable(); 494 preempt_enable();
495
496 return 0;
497} 495}
498EXPORT_SYMBOL(smp_call_function); 496EXPORT_SYMBOL(smp_call_function);
499 497
@@ -594,18 +592,16 @@ void __init smp_init(void)
594 * early_boot_irqs_disabled is set. Use local_irq_save/restore() instead 592 * early_boot_irqs_disabled is set. Use local_irq_save/restore() instead
595 * of local_irq_disable/enable(). 593 * of local_irq_disable/enable().
596 */ 594 */
597int on_each_cpu(void (*func) (void *info), void *info, int wait) 595void on_each_cpu(void (*func) (void *info), void *info, int wait)
598{ 596{
599 unsigned long flags; 597 unsigned long flags;
600 int ret = 0;
601 598
602 preempt_disable(); 599 preempt_disable();
603 ret = smp_call_function(func, info, wait); 600 smp_call_function(func, info, wait);
604 local_irq_save(flags); 601 local_irq_save(flags);
605 func(info); 602 func(info);
606 local_irq_restore(flags); 603 local_irq_restore(flags);
607 preempt_enable(); 604 preempt_enable();
608 return ret;
609} 605}
610EXPORT_SYMBOL(on_each_cpu); 606EXPORT_SYMBOL(on_each_cpu);
611 607
diff --git a/kernel/softirq.c b/kernel/softirq.c
index a6b81c6b6bff..0427a86743a4 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -649,7 +649,7 @@ static int takeover_tasklets(unsigned int cpu)
649 /* Find end, append list for that CPU. */ 649 /* Find end, append list for that CPU. */
650 if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) { 650 if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) {
651 *__this_cpu_read(tasklet_vec.tail) = per_cpu(tasklet_vec, cpu).head; 651 *__this_cpu_read(tasklet_vec.tail) = per_cpu(tasklet_vec, cpu).head;
652 this_cpu_write(tasklet_vec.tail, per_cpu(tasklet_vec, cpu).tail); 652 __this_cpu_write(tasklet_vec.tail, per_cpu(tasklet_vec, cpu).tail);
653 per_cpu(tasklet_vec, cpu).head = NULL; 653 per_cpu(tasklet_vec, cpu).head = NULL;
654 per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head; 654 per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head;
655 } 655 }
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 2b5a6754646f..b4f83f7bdf86 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -177,12 +177,18 @@ static void ack_state(struct multi_stop_data *msdata)
177 set_state(msdata, msdata->state + 1); 177 set_state(msdata, msdata->state + 1);
178} 178}
179 179
180void __weak stop_machine_yield(const struct cpumask *cpumask)
181{
182 cpu_relax();
183}
184
180/* This is the cpu_stop function which stops the CPU. */ 185/* This is the cpu_stop function which stops the CPU. */
181static int multi_cpu_stop(void *data) 186static int multi_cpu_stop(void *data)
182{ 187{
183 struct multi_stop_data *msdata = data; 188 struct multi_stop_data *msdata = data;
184 enum multi_stop_state curstate = MULTI_STOP_NONE; 189 enum multi_stop_state curstate = MULTI_STOP_NONE;
185 int cpu = smp_processor_id(), err = 0; 190 int cpu = smp_processor_id(), err = 0;
191 const struct cpumask *cpumask;
186 unsigned long flags; 192 unsigned long flags;
187 bool is_active; 193 bool is_active;
188 194
@@ -192,15 +198,18 @@ static int multi_cpu_stop(void *data)
192 */ 198 */
193 local_save_flags(flags); 199 local_save_flags(flags);
194 200
195 if (!msdata->active_cpus) 201 if (!msdata->active_cpus) {
196 is_active = cpu == cpumask_first(cpu_online_mask); 202 cpumask = cpu_online_mask;
197 else 203 is_active = cpu == cpumask_first(cpumask);
198 is_active = cpumask_test_cpu(cpu, msdata->active_cpus); 204 } else {
205 cpumask = msdata->active_cpus;
206 is_active = cpumask_test_cpu(cpu, cpumask);
207 }
199 208
200 /* Simple state machine */ 209 /* Simple state machine */
201 do { 210 do {
202 /* Chill out and ensure we re-read multi_stop_state. */ 211 /* Chill out and ensure we re-read multi_stop_state. */
203 cpu_relax_yield(); 212 stop_machine_yield(cpumask);
204 if (msdata->state != curstate) { 213 if (msdata->state != curstate) {
205 curstate = msdata->state; 214 curstate = msdata->state;
206 switch (curstate) { 215 switch (curstate) {
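stop_machine_yield() is introduced as a __weak hook so the generic busy-wait in multi_cpu_stop() stays a plain cpu_relax(), while an architecture can override it, for example to direct a hypervisor yield toward the CPUs in @cpumask that have not yet reached the next state. A sketch of such an override (purely illustrative; the hypervisor helper is a placeholder, not a real API):

	/* Hypothetical architecture override of the weak default in stop_machine.c. */
	void stop_machine_yield(const struct cpumask *cpumask)
	{
		/* e.g. tell the hypervisor which CPUs we are still waiting for */
		arch_hypervisor_yield_to(cpumask);	/* hypothetical helper */
	}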
diff --git a/kernel/time/Makefile b/kernel/time/Makefile
index f1e46f338a9c..1867044800bb 100644
--- a/kernel/time/Makefile
+++ b/kernel/time/Makefile
@@ -16,5 +16,6 @@ ifeq ($(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST),y)
16endif 16endif
17obj-$(CONFIG_GENERIC_SCHED_CLOCK) += sched_clock.o 17obj-$(CONFIG_GENERIC_SCHED_CLOCK) += sched_clock.o
18obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o tick-sched.o 18obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o tick-sched.o
19obj-$(CONFIG_HAVE_GENERIC_VDSO) += vsyscall.o
19obj-$(CONFIG_DEBUG_FS) += timekeeping_debug.o 20obj-$(CONFIG_DEBUG_FS) += timekeeping_debug.o
20obj-$(CONFIG_TEST_UDELAY) += test_udelay.o 21obj-$(CONFIG_TEST_UDELAY) += test_udelay.o
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 0519a8805aab..57518efc3810 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -233,7 +233,6 @@ EXPORT_SYMBOL_GPL(alarm_expires_remaining);
233/** 233/**
234 * alarmtimer_suspend - Suspend time callback 234 * alarmtimer_suspend - Suspend time callback
235 * @dev: unused 235 * @dev: unused
236 * @state: unused
237 * 236 *
238 * When we are going into suspend, we look through the bases 237 * When we are going into suspend, we look through the bases
239 * to see which is the soonest timer to expire. We then 238 * to see which is the soonest timer to expire. We then
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 3bcc19ceb073..fff5f64981c6 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -105,12 +105,12 @@ static DEFINE_SPINLOCK(watchdog_lock);
105static int watchdog_running; 105static int watchdog_running;
106static atomic_t watchdog_reset_pending; 106static atomic_t watchdog_reset_pending;
107 107
108static void inline clocksource_watchdog_lock(unsigned long *flags) 108static inline void clocksource_watchdog_lock(unsigned long *flags)
109{ 109{
110 spin_lock_irqsave(&watchdog_lock, *flags); 110 spin_lock_irqsave(&watchdog_lock, *flags);
111} 111}
112 112
113static void inline clocksource_watchdog_unlock(unsigned long *flags) 113static inline void clocksource_watchdog_unlock(unsigned long *flags)
114{ 114{
115 spin_unlock_irqrestore(&watchdog_lock, *flags); 115 spin_unlock_irqrestore(&watchdog_lock, *flags);
116} 116}
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 41dfff23c1f9..5ee77f1a8a92 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -30,7 +30,6 @@
30#include <linux/syscalls.h> 30#include <linux/syscalls.h>
31#include <linux/interrupt.h> 31#include <linux/interrupt.h>
32#include <linux/tick.h> 32#include <linux/tick.h>
33#include <linux/seq_file.h>
34#include <linux/err.h> 33#include <linux/err.h>
35#include <linux/debugobjects.h> 34#include <linux/debugobjects.h>
36#include <linux/sched/signal.h> 35#include <linux/sched/signal.h>
@@ -1115,9 +1114,10 @@ EXPORT_SYMBOL_GPL(hrtimer_start_range_ns);
1115 * @timer: hrtimer to stop 1114 * @timer: hrtimer to stop
1116 * 1115 *
1117 * Returns: 1116 * Returns:
1118 * 0 when the timer was not active 1117 *
1119 * 1 when the timer was active 1118 * * 0 when the timer was not active
1120 * -1 when the timer is currently executing the callback function and 1119 * * 1 when the timer was active
1120 * * -1 when the timer is currently executing the callback function and
1121 * cannot be stopped 1121 * cannot be stopped
1122 */ 1122 */
1123int hrtimer_try_to_cancel(struct hrtimer *timer) 1123int hrtimer_try_to_cancel(struct hrtimer *timer)
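The reworked kerneldoc turns the return values into a proper list; the contract itself is unchanged. A caller that cannot tolerate a still-running callback must retry on -1, roughly as hrtimer_cancel() does (my_timer is an illustrative name):

	for (;;) {
		int ret = hrtimer_try_to_cancel(&my_timer);

		if (ret >= 0)		/* 0: was not active, 1: cancelled it */
			break;
		cpu_relax();		/* -1: callback still running, try again */
	}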
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 8de4f789dc1b..65eb796610dc 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -43,6 +43,7 @@ static u64 tick_length_base;
43#define MAX_TICKADJ 500LL /* usecs */ 43#define MAX_TICKADJ 500LL /* usecs */
44#define MAX_TICKADJ_SCALED \ 44#define MAX_TICKADJ_SCALED \
45 (((MAX_TICKADJ * NSEC_PER_USEC) << NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ) 45 (((MAX_TICKADJ * NSEC_PER_USEC) << NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ)
46#define MAX_TAI_OFFSET 100000
46 47
47/* 48/*
48 * phase-lock loop variables 49 * phase-lock loop variables
@@ -691,7 +692,8 @@ static inline void process_adjtimex_modes(const struct __kernel_timex *txc,
691 time_constant = max(time_constant, 0l); 692 time_constant = max(time_constant, 0l);
692 } 693 }
693 694
694 if (txc->modes & ADJ_TAI && txc->constant >= 0) 695 if (txc->modes & ADJ_TAI &&
696 txc->constant >= 0 && txc->constant <= MAX_TAI_OFFSET)
695 *time_tai = txc->constant; 697 *time_tai = txc->constant;
696 698
697 if (txc->modes & ADJ_OFFSET) 699 if (txc->modes & ADJ_OFFSET)
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 29176635991f..d7f2d91acdac 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -980,23 +980,16 @@ retry_delete:
980 */ 980 */
981static void itimer_delete(struct k_itimer *timer) 981static void itimer_delete(struct k_itimer *timer)
982{ 982{
983 unsigned long flags;
984
985retry_delete: 983retry_delete:
986 spin_lock_irqsave(&timer->it_lock, flags); 984 spin_lock_irq(&timer->it_lock);
987 985
988 if (timer_delete_hook(timer) == TIMER_RETRY) { 986 if (timer_delete_hook(timer) == TIMER_RETRY) {
989 unlock_timer(timer, flags); 987 spin_unlock_irq(&timer->it_lock);
990 goto retry_delete; 988 goto retry_delete;
991 } 989 }
992 list_del(&timer->list); 990 list_del(&timer->list);
993 /*
994 * This keeps any tasks waiting on the spin lock from thinking
995 * they got something (see the lock code above).
996 */
997 timer->it_signal = NULL;
998 991
999 unlock_timer(timer, flags); 992 spin_unlock_irq(&timer->it_lock);
1000 release_posix_timer(timer, IT_ID_SET); 993 release_posix_timer(timer, IT_ID_SET);
1001} 994}
1002 995
diff --git a/kernel/time/time.c b/kernel/time/time.c
index 7f7d6914ddd5..5c54ca632d08 100644
--- a/kernel/time/time.c
+++ b/kernel/time/time.c
@@ -251,6 +251,10 @@ COMPAT_SYSCALL_DEFINE2(settimeofday, struct old_timeval32 __user *, tv,
251 if (tv) { 251 if (tv) {
252 if (compat_get_timeval(&user_tv, tv)) 252 if (compat_get_timeval(&user_tv, tv))
253 return -EFAULT; 253 return -EFAULT;
254
255 if (!timeval_valid(&user_tv))
256 return -EINVAL;
257
254 new_ts.tv_sec = user_tv.tv_sec; 258 new_ts.tv_sec = user_tv.tv_sec;
255 new_ts.tv_nsec = user_tv.tv_usec * NSEC_PER_USEC; 259 new_ts.tv_nsec = user_tv.tv_usec * NSEC_PER_USEC;
256 } 260 }
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 44b726bab4bd..d911c8470149 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -819,7 +819,7 @@ ktime_t ktime_get_coarse_with_offset(enum tk_offsets offs)
819 819
820 } while (read_seqcount_retry(&tk_core.seq, seq)); 820 } while (read_seqcount_retry(&tk_core.seq, seq));
821 821
822 return base + nsecs; 822 return ktime_add_ns(base, nsecs);
823} 823}
824EXPORT_SYMBOL_GPL(ktime_get_coarse_with_offset); 824EXPORT_SYMBOL_GPL(ktime_get_coarse_with_offset);
825 825
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index 98ba50dcb1b2..acb326f5f50a 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -282,23 +282,6 @@ static inline void timer_list_header(struct seq_file *m, u64 now)
282 SEQ_printf(m, "\n"); 282 SEQ_printf(m, "\n");
283} 283}
284 284
285static int timer_list_show(struct seq_file *m, void *v)
286{
287 struct timer_list_iter *iter = v;
288
289 if (iter->cpu == -1 && !iter->second_pass)
290 timer_list_header(m, iter->now);
291 else if (!iter->second_pass)
292 print_cpu(m, iter->cpu, iter->now);
293#ifdef CONFIG_GENERIC_CLOCKEVENTS
294 else if (iter->cpu == -1 && iter->second_pass)
295 timer_list_show_tickdevices_header(m);
296 else
297 print_tickdevice(m, tick_get_device(iter->cpu), iter->cpu);
298#endif
299 return 0;
300}
301
302void sysrq_timer_list_show(void) 285void sysrq_timer_list_show(void)
303{ 286{
304 u64 now = ktime_to_ns(ktime_get()); 287 u64 now = ktime_to_ns(ktime_get());
@@ -317,6 +300,24 @@ void sysrq_timer_list_show(void)
317 return; 300 return;
318} 301}
319 302
303#ifdef CONFIG_PROC_FS
304static int timer_list_show(struct seq_file *m, void *v)
305{
306 struct timer_list_iter *iter = v;
307
308 if (iter->cpu == -1 && !iter->second_pass)
309 timer_list_header(m, iter->now);
310 else if (!iter->second_pass)
311 print_cpu(m, iter->cpu, iter->now);
312#ifdef CONFIG_GENERIC_CLOCKEVENTS
313 else if (iter->cpu == -1 && iter->second_pass)
314 timer_list_show_tickdevices_header(m);
315 else
316 print_tickdevice(m, tick_get_device(iter->cpu), iter->cpu);
317#endif
318 return 0;
319}
320
320static void *move_iter(struct timer_list_iter *iter, loff_t offset) 321static void *move_iter(struct timer_list_iter *iter, loff_t offset)
321{ 322{
322 for (; offset; offset--) { 323 for (; offset; offset--) {
@@ -376,3 +377,4 @@ static int __init init_timer_list_procfs(void)
376 return 0; 377 return 0;
377} 378}
378__initcall(init_timer_list_procfs); 379__initcall(init_timer_list_procfs);
380#endif
diff --git a/kernel/time/vsyscall.c b/kernel/time/vsyscall.c
new file mode 100644
index 000000000000..a80893180826
--- /dev/null
+++ b/kernel/time/vsyscall.c
@@ -0,0 +1,133 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright 2019 ARM Ltd.
4 *
5 * Generic implementation of update_vsyscall and update_vsyscall_tz.
6 *
7 * Based on the x86 specific implementation.
8 */
9
10#include <linux/hrtimer.h>
11#include <linux/timekeeper_internal.h>
12#include <vdso/datapage.h>
13#include <vdso/helpers.h>
14#include <vdso/vsyscall.h>
15
16static inline void update_vdso_data(struct vdso_data *vdata,
17 struct timekeeper *tk)
18{
19 struct vdso_timestamp *vdso_ts;
20 u64 nsec;
21
22 vdata[CS_HRES_COARSE].cycle_last = tk->tkr_mono.cycle_last;
23 vdata[CS_HRES_COARSE].mask = tk->tkr_mono.mask;
24 vdata[CS_HRES_COARSE].mult = tk->tkr_mono.mult;
25 vdata[CS_HRES_COARSE].shift = tk->tkr_mono.shift;
26 vdata[CS_RAW].cycle_last = tk->tkr_raw.cycle_last;
27 vdata[CS_RAW].mask = tk->tkr_raw.mask;
28 vdata[CS_RAW].mult = tk->tkr_raw.mult;
29 vdata[CS_RAW].shift = tk->tkr_raw.shift;
30
31 /* CLOCK_REALTIME */
32 vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_REALTIME];
33 vdso_ts->sec = tk->xtime_sec;
34 vdso_ts->nsec = tk->tkr_mono.xtime_nsec;
35
36 /* CLOCK_MONOTONIC */
37 vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_MONOTONIC];
38 vdso_ts->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
39
40 nsec = tk->tkr_mono.xtime_nsec;
41 nsec += ((u64)tk->wall_to_monotonic.tv_nsec << tk->tkr_mono.shift);
42 while (nsec >= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
43 nsec -= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift);
44 vdso_ts->sec++;
45 }
46 vdso_ts->nsec = nsec;
47
48 /* CLOCK_MONOTONIC_RAW */
49 vdso_ts = &vdata[CS_RAW].basetime[CLOCK_MONOTONIC_RAW];
50 vdso_ts->sec = tk->raw_sec;
51 vdso_ts->nsec = tk->tkr_raw.xtime_nsec;
52
53 /* CLOCK_BOOTTIME */
54 vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_BOOTTIME];
55 vdso_ts->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
56 nsec = tk->tkr_mono.xtime_nsec;
57 nsec += ((u64)(tk->wall_to_monotonic.tv_nsec +
58 ktime_to_ns(tk->offs_boot)) << tk->tkr_mono.shift);
59 while (nsec >= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
60 nsec -= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift);
61 vdso_ts->sec++;
62 }
63 vdso_ts->nsec = nsec;
64
65 /* CLOCK_TAI */
66 vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_TAI];
67 vdso_ts->sec = tk->xtime_sec + (s64)tk->tai_offset;
68 vdso_ts->nsec = tk->tkr_mono.xtime_nsec;
69
70 /*
71 * Read without the seqlock held by clock_getres().
72 * Note: No need to have a second copy.
73 */
74 WRITE_ONCE(vdata[CS_HRES_COARSE].hrtimer_res, hrtimer_resolution);
75}
76
77void update_vsyscall(struct timekeeper *tk)
78{
79 struct vdso_data *vdata = __arch_get_k_vdso_data();
80 struct vdso_timestamp *vdso_ts;
81 u64 nsec;
82
83 if (__arch_update_vdso_data()) {
84 /*
85 * Some architectures might want to skip the update of the
86 * data page.
87 */
88 return;
89 }
90
91 /* copy vsyscall data */
92 vdso_write_begin(vdata);
93
94 vdata[CS_HRES_COARSE].clock_mode = __arch_get_clock_mode(tk);
95 vdata[CS_RAW].clock_mode = __arch_get_clock_mode(tk);
96
97 /* CLOCK_REALTIME_COARSE */
98 vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_REALTIME_COARSE];
99 vdso_ts->sec = tk->xtime_sec;
100 vdso_ts->nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
101
102 /* CLOCK_MONOTONIC_COARSE */
103 vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_MONOTONIC_COARSE];
104 vdso_ts->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
105 nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
106 nsec = nsec + tk->wall_to_monotonic.tv_nsec;
107 while (nsec >= NSEC_PER_SEC) {
108 nsec = nsec - NSEC_PER_SEC;
109 vdso_ts->sec++;
110 }
111 vdso_ts->nsec = nsec;
112
113 if (__arch_use_vsyscall(vdata))
114 update_vdso_data(vdata, tk);
115
116 __arch_update_vsyscall(vdata, tk);
117
118 vdso_write_end(vdata);
119
120 __arch_sync_vdso_data(vdata);
121}
122
123void update_vsyscall_tz(void)
124{
125 struct vdso_data *vdata = __arch_get_k_vdso_data();
126
127 if (__arch_use_vsyscall(vdata)) {
128 vdata[CS_HRES_COARSE].tz_minuteswest = sys_tz.tz_minuteswest;
129 vdata[CS_HRES_COARSE].tz_dsttime = sys_tz.tz_dsttime;
130 }
131
132 __arch_sync_vdso_data(vdata);
133}
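
The new kernel/time/vsyscall.c is written entirely in terms of a handful of architecture hooks: __arch_get_k_vdso_data(), __arch_update_vdso_data(), __arch_get_clock_mode(), __arch_use_vsyscall(), __arch_update_vsyscall() and __arch_sync_vdso_data(). As a rough illustration, an architecture opting in would provide something like the header below; the prototypes, the vdso_data_store symbol and the trivial defaults are assumptions made for this sketch, not the definitive arch interface.

#ifndef __ASM_VDSO_VSYSCALL_H
#define __ASM_VDSO_VSYSCALL_H

#include <linux/timekeeper_internal.h>
#include <vdso/datapage.h>

/* Arch-owned vDSO data page(s), mapped read-only into user space. */
extern struct vdso_data *vdso_data_store;

static __always_inline struct vdso_data *__arch_get_k_vdso_data(void)
{
        return vdso_data_store;
}

static __always_inline int __arch_get_clock_mode(struct timekeeper *tk)
{
        /* map the current clocksource to a vDSO clock mode; 0 as a placeholder */
        return 0;
}

/* Returning 0 means "do update the data page". */
static __always_inline int __arch_update_vdso_data(void)
{
        return 0;
}

static __always_inline int __arch_use_vsyscall(struct vdso_data *vdata)
{
        return 1;
}

static __always_inline void __arch_update_vsyscall(struct vdso_data *vdata,
                                                   struct timekeeper *tk)
{
}

static __always_inline void __arch_sync_vdso_data(struct vdso_data *vdata)
{
}

#endif /* __ASM_VDSO_VSYSCALL_H */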
diff --git a/kernel/torture.c b/kernel/torture.c
index 17b2be9bde12..a8d9bdfba7c3 100644
--- a/kernel/torture.c
+++ b/kernel/torture.c
@@ -570,6 +570,7 @@ static void torture_shutdown_cleanup(void)
570static struct task_struct *stutter_task; 570static struct task_struct *stutter_task;
571static int stutter_pause_test; 571static int stutter_pause_test;
572static int stutter; 572static int stutter;
573static int stutter_gap;
573 574
574/* 575/*
575 * Block until the stutter interval ends. This must be called periodically 576 * Block until the stutter interval ends. This must be called periodically
@@ -578,10 +579,12 @@ static int stutter;
578bool stutter_wait(const char *title) 579bool stutter_wait(const char *title)
579{ 580{
580 int spt; 581 int spt;
582 bool ret = false;
581 583
582 cond_resched_tasks_rcu_qs(); 584 cond_resched_tasks_rcu_qs();
583 spt = READ_ONCE(stutter_pause_test); 585 spt = READ_ONCE(stutter_pause_test);
584 for (; spt; spt = READ_ONCE(stutter_pause_test)) { 586 for (; spt; spt = READ_ONCE(stutter_pause_test)) {
587 ret = true;
585 if (spt == 1) { 588 if (spt == 1) {
586 schedule_timeout_interruptible(1); 589 schedule_timeout_interruptible(1);
587 } else if (spt == 2) { 590 } else if (spt == 2) {
@@ -592,7 +595,7 @@ bool stutter_wait(const char *title)
592 } 595 }
593 torture_shutdown_absorb(title); 596 torture_shutdown_absorb(title);
594 } 597 }
595 return !!spt; 598 return ret;
596} 599}
597EXPORT_SYMBOL_GPL(stutter_wait); 600EXPORT_SYMBOL_GPL(stutter_wait);
598 601
@@ -602,17 +605,24 @@ EXPORT_SYMBOL_GPL(stutter_wait);
602 */ 605 */
603static int torture_stutter(void *arg) 606static int torture_stutter(void *arg)
604{ 607{
608 int wtime;
609
605 VERBOSE_TOROUT_STRING("torture_stutter task started"); 610 VERBOSE_TOROUT_STRING("torture_stutter task started");
606 do { 611 do {
607 if (!torture_must_stop() && stutter > 1) { 612 if (!torture_must_stop() && stutter > 1) {
608 WRITE_ONCE(stutter_pause_test, 1); 613 wtime = stutter;
609 schedule_timeout_interruptible(stutter - 1); 614 if (stutter > HZ + 1) {
615 WRITE_ONCE(stutter_pause_test, 1);
616 wtime = stutter - HZ - 1;
617 schedule_timeout_interruptible(wtime);
618 wtime = HZ + 1;
619 }
610 WRITE_ONCE(stutter_pause_test, 2); 620 WRITE_ONCE(stutter_pause_test, 2);
611 schedule_timeout_interruptible(1); 621 schedule_timeout_interruptible(wtime);
612 } 622 }
613 WRITE_ONCE(stutter_pause_test, 0); 623 WRITE_ONCE(stutter_pause_test, 0);
614 if (!torture_must_stop()) 624 if (!torture_must_stop())
615 schedule_timeout_interruptible(stutter); 625 schedule_timeout_interruptible(stutter_gap);
616 torture_shutdown_absorb("torture_stutter"); 626 torture_shutdown_absorb("torture_stutter");
617 } while (!torture_must_stop()); 627 } while (!torture_must_stop());
618 torture_kthread_stopping("torture_stutter"); 628 torture_kthread_stopping("torture_stutter");
@@ -622,9 +632,10 @@ static int torture_stutter(void *arg)
622/* 632/*
623 * Initialize and kick off the torture_stutter kthread. 633 * Initialize and kick off the torture_stutter kthread.
624 */ 634 */
625int torture_stutter_init(const int s) 635int torture_stutter_init(const int s, const int sgap)
626{ 636{
627 stutter = s; 637 stutter = s;
638 stutter_gap = sgap;
628 return torture_create_kthread(torture_stutter, NULL, stutter_task); 639 return torture_create_kthread(torture_stutter, NULL, stutter_task);
629} 640}
630EXPORT_SYMBOL_GPL(torture_stutter_init); 641EXPORT_SYMBOL_GPL(torture_stutter_init);
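
The torture.c hunks change the stutter machinery in two ways: stutter_wait() now reports whether it actually had to pause (rather than the last sampled value of stutter_pause_test), and long stutter periods are split so that only the final HZ + 1 jiffies run with stutter_pause_test == 2. With HZ = 100 and stutter = 250 jiffies, for example, waiters see spt == 1 for 149 jiffies, then spt == 2 for the last 101 jiffies, and the test then runs for stutter_gap jiffies before the next pause. Callers of torture_stutter_init() must now pass that gap explicitly; a hypothetical caller (not taken from rcutorture or locktorture) might look like this:

#include <linux/init.h>
#include <linux/torture.h>

static int stutter_secs = 5;            /* illustrative module parameters */
static int stutter_gap_secs = 1;

static int __init my_torture_init(void)
{
        /* second argument: gap between stutter intervals, in jiffies */
        return torture_stutter_init(stutter_secs * HZ,
                                    stutter_gap_secs * HZ);
}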
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 38277af44f5c..576c41644e77 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -34,7 +34,6 @@
34#include <linux/hash.h> 34#include <linux/hash.h>
35#include <linux/rcupdate.h> 35#include <linux/rcupdate.h>
36#include <linux/kprobes.h> 36#include <linux/kprobes.h>
37#include <linux/memory.h>
38 37
39#include <trace/events/sched.h> 38#include <trace/events/sched.h>
40 39
@@ -2611,12 +2610,10 @@ static void ftrace_run_update_code(int command)
2611{ 2610{
2612 int ret; 2611 int ret;
2613 2612
2614 mutex_lock(&text_mutex);
2615
2616 ret = ftrace_arch_code_modify_prepare(); 2613 ret = ftrace_arch_code_modify_prepare();
2617 FTRACE_WARN_ON(ret); 2614 FTRACE_WARN_ON(ret);
2618 if (ret) 2615 if (ret)
2619 goto out_unlock; 2616 return;
2620 2617
2621 /* 2618 /*
2622 * By default we use stop_machine() to modify the code. 2619 * By default we use stop_machine() to modify the code.
@@ -2628,9 +2625,6 @@ static void ftrace_run_update_code(int command)
2628 2625
2629 ret = ftrace_arch_code_modify_post_process(); 2626 ret = ftrace_arch_code_modify_post_process();
2630 FTRACE_WARN_ON(ret); 2627 FTRACE_WARN_ON(ret);
2631
2632out_unlock:
2633 mutex_unlock(&text_mutex);
2634} 2628}
2635 2629
2636static void ftrace_run_modify_code(struct ftrace_ops *ops, int command, 2630static void ftrace_run_modify_code(struct ftrace_ops *ops, int command,
@@ -5784,7 +5778,6 @@ void ftrace_module_enable(struct module *mod)
5784 struct ftrace_page *pg; 5778 struct ftrace_page *pg;
5785 5779
5786 mutex_lock(&ftrace_lock); 5780 mutex_lock(&ftrace_lock);
5787 mutex_lock(&text_mutex);
5788 5781
5789 if (ftrace_disabled) 5782 if (ftrace_disabled)
5790 goto out_unlock; 5783 goto out_unlock;
@@ -5846,7 +5839,6 @@ void ftrace_module_enable(struct module *mod)
5846 ftrace_arch_code_modify_post_process(); 5839 ftrace_arch_code_modify_post_process();
5847 5840
5848 out_unlock: 5841 out_unlock:
5849 mutex_unlock(&text_mutex);
5850 mutex_unlock(&ftrace_lock); 5842 mutex_unlock(&ftrace_lock);
5851 5843
5852 process_cached_mods(mod->name); 5844 process_cached_mods(mod->name);
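
The ftrace.c hunks drop the explicit text_mutex handling (and the <linux/memory.h> include) from ftrace_run_update_code() and ftrace_module_enable(). The apparent expectation is that architectures needing text_mutex for code patching take it in their ftrace_arch_code_modify_prepare()/_post_process() callbacks instead; the sketch below shows that shape, but it is an illustration of the idea, not the actual x86 implementation.

#include <linux/ftrace.h>
#include <linux/memory.h>       /* text_mutex */
#include <linux/mutex.h>

int ftrace_arch_code_modify_prepare(void)
{
        mutex_lock(&text_mutex);
        return 0;
}

int ftrace_arch_code_modify_post_process(void)
{
        mutex_unlock(&text_mutex);
        return 0;
}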
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 83e08b78dbee..c3aabb576fe5 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -6719,11 +6719,13 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6719 break; 6719 break;
6720 } 6720 }
6721#endif 6721#endif
6722 if (!tr->allocated_snapshot) { 6722 if (tr->allocated_snapshot)
6723 ret = resize_buffer_duplicate_size(&tr->max_buffer,
6724 &tr->trace_buffer, iter->cpu_file);
6725 else
6723 ret = tracing_alloc_snapshot_instance(tr); 6726 ret = tracing_alloc_snapshot_instance(tr);
6724 if (ret < 0) 6727 if (ret < 0)
6725 break; 6728 break;
6726 }
6727 local_irq_disable(); 6729 local_irq_disable();
6728 /* Now, we're going to swap */ 6730 /* Now, we're going to swap */
6729 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) 6731 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
@@ -7126,12 +7128,24 @@ static ssize_t tracing_err_log_write(struct file *file,
7126 return count; 7128 return count;
7127} 7129}
7128 7130
7131static int tracing_err_log_release(struct inode *inode, struct file *file)
7132{
7133 struct trace_array *tr = inode->i_private;
7134
7135 trace_array_put(tr);
7136
7137 if (file->f_mode & FMODE_READ)
7138 seq_release(inode, file);
7139
7140 return 0;
7141}
7142
7129static const struct file_operations tracing_err_log_fops = { 7143static const struct file_operations tracing_err_log_fops = {
7130 .open = tracing_err_log_open, 7144 .open = tracing_err_log_open,
7131 .write = tracing_err_log_write, 7145 .write = tracing_err_log_write,
7132 .read = seq_read, 7146 .read = seq_read,
7133 .llseek = seq_lseek, 7147 .llseek = seq_lseek,
7134 .release = tracing_release_generic_tr, 7148 .release = tracing_err_log_release,
7135}; 7149};
7136 7150
7137static int tracing_buffers_open(struct inode *inode, struct file *filp) 7151static int tracing_buffers_open(struct inode *inode, struct file *filp)
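
tracing_err_log_release() exists to balance what the open path takes: a reference on the trace_array plus, for readers, the seq_file state, whereas the previous release hook only dropped the trace_array reference. The general open/release pairing, reduced to its essentials with hypothetical names (example_open, example_release, example_seq_ops) in the context of kernel/trace/trace.c:

static const struct seq_operations example_seq_ops;    /* assumed to exist */

static int example_open(struct inode *inode, struct file *file)
{
        struct trace_array *tr = inode->i_private;
        int ret;

        ret = trace_array_get(tr);              /* pin the instance */
        if (ret)
                return ret;

        if (file->f_mode & FMODE_READ) {
                ret = seq_open(file, &example_seq_ops);
                if (ret)
                        trace_array_put(tr);    /* undo on failure */
        }
        return ret;
}

static int example_release(struct inode *inode, struct file *file)
{
        struct trace_array *tr = inode->i_private;

        trace_array_put(tr);                    /* matches trace_array_get() */
        if (file->f_mode & FMODE_READ)
                seq_release(inode, file);       /* matches seq_open() */
        return 0;
}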
diff --git a/kernel/up.c b/kernel/up.c
index 483c9962c999..862b460ab97a 100644
--- a/kernel/up.c
+++ b/kernel/up.c
@@ -35,14 +35,13 @@ int smp_call_function_single_async(int cpu, call_single_data_t *csd)
35} 35}
36EXPORT_SYMBOL(smp_call_function_single_async); 36EXPORT_SYMBOL(smp_call_function_single_async);
37 37
38int on_each_cpu(smp_call_func_t func, void *info, int wait) 38void on_each_cpu(smp_call_func_t func, void *info, int wait)
39{ 39{
40 unsigned long flags; 40 unsigned long flags;
41 41
42 local_irq_save(flags); 42 local_irq_save(flags);
43 func(info); 43 func(info);
44 local_irq_restore(flags); 44 local_irq_restore(flags);
45 return 0;
46} 45}
47EXPORT_SYMBOL(on_each_cpu); 46EXPORT_SYMBOL(on_each_cpu);
48 47
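
With the UP stub above now matching an on_each_cpu() that cannot fail, callers no longer have a return value to check. An illustrative caller update, using a hypothetical do_flush() handler of type smp_call_func_t:

#include <linux/smp.h>

static void do_flush(void *info)
{
        /* per-CPU work; nothing to report back */
}

static void flush_everywhere(void)
{
        /* a former "if (on_each_cpu(...))" check simply goes away */
        on_each_cpu(do_flush, NULL, 1);
}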
diff --git a/lib/Kconfig b/lib/Kconfig
index 90623a0e1942..8c8eefc5e54c 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -576,6 +576,11 @@ config OID_REGISTRY
576config UCS2_STRING 576config UCS2_STRING
577 tristate 577 tristate
578 578
579#
580# generic vdso
581#
582source "lib/vdso/Kconfig"
583
579source "lib/fonts/Kconfig" 584source "lib/fonts/Kconfig"
580 585
581config SG_SPLIT 586config SG_SPLIT
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index cbdfae379896..06d9c9d70385 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1095,7 +1095,7 @@ config PROVE_LOCKING
1095 select DEBUG_SPINLOCK 1095 select DEBUG_SPINLOCK
1096 select DEBUG_MUTEXES 1096 select DEBUG_MUTEXES
1097 select DEBUG_RT_MUTEXES if RT_MUTEXES 1097 select DEBUG_RT_MUTEXES if RT_MUTEXES
1098 select DEBUG_RWSEMS if RWSEM_SPIN_ON_OWNER 1098 select DEBUG_RWSEMS
1099 select DEBUG_WW_MUTEX_SLOWPATH 1099 select DEBUG_WW_MUTEX_SLOWPATH
1100 select DEBUG_LOCK_ALLOC 1100 select DEBUG_LOCK_ALLOC
1101 select TRACE_IRQFLAGS 1101 select TRACE_IRQFLAGS
@@ -1199,10 +1199,10 @@ config DEBUG_WW_MUTEX_SLOWPATH
1199 1199
1200config DEBUG_RWSEMS 1200config DEBUG_RWSEMS
1201 bool "RW Semaphore debugging: basic checks" 1201 bool "RW Semaphore debugging: basic checks"
1202 depends on DEBUG_KERNEL && RWSEM_SPIN_ON_OWNER 1202 depends on DEBUG_KERNEL
1203 help 1203 help
1204 This debugging feature allows mismatched rw semaphore locks and unlocks 1204 This debugging feature allows mismatched rw semaphore locks
1205 to be detected and reported. 1205 and unlocks to be detected and reported.
1206 1206
1207config DEBUG_LOCK_ALLOC 1207config DEBUG_LOCK_ALLOC
1208 bool "Lock debugging: detect incorrect freeing of live locks" 1208 bool "Lock debugging: detect incorrect freeing of live locks"
@@ -1754,6 +1754,18 @@ config RBTREE_TEST
1754 A benchmark measuring the performance of the rbtree library. 1754 A benchmark measuring the performance of the rbtree library.
1755 Also includes rbtree invariant checks. 1755 Also includes rbtree invariant checks.
1756 1756
1757config REED_SOLOMON_TEST
1758 tristate "Reed-Solomon library test"
1759 depends on DEBUG_KERNEL || m
1760 select REED_SOLOMON
1761 select REED_SOLOMON_ENC16
1762 select REED_SOLOMON_DEC16
1763 help
1764 This option enables the self-test function of rslib at boot,
1765 or at module load time.
1766
1767 If unsure, say N.
1768
1757config INTERVAL_TREE_TEST 1769config INTERVAL_TREE_TEST
1758 tristate "Interval tree test" 1770 tristate "Interval tree test"
1759 depends on DEBUG_KERNEL 1771 depends on DEBUG_KERNEL
@@ -1858,6 +1870,14 @@ config TEST_PARMAN
1858 1870
1859 If unsure, say N. 1871 If unsure, say N.
1860 1872
1873config TEST_IRQ_TIMINGS
1874 bool "IRQ timings selftest"
1875 depends on IRQ_TIMINGS
1876 help
1877 Enable this option to test the irq timings code on boot.
1878
1879 If unsure, say N.
1880
1861config TEST_LKM 1881config TEST_LKM
1862 tristate "Test module loading with 'hello world' module" 1882 tristate "Test module loading with 'hello world' module"
1863 depends on m 1883 depends on m
diff --git a/lib/atomic64.c b/lib/atomic64.c
index 7e6905751522..e98c85a99787 100644
--- a/lib/atomic64.c
+++ b/lib/atomic64.c
@@ -42,11 +42,11 @@ static inline raw_spinlock_t *lock_addr(const atomic64_t *v)
42 return &atomic64_lock[addr & (NR_LOCKS - 1)].lock; 42 return &atomic64_lock[addr & (NR_LOCKS - 1)].lock;
43} 43}
44 44
45long long atomic64_read(const atomic64_t *v) 45s64 atomic64_read(const atomic64_t *v)
46{ 46{
47 unsigned long flags; 47 unsigned long flags;
48 raw_spinlock_t *lock = lock_addr(v); 48 raw_spinlock_t *lock = lock_addr(v);
49 long long val; 49 s64 val;
50 50
51 raw_spin_lock_irqsave(lock, flags); 51 raw_spin_lock_irqsave(lock, flags);
52 val = v->counter; 52 val = v->counter;
@@ -55,7 +55,7 @@ long long atomic64_read(const atomic64_t *v)
55} 55}
56EXPORT_SYMBOL(atomic64_read); 56EXPORT_SYMBOL(atomic64_read);
57 57
58void atomic64_set(atomic64_t *v, long long i) 58void atomic64_set(atomic64_t *v, s64 i)
59{ 59{
60 unsigned long flags; 60 unsigned long flags;
61 raw_spinlock_t *lock = lock_addr(v); 61 raw_spinlock_t *lock = lock_addr(v);
@@ -67,7 +67,7 @@ void atomic64_set(atomic64_t *v, long long i)
67EXPORT_SYMBOL(atomic64_set); 67EXPORT_SYMBOL(atomic64_set);
68 68
69#define ATOMIC64_OP(op, c_op) \ 69#define ATOMIC64_OP(op, c_op) \
70void atomic64_##op(long long a, atomic64_t *v) \ 70void atomic64_##op(s64 a, atomic64_t *v) \
71{ \ 71{ \
72 unsigned long flags; \ 72 unsigned long flags; \
73 raw_spinlock_t *lock = lock_addr(v); \ 73 raw_spinlock_t *lock = lock_addr(v); \
@@ -79,11 +79,11 @@ void atomic64_##op(long long a, atomic64_t *v) \
79EXPORT_SYMBOL(atomic64_##op); 79EXPORT_SYMBOL(atomic64_##op);
80 80
81#define ATOMIC64_OP_RETURN(op, c_op) \ 81#define ATOMIC64_OP_RETURN(op, c_op) \
82long long atomic64_##op##_return(long long a, atomic64_t *v) \ 82s64 atomic64_##op##_return(s64 a, atomic64_t *v) \
83{ \ 83{ \
84 unsigned long flags; \ 84 unsigned long flags; \
85 raw_spinlock_t *lock = lock_addr(v); \ 85 raw_spinlock_t *lock = lock_addr(v); \
86 long long val; \ 86 s64 val; \
87 \ 87 \
88 raw_spin_lock_irqsave(lock, flags); \ 88 raw_spin_lock_irqsave(lock, flags); \
89 val = (v->counter c_op a); \ 89 val = (v->counter c_op a); \
@@ -93,11 +93,11 @@ long long atomic64_##op##_return(long long a, atomic64_t *v) \
93EXPORT_SYMBOL(atomic64_##op##_return); 93EXPORT_SYMBOL(atomic64_##op##_return);
94 94
95#define ATOMIC64_FETCH_OP(op, c_op) \ 95#define ATOMIC64_FETCH_OP(op, c_op) \
96long long atomic64_fetch_##op(long long a, atomic64_t *v) \ 96s64 atomic64_fetch_##op(s64 a, atomic64_t *v) \
97{ \ 97{ \
98 unsigned long flags; \ 98 unsigned long flags; \
99 raw_spinlock_t *lock = lock_addr(v); \ 99 raw_spinlock_t *lock = lock_addr(v); \
100 long long val; \ 100 s64 val; \
101 \ 101 \
102 raw_spin_lock_irqsave(lock, flags); \ 102 raw_spin_lock_irqsave(lock, flags); \
103 val = v->counter; \ 103 val = v->counter; \
@@ -130,11 +130,11 @@ ATOMIC64_OPS(xor, ^=)
130#undef ATOMIC64_OP_RETURN 130#undef ATOMIC64_OP_RETURN
131#undef ATOMIC64_OP 131#undef ATOMIC64_OP
132 132
133long long atomic64_dec_if_positive(atomic64_t *v) 133s64 atomic64_dec_if_positive(atomic64_t *v)
134{ 134{
135 unsigned long flags; 135 unsigned long flags;
136 raw_spinlock_t *lock = lock_addr(v); 136 raw_spinlock_t *lock = lock_addr(v);
137 long long val; 137 s64 val;
138 138
139 raw_spin_lock_irqsave(lock, flags); 139 raw_spin_lock_irqsave(lock, flags);
140 val = v->counter - 1; 140 val = v->counter - 1;
@@ -145,11 +145,11 @@ long long atomic64_dec_if_positive(atomic64_t *v)
145} 145}
146EXPORT_SYMBOL(atomic64_dec_if_positive); 146EXPORT_SYMBOL(atomic64_dec_if_positive);
147 147
148long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n) 148s64 atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n)
149{ 149{
150 unsigned long flags; 150 unsigned long flags;
151 raw_spinlock_t *lock = lock_addr(v); 151 raw_spinlock_t *lock = lock_addr(v);
152 long long val; 152 s64 val;
153 153
154 raw_spin_lock_irqsave(lock, flags); 154 raw_spin_lock_irqsave(lock, flags);
155 val = v->counter; 155 val = v->counter;
@@ -160,11 +160,11 @@ long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n)
160} 160}
161EXPORT_SYMBOL(atomic64_cmpxchg); 161EXPORT_SYMBOL(atomic64_cmpxchg);
162 162
163long long atomic64_xchg(atomic64_t *v, long long new) 163s64 atomic64_xchg(atomic64_t *v, s64 new)
164{ 164{
165 unsigned long flags; 165 unsigned long flags;
166 raw_spinlock_t *lock = lock_addr(v); 166 raw_spinlock_t *lock = lock_addr(v);
167 long long val; 167 s64 val;
168 168
169 raw_spin_lock_irqsave(lock, flags); 169 raw_spin_lock_irqsave(lock, flags);
170 val = v->counter; 170 val = v->counter;
@@ -174,11 +174,11 @@ long long atomic64_xchg(atomic64_t *v, long long new)
174} 174}
175EXPORT_SYMBOL(atomic64_xchg); 175EXPORT_SYMBOL(atomic64_xchg);
176 176
177long long atomic64_fetch_add_unless(atomic64_t *v, long long a, long long u) 177s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
178{ 178{
179 unsigned long flags; 179 unsigned long flags;
180 raw_spinlock_t *lock = lock_addr(v); 180 raw_spinlock_t *lock = lock_addr(v);
181 long long val; 181 s64 val;
182 182
183 raw_spin_lock_irqsave(lock, flags); 183 raw_spin_lock_irqsave(lock, flags);
184 val = v->counter; 184 val = v->counter;
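
The lib/atomic64.c conversion is purely a type change: the spinlock-protected fallback now traffics in s64, matching the rest of the atomic64 API. For reference, after this change ATOMIC64_OP_RETURN(add, +=) from the hunk above expands (whitespace rearranged) to:

s64 atomic64_add_return(s64 a, atomic64_t *v)
{
        unsigned long flags;
        raw_spinlock_t *lock = lock_addr(v);
        s64 val;

        raw_spin_lock_irqsave(lock, flags);
        val = (v->counter += a);
        raw_spin_unlock_irqrestore(lock, flags);
        return val;
}
EXPORT_SYMBOL(atomic64_add_return);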
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index 55437fd5128b..61261195f5b6 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -25,16 +25,37 @@
25 25
26#define ODEBUG_POOL_SIZE 1024 26#define ODEBUG_POOL_SIZE 1024
27#define ODEBUG_POOL_MIN_LEVEL 256 27#define ODEBUG_POOL_MIN_LEVEL 256
28#define ODEBUG_POOL_PERCPU_SIZE 64
29#define ODEBUG_BATCH_SIZE 16
28 30
29#define ODEBUG_CHUNK_SHIFT PAGE_SHIFT 31#define ODEBUG_CHUNK_SHIFT PAGE_SHIFT
30#define ODEBUG_CHUNK_SIZE (1 << ODEBUG_CHUNK_SHIFT) 32#define ODEBUG_CHUNK_SIZE (1 << ODEBUG_CHUNK_SHIFT)
31#define ODEBUG_CHUNK_MASK (~(ODEBUG_CHUNK_SIZE - 1)) 33#define ODEBUG_CHUNK_MASK (~(ODEBUG_CHUNK_SIZE - 1))
32 34
35/*
36 * We limit the freeing of debug objects via workqueue at a maximum
37 * frequency of 10Hz and about 1024 objects for each freeing operation.
38 * So it is freeing at most 10k debug objects per second.
39 */
40#define ODEBUG_FREE_WORK_MAX 1024
41#define ODEBUG_FREE_WORK_DELAY DIV_ROUND_UP(HZ, 10)
42
33struct debug_bucket { 43struct debug_bucket {
34 struct hlist_head list; 44 struct hlist_head list;
35 raw_spinlock_t lock; 45 raw_spinlock_t lock;
36}; 46};
37 47
48/*
49 * Debug object percpu free list
50 * Access is protected by disabling irq
51 */
52struct debug_percpu_free {
53 struct hlist_head free_objs;
54 int obj_free;
55};
56
57static DEFINE_PER_CPU(struct debug_percpu_free, percpu_obj_pool);
58
38static struct debug_bucket obj_hash[ODEBUG_HASH_SIZE]; 59static struct debug_bucket obj_hash[ODEBUG_HASH_SIZE];
39 60
40static struct debug_obj obj_static_pool[ODEBUG_POOL_SIZE] __initdata; 61static struct debug_obj obj_static_pool[ODEBUG_POOL_SIZE] __initdata;
@@ -44,13 +65,20 @@ static DEFINE_RAW_SPINLOCK(pool_lock);
44static HLIST_HEAD(obj_pool); 65static HLIST_HEAD(obj_pool);
45static HLIST_HEAD(obj_to_free); 66static HLIST_HEAD(obj_to_free);
46 67
68/*
69 * Because of the presence of percpu free pools, obj_pool_free will
70 * under-count those in the percpu free pools. Similarly, obj_pool_used
71 * will over-count those in the percpu free pools. Adjustments will be
72 * made at debug_stats_show(). Both obj_pool_min_free and obj_pool_max_used
73 * can be off.
74 */
47static int obj_pool_min_free = ODEBUG_POOL_SIZE; 75static int obj_pool_min_free = ODEBUG_POOL_SIZE;
48static int obj_pool_free = ODEBUG_POOL_SIZE; 76static int obj_pool_free = ODEBUG_POOL_SIZE;
49static int obj_pool_used; 77static int obj_pool_used;
50static int obj_pool_max_used; 78static int obj_pool_max_used;
79static bool obj_freeing;
51/* The number of objs on the global free list */ 80/* The number of objs on the global free list */
52static int obj_nr_tofree; 81static int obj_nr_tofree;
53static struct kmem_cache *obj_cache;
54 82
55static int debug_objects_maxchain __read_mostly; 83static int debug_objects_maxchain __read_mostly;
56static int __maybe_unused debug_objects_maxchecked __read_mostly; 84static int __maybe_unused debug_objects_maxchecked __read_mostly;
@@ -63,6 +91,7 @@ static int debug_objects_pool_size __read_mostly
63static int debug_objects_pool_min_level __read_mostly 91static int debug_objects_pool_min_level __read_mostly
64 = ODEBUG_POOL_MIN_LEVEL; 92 = ODEBUG_POOL_MIN_LEVEL;
65static struct debug_obj_descr *descr_test __read_mostly; 93static struct debug_obj_descr *descr_test __read_mostly;
94static struct kmem_cache *obj_cache __read_mostly;
66 95
67/* 96/*
68 * Track numbers of kmem_cache_alloc()/free() calls done. 97 * Track numbers of kmem_cache_alloc()/free() calls done.
@@ -71,7 +100,7 @@ static int debug_objects_allocated;
71static int debug_objects_freed; 100static int debug_objects_freed;
72 101
73static void free_obj_work(struct work_struct *work); 102static void free_obj_work(struct work_struct *work);
74static DECLARE_WORK(debug_obj_work, free_obj_work); 103static DECLARE_DELAYED_WORK(debug_obj_work, free_obj_work);
75 104
76static int __init enable_object_debug(char *str) 105static int __init enable_object_debug(char *str)
77{ 106{
@@ -100,7 +129,7 @@ static const char *obj_states[ODEBUG_STATE_MAX] = {
100static void fill_pool(void) 129static void fill_pool(void)
101{ 130{
102 gfp_t gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN; 131 gfp_t gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN;
103 struct debug_obj *new, *obj; 132 struct debug_obj *obj;
104 unsigned long flags; 133 unsigned long flags;
105 134
106 if (likely(obj_pool_free >= debug_objects_pool_min_level)) 135 if (likely(obj_pool_free >= debug_objects_pool_min_level))
@@ -116,7 +145,7 @@ static void fill_pool(void)
116 * Recheck with the lock held as the worker thread might have 145 * Recheck with the lock held as the worker thread might have
117 * won the race and freed the global free list already. 146 * won the race and freed the global free list already.
118 */ 147 */
119 if (obj_nr_tofree) { 148 while (obj_nr_tofree && (obj_pool_free < obj_pool_min_free)) {
120 obj = hlist_entry(obj_to_free.first, typeof(*obj), node); 149 obj = hlist_entry(obj_to_free.first, typeof(*obj), node);
121 hlist_del(&obj->node); 150 hlist_del(&obj->node);
122 obj_nr_tofree--; 151 obj_nr_tofree--;
@@ -130,15 +159,23 @@ static void fill_pool(void)
130 return; 159 return;
131 160
132 while (obj_pool_free < debug_objects_pool_min_level) { 161 while (obj_pool_free < debug_objects_pool_min_level) {
162 struct debug_obj *new[ODEBUG_BATCH_SIZE];
163 int cnt;
133 164
134 new = kmem_cache_zalloc(obj_cache, gfp); 165 for (cnt = 0; cnt < ODEBUG_BATCH_SIZE; cnt++) {
135 if (!new) 166 new[cnt] = kmem_cache_zalloc(obj_cache, gfp);
167 if (!new[cnt])
168 break;
169 }
170 if (!cnt)
136 return; 171 return;
137 172
138 raw_spin_lock_irqsave(&pool_lock, flags); 173 raw_spin_lock_irqsave(&pool_lock, flags);
139 hlist_add_head(&new->node, &obj_pool); 174 while (cnt) {
140 debug_objects_allocated++; 175 hlist_add_head(&new[--cnt]->node, &obj_pool);
141 obj_pool_free++; 176 debug_objects_allocated++;
177 obj_pool_free++;
178 }
142 raw_spin_unlock_irqrestore(&pool_lock, flags); 179 raw_spin_unlock_irqrestore(&pool_lock, flags);
143 } 180 }
144} 181}
@@ -163,36 +200,81 @@ static struct debug_obj *lookup_object(void *addr, struct debug_bucket *b)
163} 200}
164 201
165/* 202/*
203 * Allocate a new object from the hlist
204 */
205static struct debug_obj *__alloc_object(struct hlist_head *list)
206{
207 struct debug_obj *obj = NULL;
208
209 if (list->first) {
210 obj = hlist_entry(list->first, typeof(*obj), node);
211 hlist_del(&obj->node);
212 }
213
214 return obj;
215}
216
217/*
166 * Allocate a new object. If the pool is empty, switch off the debugger. 218 * Allocate a new object. If the pool is empty, switch off the debugger.
167 * Must be called with interrupts disabled. 219 * Must be called with interrupts disabled.
168 */ 220 */
169static struct debug_obj * 221static struct debug_obj *
170alloc_object(void *addr, struct debug_bucket *b, struct debug_obj_descr *descr) 222alloc_object(void *addr, struct debug_bucket *b, struct debug_obj_descr *descr)
171{ 223{
172 struct debug_obj *obj = NULL; 224 struct debug_percpu_free *percpu_pool = this_cpu_ptr(&percpu_obj_pool);
225 struct debug_obj *obj;
173 226
174 raw_spin_lock(&pool_lock); 227 if (likely(obj_cache)) {
175 if (obj_pool.first) { 228 obj = __alloc_object(&percpu_pool->free_objs);
176 obj = hlist_entry(obj_pool.first, typeof(*obj), node); 229 if (obj) {
230 percpu_pool->obj_free--;
231 goto init_obj;
232 }
233 }
177 234
178 obj->object = addr; 235 raw_spin_lock(&pool_lock);
179 obj->descr = descr; 236 obj = __alloc_object(&obj_pool);
180 obj->state = ODEBUG_STATE_NONE; 237 if (obj) {
181 obj->astate = 0; 238 obj_pool_used++;
182 hlist_del(&obj->node); 239 obj_pool_free--;
183 240
184 hlist_add_head(&obj->node, &b->list); 241 /*
242 * Looking ahead, allocate one batch of debug objects and
243 * put them into the percpu free pool.
244 */
245 if (likely(obj_cache)) {
246 int i;
247
248 for (i = 0; i < ODEBUG_BATCH_SIZE; i++) {
249 struct debug_obj *obj2;
250
251 obj2 = __alloc_object(&obj_pool);
252 if (!obj2)
253 break;
254 hlist_add_head(&obj2->node,
255 &percpu_pool->free_objs);
256 percpu_pool->obj_free++;
257 obj_pool_used++;
258 obj_pool_free--;
259 }
260 }
185 261
186 obj_pool_used++;
187 if (obj_pool_used > obj_pool_max_used) 262 if (obj_pool_used > obj_pool_max_used)
188 obj_pool_max_used = obj_pool_used; 263 obj_pool_max_used = obj_pool_used;
189 264
190 obj_pool_free--;
191 if (obj_pool_free < obj_pool_min_free) 265 if (obj_pool_free < obj_pool_min_free)
192 obj_pool_min_free = obj_pool_free; 266 obj_pool_min_free = obj_pool_free;
193 } 267 }
194 raw_spin_unlock(&pool_lock); 268 raw_spin_unlock(&pool_lock);
195 269
270init_obj:
271 if (obj) {
272 obj->object = addr;
273 obj->descr = descr;
274 obj->state = ODEBUG_STATE_NONE;
275 obj->astate = 0;
276 hlist_add_head(&obj->node, &b->list);
277 }
196 return obj; 278 return obj;
197} 279}
198 280
@@ -209,13 +291,19 @@ static void free_obj_work(struct work_struct *work)
209 unsigned long flags; 291 unsigned long flags;
210 HLIST_HEAD(tofree); 292 HLIST_HEAD(tofree);
211 293
294 WRITE_ONCE(obj_freeing, false);
212 if (!raw_spin_trylock_irqsave(&pool_lock, flags)) 295 if (!raw_spin_trylock_irqsave(&pool_lock, flags))
213 return; 296 return;
214 297
298 if (obj_pool_free >= debug_objects_pool_size)
299 goto free_objs;
300
215 /* 301 /*
216 * The objs on the pool list might be allocated before the work is 302 * The objs on the pool list might be allocated before the work is
217 * run, so recheck if pool list it full or not, if not fill pool 303 * run, so recheck if pool list it full or not, if not fill pool
218 * list from the global free list 304 * list from the global free list. As it is likely that a workload
305 * may be gearing up to use more and more objects, don't free any
306 * of them until the next round.
219 */ 307 */
220 while (obj_nr_tofree && obj_pool_free < debug_objects_pool_size) { 308 while (obj_nr_tofree && obj_pool_free < debug_objects_pool_size) {
221 obj = hlist_entry(obj_to_free.first, typeof(*obj), node); 309 obj = hlist_entry(obj_to_free.first, typeof(*obj), node);
@@ -224,7 +312,10 @@ static void free_obj_work(struct work_struct *work)
224 obj_pool_free++; 312 obj_pool_free++;
225 obj_nr_tofree--; 313 obj_nr_tofree--;
226 } 314 }
315 raw_spin_unlock_irqrestore(&pool_lock, flags);
316 return;
227 317
318free_objs:
228 /* 319 /*
229 * Pool list is already full and there are still objs on the free 320 * Pool list is already full and there are still objs on the free
230 * list. Move remaining free objs to a temporary list to free the 321 * list. Move remaining free objs to a temporary list to free the
@@ -243,24 +334,86 @@ static void free_obj_work(struct work_struct *work)
243 } 334 }
244} 335}
245 336
246static bool __free_object(struct debug_obj *obj) 337static void __free_object(struct debug_obj *obj)
247{ 338{
339 struct debug_obj *objs[ODEBUG_BATCH_SIZE];
340 struct debug_percpu_free *percpu_pool;
341 int lookahead_count = 0;
248 unsigned long flags; 342 unsigned long flags;
249 bool work; 343 bool work;
250 344
251 raw_spin_lock_irqsave(&pool_lock, flags); 345 local_irq_save(flags);
252 work = (obj_pool_free > debug_objects_pool_size) && obj_cache; 346 if (!obj_cache)
347 goto free_to_obj_pool;
348
349 /*
350 * Try to free it into the percpu pool first.
351 */
352 percpu_pool = this_cpu_ptr(&percpu_obj_pool);
353 if (percpu_pool->obj_free < ODEBUG_POOL_PERCPU_SIZE) {
354 hlist_add_head(&obj->node, &percpu_pool->free_objs);
355 percpu_pool->obj_free++;
356 local_irq_restore(flags);
357 return;
358 }
359
360 /*
361 * As the percpu pool is full, look ahead and pull out a batch
362 * of objects from the percpu pool and free them as well.
363 */
364 for (; lookahead_count < ODEBUG_BATCH_SIZE; lookahead_count++) {
365 objs[lookahead_count] = __alloc_object(&percpu_pool->free_objs);
366 if (!objs[lookahead_count])
367 break;
368 percpu_pool->obj_free--;
369 }
370
371free_to_obj_pool:
372 raw_spin_lock(&pool_lock);
373 work = (obj_pool_free > debug_objects_pool_size) && obj_cache &&
374 (obj_nr_tofree < ODEBUG_FREE_WORK_MAX);
253 obj_pool_used--; 375 obj_pool_used--;
254 376
255 if (work) { 377 if (work) {
256 obj_nr_tofree++; 378 obj_nr_tofree++;
257 hlist_add_head(&obj->node, &obj_to_free); 379 hlist_add_head(&obj->node, &obj_to_free);
380 if (lookahead_count) {
381 obj_nr_tofree += lookahead_count;
382 obj_pool_used -= lookahead_count;
383 while (lookahead_count) {
384 hlist_add_head(&objs[--lookahead_count]->node,
385 &obj_to_free);
386 }
387 }
388
389 if ((obj_pool_free > debug_objects_pool_size) &&
390 (obj_nr_tofree < ODEBUG_FREE_WORK_MAX)) {
391 int i;
392
393 /*
394 * Free one more batch of objects from obj_pool.
395 */
396 for (i = 0; i < ODEBUG_BATCH_SIZE; i++) {
397 obj = __alloc_object(&obj_pool);
398 hlist_add_head(&obj->node, &obj_to_free);
399 obj_pool_free--;
400 obj_nr_tofree++;
401 }
402 }
258 } else { 403 } else {
259 obj_pool_free++; 404 obj_pool_free++;
260 hlist_add_head(&obj->node, &obj_pool); 405 hlist_add_head(&obj->node, &obj_pool);
406 if (lookahead_count) {
407 obj_pool_free += lookahead_count;
408 obj_pool_used -= lookahead_count;
409 while (lookahead_count) {
410 hlist_add_head(&objs[--lookahead_count]->node,
411 &obj_pool);
412 }
413 }
261 } 414 }
262 raw_spin_unlock_irqrestore(&pool_lock, flags); 415 raw_spin_unlock(&pool_lock);
263 return work; 416 local_irq_restore(flags);
264} 417}
265 418
266/* 419/*
@@ -269,8 +422,11 @@ static bool __free_object(struct debug_obj *obj)
269 */ 422 */
270static void free_object(struct debug_obj *obj) 423static void free_object(struct debug_obj *obj)
271{ 424{
272 if (__free_object(obj)) 425 __free_object(obj);
273 schedule_work(&debug_obj_work); 426 if (!obj_freeing && obj_nr_tofree) {
427 WRITE_ONCE(obj_freeing, true);
428 schedule_delayed_work(&debug_obj_work, ODEBUG_FREE_WORK_DELAY);
429 }
274} 430}
275 431
276/* 432/*
@@ -372,6 +528,7 @@ static void
372__debug_object_init(void *addr, struct debug_obj_descr *descr, int onstack) 528__debug_object_init(void *addr, struct debug_obj_descr *descr, int onstack)
373{ 529{
374 enum debug_obj_state state; 530 enum debug_obj_state state;
531 bool check_stack = false;
375 struct debug_bucket *db; 532 struct debug_bucket *db;
376 struct debug_obj *obj; 533 struct debug_obj *obj;
377 unsigned long flags; 534 unsigned long flags;
@@ -391,7 +548,7 @@ __debug_object_init(void *addr, struct debug_obj_descr *descr, int onstack)
391 debug_objects_oom(); 548 debug_objects_oom();
392 return; 549 return;
393 } 550 }
394 debug_object_is_on_stack(addr, onstack); 551 check_stack = true;
395 } 552 }
396 553
397 switch (obj->state) { 554 switch (obj->state) {
@@ -402,20 +559,23 @@ __debug_object_init(void *addr, struct debug_obj_descr *descr, int onstack)
402 break; 559 break;
403 560
404 case ODEBUG_STATE_ACTIVE: 561 case ODEBUG_STATE_ACTIVE:
405 debug_print_object(obj, "init");
406 state = obj->state; 562 state = obj->state;
407 raw_spin_unlock_irqrestore(&db->lock, flags); 563 raw_spin_unlock_irqrestore(&db->lock, flags);
564 debug_print_object(obj, "init");
408 debug_object_fixup(descr->fixup_init, addr, state); 565 debug_object_fixup(descr->fixup_init, addr, state);
409 return; 566 return;
410 567
411 case ODEBUG_STATE_DESTROYED: 568 case ODEBUG_STATE_DESTROYED:
569 raw_spin_unlock_irqrestore(&db->lock, flags);
412 debug_print_object(obj, "init"); 570 debug_print_object(obj, "init");
413 break; 571 return;
414 default: 572 default:
415 break; 573 break;
416 } 574 }
417 575
418 raw_spin_unlock_irqrestore(&db->lock, flags); 576 raw_spin_unlock_irqrestore(&db->lock, flags);
577 if (check_stack)
578 debug_object_is_on_stack(addr, onstack);
419} 579}
420 580
421/** 581/**
@@ -473,6 +633,8 @@ int debug_object_activate(void *addr, struct debug_obj_descr *descr)
473 633
474 obj = lookup_object(addr, db); 634 obj = lookup_object(addr, db);
475 if (obj) { 635 if (obj) {
636 bool print_object = false;
637
476 switch (obj->state) { 638 switch (obj->state) {
477 case ODEBUG_STATE_INIT: 639 case ODEBUG_STATE_INIT:
478 case ODEBUG_STATE_INACTIVE: 640 case ODEBUG_STATE_INACTIVE:
@@ -481,14 +643,14 @@ int debug_object_activate(void *addr, struct debug_obj_descr *descr)
481 break; 643 break;
482 644
483 case ODEBUG_STATE_ACTIVE: 645 case ODEBUG_STATE_ACTIVE:
484 debug_print_object(obj, "activate");
485 state = obj->state; 646 state = obj->state;
486 raw_spin_unlock_irqrestore(&db->lock, flags); 647 raw_spin_unlock_irqrestore(&db->lock, flags);
648 debug_print_object(obj, "activate");
487 ret = debug_object_fixup(descr->fixup_activate, addr, state); 649 ret = debug_object_fixup(descr->fixup_activate, addr, state);
488 return ret ? 0 : -EINVAL; 650 return ret ? 0 : -EINVAL;
489 651
490 case ODEBUG_STATE_DESTROYED: 652 case ODEBUG_STATE_DESTROYED:
491 debug_print_object(obj, "activate"); 653 print_object = true;
492 ret = -EINVAL; 654 ret = -EINVAL;
493 break; 655 break;
494 default: 656 default:
@@ -496,10 +658,13 @@ int debug_object_activate(void *addr, struct debug_obj_descr *descr)
496 break; 658 break;
497 } 659 }
498 raw_spin_unlock_irqrestore(&db->lock, flags); 660 raw_spin_unlock_irqrestore(&db->lock, flags);
661 if (print_object)
662 debug_print_object(obj, "activate");
499 return ret; 663 return ret;
500 } 664 }
501 665
502 raw_spin_unlock_irqrestore(&db->lock, flags); 666 raw_spin_unlock_irqrestore(&db->lock, flags);
667
503 /* 668 /*
504 * We are here when a static object is activated. We 669 * We are here when a static object is activated. We
505 * let the type specific code confirm whether this is 670 * let the type specific code confirm whether this is
@@ -531,6 +696,7 @@ void debug_object_deactivate(void *addr, struct debug_obj_descr *descr)
531 struct debug_bucket *db; 696 struct debug_bucket *db;
532 struct debug_obj *obj; 697 struct debug_obj *obj;
533 unsigned long flags; 698 unsigned long flags;
699 bool print_object = false;
534 700
535 if (!debug_objects_enabled) 701 if (!debug_objects_enabled)
536 return; 702 return;
@@ -548,24 +714,27 @@ void debug_object_deactivate(void *addr, struct debug_obj_descr *descr)
548 if (!obj->astate) 714 if (!obj->astate)
549 obj->state = ODEBUG_STATE_INACTIVE; 715 obj->state = ODEBUG_STATE_INACTIVE;
550 else 716 else
551 debug_print_object(obj, "deactivate"); 717 print_object = true;
552 break; 718 break;
553 719
554 case ODEBUG_STATE_DESTROYED: 720 case ODEBUG_STATE_DESTROYED:
555 debug_print_object(obj, "deactivate"); 721 print_object = true;
556 break; 722 break;
557 default: 723 default:
558 break; 724 break;
559 } 725 }
560 } else { 726 }
727
728 raw_spin_unlock_irqrestore(&db->lock, flags);
729 if (!obj) {
561 struct debug_obj o = { .object = addr, 730 struct debug_obj o = { .object = addr,
562 .state = ODEBUG_STATE_NOTAVAILABLE, 731 .state = ODEBUG_STATE_NOTAVAILABLE,
563 .descr = descr }; 732 .descr = descr };
564 733
565 debug_print_object(&o, "deactivate"); 734 debug_print_object(&o, "deactivate");
735 } else if (print_object) {
736 debug_print_object(obj, "deactivate");
566 } 737 }
567
568 raw_spin_unlock_irqrestore(&db->lock, flags);
569} 738}
570EXPORT_SYMBOL_GPL(debug_object_deactivate); 739EXPORT_SYMBOL_GPL(debug_object_deactivate);
571 740
@@ -580,6 +749,7 @@ void debug_object_destroy(void *addr, struct debug_obj_descr *descr)
580 struct debug_bucket *db; 749 struct debug_bucket *db;
581 struct debug_obj *obj; 750 struct debug_obj *obj;
582 unsigned long flags; 751 unsigned long flags;
752 bool print_object = false;
583 753
584 if (!debug_objects_enabled) 754 if (!debug_objects_enabled)
585 return; 755 return;
@@ -599,20 +769,22 @@ void debug_object_destroy(void *addr, struct debug_obj_descr *descr)
599 obj->state = ODEBUG_STATE_DESTROYED; 769 obj->state = ODEBUG_STATE_DESTROYED;
600 break; 770 break;
601 case ODEBUG_STATE_ACTIVE: 771 case ODEBUG_STATE_ACTIVE:
602 debug_print_object(obj, "destroy");
603 state = obj->state; 772 state = obj->state;
604 raw_spin_unlock_irqrestore(&db->lock, flags); 773 raw_spin_unlock_irqrestore(&db->lock, flags);
774 debug_print_object(obj, "destroy");
605 debug_object_fixup(descr->fixup_destroy, addr, state); 775 debug_object_fixup(descr->fixup_destroy, addr, state);
606 return; 776 return;
607 777
608 case ODEBUG_STATE_DESTROYED: 778 case ODEBUG_STATE_DESTROYED:
609 debug_print_object(obj, "destroy"); 779 print_object = true;
610 break; 780 break;
611 default: 781 default:
612 break; 782 break;
613 } 783 }
614out_unlock: 784out_unlock:
615 raw_spin_unlock_irqrestore(&db->lock, flags); 785 raw_spin_unlock_irqrestore(&db->lock, flags);
786 if (print_object)
787 debug_print_object(obj, "destroy");
616} 788}
617EXPORT_SYMBOL_GPL(debug_object_destroy); 789EXPORT_SYMBOL_GPL(debug_object_destroy);
618 790
@@ -641,9 +813,9 @@ void debug_object_free(void *addr, struct debug_obj_descr *descr)
641 813
642 switch (obj->state) { 814 switch (obj->state) {
643 case ODEBUG_STATE_ACTIVE: 815 case ODEBUG_STATE_ACTIVE:
644 debug_print_object(obj, "free");
645 state = obj->state; 816 state = obj->state;
646 raw_spin_unlock_irqrestore(&db->lock, flags); 817 raw_spin_unlock_irqrestore(&db->lock, flags);
818 debug_print_object(obj, "free");
647 debug_object_fixup(descr->fixup_free, addr, state); 819 debug_object_fixup(descr->fixup_free, addr, state);
648 return; 820 return;
649 default: 821 default:
@@ -716,6 +888,7 @@ debug_object_active_state(void *addr, struct debug_obj_descr *descr,
716 struct debug_bucket *db; 888 struct debug_bucket *db;
717 struct debug_obj *obj; 889 struct debug_obj *obj;
718 unsigned long flags; 890 unsigned long flags;
891 bool print_object = false;
719 892
720 if (!debug_objects_enabled) 893 if (!debug_objects_enabled)
721 return; 894 return;
@@ -731,22 +904,25 @@ debug_object_active_state(void *addr, struct debug_obj_descr *descr,
731 if (obj->astate == expect) 904 if (obj->astate == expect)
732 obj->astate = next; 905 obj->astate = next;
733 else 906 else
734 debug_print_object(obj, "active_state"); 907 print_object = true;
735 break; 908 break;
736 909
737 default: 910 default:
738 debug_print_object(obj, "active_state"); 911 print_object = true;
739 break; 912 break;
740 } 913 }
741 } else { 914 }
915
916 raw_spin_unlock_irqrestore(&db->lock, flags);
917 if (!obj) {
742 struct debug_obj o = { .object = addr, 918 struct debug_obj o = { .object = addr,
743 .state = ODEBUG_STATE_NOTAVAILABLE, 919 .state = ODEBUG_STATE_NOTAVAILABLE,
744 .descr = descr }; 920 .descr = descr };
745 921
746 debug_print_object(&o, "active_state"); 922 debug_print_object(&o, "active_state");
923 } else if (print_object) {
924 debug_print_object(obj, "active_state");
747 } 925 }
748
749 raw_spin_unlock_irqrestore(&db->lock, flags);
750} 926}
751EXPORT_SYMBOL_GPL(debug_object_active_state); 927EXPORT_SYMBOL_GPL(debug_object_active_state);
752 928
@@ -760,7 +936,6 @@ static void __debug_check_no_obj_freed(const void *address, unsigned long size)
760 struct hlist_node *tmp; 936 struct hlist_node *tmp;
761 struct debug_obj *obj; 937 struct debug_obj *obj;
762 int cnt, objs_checked = 0; 938 int cnt, objs_checked = 0;
763 bool work = false;
764 939
765 saddr = (unsigned long) address; 940 saddr = (unsigned long) address;
766 eaddr = saddr + size; 941 eaddr = saddr + size;
@@ -782,16 +957,16 @@ repeat:
782 957
783 switch (obj->state) { 958 switch (obj->state) {
784 case ODEBUG_STATE_ACTIVE: 959 case ODEBUG_STATE_ACTIVE:
785 debug_print_object(obj, "free");
786 descr = obj->descr; 960 descr = obj->descr;
787 state = obj->state; 961 state = obj->state;
788 raw_spin_unlock_irqrestore(&db->lock, flags); 962 raw_spin_unlock_irqrestore(&db->lock, flags);
963 debug_print_object(obj, "free");
789 debug_object_fixup(descr->fixup_free, 964 debug_object_fixup(descr->fixup_free,
790 (void *) oaddr, state); 965 (void *) oaddr, state);
791 goto repeat; 966 goto repeat;
792 default: 967 default:
793 hlist_del(&obj->node); 968 hlist_del(&obj->node);
794 work |= __free_object(obj); 969 __free_object(obj);
795 break; 970 break;
796 } 971 }
797 } 972 }
@@ -807,8 +982,10 @@ repeat:
807 debug_objects_maxchecked = objs_checked; 982 debug_objects_maxchecked = objs_checked;
808 983
809 /* Schedule work to actually kmem_cache_free() objects */ 984 /* Schedule work to actually kmem_cache_free() objects */
810 if (work) 985 if (!obj_freeing && obj_nr_tofree) {
811 schedule_work(&debug_obj_work); 986 WRITE_ONCE(obj_freeing, true);
987 schedule_delayed_work(&debug_obj_work, ODEBUG_FREE_WORK_DELAY);
988 }
812} 989}
813 990
814void debug_check_no_obj_freed(const void *address, unsigned long size) 991void debug_check_no_obj_freed(const void *address, unsigned long size)
@@ -822,13 +999,19 @@ void debug_check_no_obj_freed(const void *address, unsigned long size)
822 999
823static int debug_stats_show(struct seq_file *m, void *v) 1000static int debug_stats_show(struct seq_file *m, void *v)
824{ 1001{
1002 int cpu, obj_percpu_free = 0;
1003
1004 for_each_possible_cpu(cpu)
1005 obj_percpu_free += per_cpu(percpu_obj_pool.obj_free, cpu);
1006
825 seq_printf(m, "max_chain :%d\n", debug_objects_maxchain); 1007 seq_printf(m, "max_chain :%d\n", debug_objects_maxchain);
826 seq_printf(m, "max_checked :%d\n", debug_objects_maxchecked); 1008 seq_printf(m, "max_checked :%d\n", debug_objects_maxchecked);
827 seq_printf(m, "warnings :%d\n", debug_objects_warnings); 1009 seq_printf(m, "warnings :%d\n", debug_objects_warnings);
828 seq_printf(m, "fixups :%d\n", debug_objects_fixups); 1010 seq_printf(m, "fixups :%d\n", debug_objects_fixups);
829 seq_printf(m, "pool_free :%d\n", obj_pool_free); 1011 seq_printf(m, "pool_free :%d\n", obj_pool_free + obj_percpu_free);
1012 seq_printf(m, "pool_pcp_free :%d\n", obj_percpu_free);
830 seq_printf(m, "pool_min_free :%d\n", obj_pool_min_free); 1013 seq_printf(m, "pool_min_free :%d\n", obj_pool_min_free);
831 seq_printf(m, "pool_used :%d\n", obj_pool_used); 1014 seq_printf(m, "pool_used :%d\n", obj_pool_used - obj_percpu_free);
832 seq_printf(m, "pool_max_used :%d\n", obj_pool_max_used); 1015 seq_printf(m, "pool_max_used :%d\n", obj_pool_max_used);
833 seq_printf(m, "on_free_list :%d\n", obj_nr_tofree); 1016 seq_printf(m, "on_free_list :%d\n", obj_nr_tofree);
834 seq_printf(m, "objs_allocated:%d\n", debug_objects_allocated); 1017 seq_printf(m, "objs_allocated:%d\n", debug_objects_allocated);
@@ -850,26 +1033,16 @@ static const struct file_operations debug_stats_fops = {
850 1033
851static int __init debug_objects_init_debugfs(void) 1034static int __init debug_objects_init_debugfs(void)
852{ 1035{
853 struct dentry *dbgdir, *dbgstats; 1036 struct dentry *dbgdir;
854 1037
855 if (!debug_objects_enabled) 1038 if (!debug_objects_enabled)
856 return 0; 1039 return 0;
857 1040
858 dbgdir = debugfs_create_dir("debug_objects", NULL); 1041 dbgdir = debugfs_create_dir("debug_objects", NULL);
859 if (!dbgdir)
860 return -ENOMEM;
861 1042
862 dbgstats = debugfs_create_file("stats", 0444, dbgdir, NULL, 1043 debugfs_create_file("stats", 0444, dbgdir, NULL, &debug_stats_fops);
863 &debug_stats_fops);
864 if (!dbgstats)
865 goto err;
866 1044
867 return 0; 1045 return 0;
868
869err:
870 debugfs_remove(dbgdir);
871
872 return -ENOMEM;
873} 1046}
874__initcall(debug_objects_init_debugfs); 1047__initcall(debug_objects_init_debugfs);
875 1048
@@ -1175,9 +1348,20 @@ free:
1175 */ 1348 */
1176void __init debug_objects_mem_init(void) 1349void __init debug_objects_mem_init(void)
1177{ 1350{
1351 int cpu, extras;
1352
1178 if (!debug_objects_enabled) 1353 if (!debug_objects_enabled)
1179 return; 1354 return;
1180 1355
1356 /*
1357 * Initialize the percpu object pools
1358 *
1359 * Initialization is not strictly necessary, but was done for
1360 * completeness.
1361 */
1362 for_each_possible_cpu(cpu)
1363 INIT_HLIST_HEAD(&per_cpu(percpu_obj_pool.free_objs, cpu));
1364
1181 obj_cache = kmem_cache_create("debug_objects_cache", 1365 obj_cache = kmem_cache_create("debug_objects_cache",
1182 sizeof (struct debug_obj), 0, 1366 sizeof (struct debug_obj), 0,
1183 SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE, 1367 SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE,
@@ -1194,6 +1378,7 @@ void __init debug_objects_mem_init(void)
1194 * Increase the thresholds for allocating and freeing objects 1378 * Increase the thresholds for allocating and freeing objects
1195 * according to the number of possible CPUs available in the system. 1379 * according to the number of possible CPUs available in the system.
1196 */ 1380 */
1197 debug_objects_pool_size += num_possible_cpus() * 32; 1381 extras = num_possible_cpus() * ODEBUG_BATCH_SIZE;
1198 debug_objects_pool_min_level += num_possible_cpus() * 4; 1382 debug_objects_pool_size += extras;
1383 debug_objects_pool_min_level += extras;
1199} 1384}
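
Summarizing the debugobjects rework above: each CPU gains a small free list (at most ODEBUG_POOL_PERCPU_SIZE objects) that is accessed with only interrupts disabled, refills and frees move objects in ODEBUG_BATCH_SIZE batches to amortize pool_lock acquisitions, and the actual kmem_cache_free() work is throttled to ODEBUG_FREE_WORK_MAX objects per run of a delayed work item scheduled at most every ODEBUG_FREE_WORK_DELAY jiffies (about 10Hz). Condensed from the hunks above, the allocation fast path now has this shape (statistics and the lookahead refill loop elided; alloc_fast_path is an illustrative name, not a function in the file):

static struct debug_obj *alloc_fast_path(void)
{
        struct debug_percpu_free *pcp = this_cpu_ptr(&percpu_obj_pool);
        struct debug_obj *obj;

        /* 1. Per-CPU pool: no pool_lock needed, interrupts are already off. */
        obj = __alloc_object(&pcp->free_objs);
        if (obj) {
                pcp->obj_free--;
                return obj;
        }

        /*
         * 2. Fall back to the global pool under pool_lock and pull one
         *    ODEBUG_BATCH_SIZE batch into the per-CPU pool for next time.
         */
        raw_spin_lock(&pool_lock);
        obj = __alloc_object(&obj_pool);
        /* ... batch refill of pcp->free_objs elided ... */
        raw_spin_unlock(&pool_lock);
        return obj;
}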
diff --git a/lib/devres.c b/lib/devres.c
index 69bed2f38306..6a0e9bd6524a 100644
--- a/lib/devres.c
+++ b/lib/devres.c
@@ -131,7 +131,8 @@ EXPORT_SYMBOL(devm_iounmap);
131 * if (IS_ERR(base)) 131 * if (IS_ERR(base))
132 * return PTR_ERR(base); 132 * return PTR_ERR(base);
133 */ 133 */
134void __iomem *devm_ioremap_resource(struct device *dev, struct resource *res) 134void __iomem *devm_ioremap_resource(struct device *dev,
135 const struct resource *res)
135{ 136{
136 resource_size_t size; 137 resource_size_t size;
137 void __iomem *dest_ptr; 138 void __iomem *dest_ptr;
diff --git a/lib/idr.c b/lib/idr.c
index c34e256d2f01..66a374892482 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -228,11 +228,21 @@ void *idr_get_next(struct idr *idr, int *nextid)
228{ 228{
229 struct radix_tree_iter iter; 229 struct radix_tree_iter iter;
230 void __rcu **slot; 230 void __rcu **slot;
231 void *entry = NULL;
231 unsigned long base = idr->idr_base; 232 unsigned long base = idr->idr_base;
232 unsigned long id = *nextid; 233 unsigned long id = *nextid;
233 234
234 id = (id < base) ? 0 : id - base; 235 id = (id < base) ? 0 : id - base;
235 slot = radix_tree_iter_find(&idr->idr_rt, &iter, id); 236 radix_tree_for_each_slot(slot, &idr->idr_rt, &iter, id) {
237 entry = rcu_dereference_raw(*slot);
238 if (!entry)
239 continue;
240 if (!xa_is_internal(entry))
241 break;
242 if (slot != &idr->idr_rt.xa_head && !xa_is_retry(entry))
243 break;
244 slot = radix_tree_iter_retry(&iter);
245 }
236 if (!slot) 246 if (!slot)
237 return NULL; 247 return NULL;
238 id = iter.index + base; 248 id = iter.index + base;
@@ -241,7 +251,7 @@ void *idr_get_next(struct idr *idr, int *nextid)
241 return NULL; 251 return NULL;
242 252
243 *nextid = id; 253 *nextid = id;
244 return rcu_dereference_raw(*slot); 254 return entry;
245} 255}
246EXPORT_SYMBOL(idr_get_next); 256EXPORT_SYMBOL(idr_get_next);
247 257
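
idr_get_next() now walks slots with radix_tree_for_each_slot() and explicitly skips empty slots and internal (retry) entries instead of returning whatever radix_tree_iter_find() produced first. Callers are unchanged; the usual consumer is the idr_for_each_entry() iterator, sketched here with a hypothetical my_obj payload:

#include <linux/idr.h>
#include <linux/kernel.h>

struct my_obj {
        int id;
};

static void walk(struct idr *idr)
{
        struct my_obj *obj;
        int id;

        idr_for_each_entry(idr, obj, id) {
                /* obj is always a real, non-internal entry here */
                pr_info("id %d -> %p\n", id, obj);
        }
}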
diff --git a/lib/mpi/mpi-pow.c b/lib/mpi/mpi-pow.c
index 82b19e4f1189..2fd7a46d55ec 100644
--- a/lib/mpi/mpi-pow.c
+++ b/lib/mpi/mpi-pow.c
@@ -24,6 +24,7 @@
24int mpi_powm(MPI res, MPI base, MPI exp, MPI mod) 24int mpi_powm(MPI res, MPI base, MPI exp, MPI mod)
25{ 25{
26 mpi_ptr_t mp_marker = NULL, bp_marker = NULL, ep_marker = NULL; 26 mpi_ptr_t mp_marker = NULL, bp_marker = NULL, ep_marker = NULL;
27 struct karatsuba_ctx karactx = {};
27 mpi_ptr_t xp_marker = NULL; 28 mpi_ptr_t xp_marker = NULL;
28 mpi_ptr_t tspace = NULL; 29 mpi_ptr_t tspace = NULL;
29 mpi_ptr_t rp, ep, mp, bp; 30 mpi_ptr_t rp, ep, mp, bp;
@@ -150,13 +151,11 @@ int mpi_powm(MPI res, MPI base, MPI exp, MPI mod)
150 int c; 151 int c;
151 mpi_limb_t e; 152 mpi_limb_t e;
152 mpi_limb_t carry_limb; 153 mpi_limb_t carry_limb;
153 struct karatsuba_ctx karactx;
154 154
155 xp = xp_marker = mpi_alloc_limb_space(2 * (msize + 1)); 155 xp = xp_marker = mpi_alloc_limb_space(2 * (msize + 1));
156 if (!xp) 156 if (!xp)
157 goto enomem; 157 goto enomem;
158 158
159 memset(&karactx, 0, sizeof karactx);
160 negative_result = (ep[0] & 1) && base->sign; 159 negative_result = (ep[0] & 1) && base->sign;
161 160
162 i = esize - 1; 161 i = esize - 1;
@@ -281,8 +280,6 @@ int mpi_powm(MPI res, MPI base, MPI exp, MPI mod)
281 if (mod_shift_cnt) 280 if (mod_shift_cnt)
282 mpihelp_rshift(rp, rp, rsize, mod_shift_cnt); 281 mpihelp_rshift(rp, rp, rsize, mod_shift_cnt);
283 MPN_NORMALIZE(rp, rsize); 282 MPN_NORMALIZE(rp, rsize);
284
285 mpihelp_release_karatsuba_ctx(&karactx);
286 } 283 }
287 284
288 if (negative_result && rsize) { 285 if (negative_result && rsize) {
@@ -299,6 +296,7 @@ int mpi_powm(MPI res, MPI base, MPI exp, MPI mod)
299leave: 296leave:
300 rc = 0; 297 rc = 0;
301enomem: 298enomem:
299 mpihelp_release_karatsuba_ctx(&karactx);
302 if (assign_rp) 300 if (assign_rp)
303 mpi_assign_limb_space(res, rp, size); 301 mpi_assign_limb_space(res, rp, size);
304 if (mp_marker) 302 if (mp_marker)
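
The mpi_powm() change is a leak fix: the karatsuba_ctx used to be declared and released inside the inner block, so the enomem/leave error exits skipped mpihelp_release_karatsuba_ctx(). Hoisting it to function scope, zero-initializing it at declaration, and releasing it on the common exit path closes that hole. The same structure, reduced to a generic and fully hypothetical user-space example (some_ctx, release_ctx and the buffer are made up):

#include <errno.h>
#include <stdlib.h>

struct some_ctx {
        void *scratch;
};

static void release_ctx(struct some_ctx *ctx)
{
        free(ctx->scratch);     /* free(NULL) is a no-op, so "never used" is fine */
}

int do_work(void)
{
        struct some_ctx ctx = { 0 };    /* initialized before any failure exit */
        void *buf;
        int rc = -ENOMEM;

        buf = malloc(64);
        if (!buf)
                goto out;               /* early error: ctx is still released */

        /* ... real work that may populate ctx.scratch ... */
        rc = 0;
out:
        release_ctx(&ctx);              /* single exit path frees everything */
        free(buf);
        return rc;
}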
diff --git a/lib/raid6/s390vx.uc b/lib/raid6/s390vx.uc
index 914ebe98fc21..9e597e1f91a4 100644
--- a/lib/raid6/s390vx.uc
+++ b/lib/raid6/s390vx.uc
@@ -60,7 +60,7 @@ static inline void LOAD_DATA(int x, u8 *ptr)
60 typedef struct { u8 _[16 * $#]; } addrtype; 60 typedef struct { u8 _[16 * $#]; } addrtype;
61 register addrtype *__ptr asm("1") = (addrtype *) ptr; 61 register addrtype *__ptr asm("1") = (addrtype *) ptr;
62 62
63 asm volatile ("VLM %2,%3,0,%r1" 63 asm volatile ("VLM %2,%3,0,%1"
64 : : "m" (*__ptr), "a" (__ptr), "i" (x), 64 : : "m" (*__ptr), "a" (__ptr), "i" (x),
65 "i" (x + $# - 1)); 65 "i" (x + $# - 1));
66} 66}
diff --git a/lib/reed_solomon/Makefile b/lib/reed_solomon/Makefile
index ba9d7a3329eb..5d4fa68f26cb 100644
--- a/lib/reed_solomon/Makefile
+++ b/lib/reed_solomon/Makefile
@@ -4,4 +4,4 @@
4# 4#
5 5
6obj-$(CONFIG_REED_SOLOMON) += reed_solomon.o 6obj-$(CONFIG_REED_SOLOMON) += reed_solomon.o
7 7obj-$(CONFIG_REED_SOLOMON_TEST) += test_rslib.o
diff --git a/lib/reed_solomon/decode_rs.c b/lib/reed_solomon/decode_rs.c
index 1db74eb098d0..805de84ae83d 100644
--- a/lib/reed_solomon/decode_rs.c
+++ b/lib/reed_solomon/decode_rs.c
@@ -22,6 +22,7 @@
22 uint16_t *index_of = rs->index_of; 22 uint16_t *index_of = rs->index_of;
23 uint16_t u, q, tmp, num1, num2, den, discr_r, syn_error; 23 uint16_t u, q, tmp, num1, num2, den, discr_r, syn_error;
24 int count = 0; 24 int count = 0;
25 int num_corrected;
25 uint16_t msk = (uint16_t) rs->nn; 26 uint16_t msk = (uint16_t) rs->nn;
26 27
27 /* 28 /*
@@ -39,11 +40,21 @@
39 40
40 /* Check length parameter for validity */ 41 /* Check length parameter for validity */
41 pad = nn - nroots - len; 42 pad = nn - nroots - len;
42 BUG_ON(pad < 0 || pad >= nn); 43 BUG_ON(pad < 0 || pad >= nn - nroots);
43 44
44 /* Does the caller provide the syndrome ? */ 45 /* Does the caller provide the syndrome ? */
45 if (s != NULL) 46 if (s != NULL) {
46 goto decode; 47 for (i = 0; i < nroots; i++) {
48 /* The syndrome is in index form,
49 * so nn represents zero
50 */
51 if (s[i] != nn)
52 goto decode;
53 }
54
55 /* syndrome is zero, no errors to correct */
56 return 0;
57 }
47 58
48 /* form the syndromes; i.e., evaluate data(x) at roots of 59 /* form the syndromes; i.e., evaluate data(x) at roots of
49 * g(x) */ 60 * g(x) */
@@ -88,8 +99,7 @@
88 /* if syndrome is zero, data[] is a codeword and there are no 99 /* if syndrome is zero, data[] is a codeword and there are no
89 * errors to correct. So return data[] unmodified 100 * errors to correct. So return data[] unmodified
90 */ 101 */
91 count = 0; 102 return 0;
92 goto finish;
93 } 103 }
94 104
95 decode: 105 decode:
@@ -99,9 +109,9 @@
99 if (no_eras > 0) { 109 if (no_eras > 0) {
100 /* Init lambda to be the erasure locator polynomial */ 110 /* Init lambda to be the erasure locator polynomial */
101 lambda[1] = alpha_to[rs_modnn(rs, 111 lambda[1] = alpha_to[rs_modnn(rs,
102 prim * (nn - 1 - eras_pos[0]))]; 112 prim * (nn - 1 - (eras_pos[0] + pad)))];
103 for (i = 1; i < no_eras; i++) { 113 for (i = 1; i < no_eras; i++) {
104 u = rs_modnn(rs, prim * (nn - 1 - eras_pos[i])); 114 u = rs_modnn(rs, prim * (nn - 1 - (eras_pos[i] + pad)));
105 for (j = i + 1; j > 0; j--) { 115 for (j = i + 1; j > 0; j--) {
106 tmp = index_of[lambda[j - 1]]; 116 tmp = index_of[lambda[j - 1]];
107 if (tmp != nn) { 117 if (tmp != nn) {
@@ -175,6 +185,15 @@
175 if (lambda[i] != nn) 185 if (lambda[i] != nn)
176 deg_lambda = i; 186 deg_lambda = i;
177 } 187 }
188
189 if (deg_lambda == 0) {
190 /*
191 * deg(lambda) is zero even though the syndrome is non-zero
192 * => uncorrectable error detected
193 */
194 return -EBADMSG;
195 }
196
178 /* Find roots of error+erasure locator polynomial by Chien search */ 197 /* Find roots of error+erasure locator polynomial by Chien search */
179 memcpy(&reg[1], &lambda[1], nroots * sizeof(reg[0])); 198 memcpy(&reg[1], &lambda[1], nroots * sizeof(reg[0]));
180 count = 0; /* Number of roots of lambda(x) */ 199 count = 0; /* Number of roots of lambda(x) */
@@ -188,6 +207,12 @@
188 } 207 }
189 if (q != 0) 208 if (q != 0)
190 continue; /* Not a root */ 209 continue; /* Not a root */
210
211 if (k < pad) {
212 /* Impossible error location. Uncorrectable error. */
213 return -EBADMSG;
214 }
215
191 /* store root (index-form) and error location number */ 216 /* store root (index-form) and error location number */
192 root[count] = i; 217 root[count] = i;
193 loc[count] = k; 218 loc[count] = k;
@@ -202,8 +227,7 @@
202 * deg(lambda) unequal to number of roots => uncorrectable 227 * deg(lambda) unequal to number of roots => uncorrectable
203 * error detected 228 * error detected
204 */ 229 */
205 count = -EBADMSG; 230 return -EBADMSG;
206 goto finish;
207 } 231 }
208 /* 232 /*
209 * Compute err+eras evaluator poly omega(x) = s(x)*lambda(x) (modulo 233 * Compute err+eras evaluator poly omega(x) = s(x)*lambda(x) (modulo
@@ -223,7 +247,9 @@
223 /* 247 /*
224 * Compute error values in poly-form. num1 = omega(inv(X(l))), num2 = 248 * Compute error values in poly-form. num1 = omega(inv(X(l))), num2 =
225 * inv(X(l))**(fcr-1) and den = lambda_pr(inv(X(l))) all in poly-form 249 * inv(X(l))**(fcr-1) and den = lambda_pr(inv(X(l))) all in poly-form
250 * Note: we reuse the buffer for b to store the correction pattern
226 */ 251 */
252 num_corrected = 0;
227 for (j = count - 1; j >= 0; j--) { 253 for (j = count - 1; j >= 0; j--) {
228 num1 = 0; 254 num1 = 0;
229 for (i = deg_omega; i >= 0; i--) { 255 for (i = deg_omega; i >= 0; i--) {
@@ -231,6 +257,13 @@
231 num1 ^= alpha_to[rs_modnn(rs, omega[i] + 257 num1 ^= alpha_to[rs_modnn(rs, omega[i] +
232 i * root[j])]; 258 i * root[j])];
233 } 259 }
260
261 if (num1 == 0) {
262 /* Nothing to correct at this position */
263 b[j] = 0;
264 continue;
265 }
266
234 num2 = alpha_to[rs_modnn(rs, root[j] * (fcr - 1) + nn)]; 267 num2 = alpha_to[rs_modnn(rs, root[j] * (fcr - 1) + nn)];
235 den = 0; 268 den = 0;
236 269
@@ -242,30 +275,52 @@
242 i * root[j])]; 275 i * root[j])];
243 } 276 }
244 } 277 }
245 /* Apply error to data */ 278
246 if (num1 != 0 && loc[j] >= pad) { 279 b[j] = alpha_to[rs_modnn(rs, index_of[num1] +
247 uint16_t cor = alpha_to[rs_modnn(rs,index_of[num1] + 280 index_of[num2] +
248 index_of[num2] + 281 nn - index_of[den])];
249 nn - index_of[den])]; 282 num_corrected++;
250 /* Store the error correction pattern, if a 283 }
251 * correction buffer is available */ 284
252 if (corr) { 285 /*
253 corr[j] = cor; 286 * We compute the syndrome of the 'error' and check that it matches
254 } else { 287 * the syndrome of the received word
255 /* If a data buffer is given and the 288 */
256 * error is inside the message, 289 for (i = 0; i < nroots; i++) {
257 * correct it */ 290 tmp = 0;
258 if (data && (loc[j] < (nn - nroots))) 291 for (j = 0; j < count; j++) {
259 data[loc[j] - pad] ^= cor; 292 if (b[j] == 0)
260 } 293 continue;
294
295 k = (fcr + i) * prim * (nn-loc[j]-1);
296 tmp ^= alpha_to[rs_modnn(rs, index_of[b[j]] + k)];
261 } 297 }
298
299 if (tmp != alpha_to[s[i]])
300 return -EBADMSG;
262 } 301 }
263 302
264finish: 303 /*
265 if (eras_pos != NULL) { 304 * Store the error correction pattern, if a
266 for (i = 0; i < count; i++) 305 * correction buffer is available
267 eras_pos[i] = loc[i] - pad; 306 */
307 if (corr && eras_pos) {
308 j = 0;
309 for (i = 0; i < count; i++) {
310 if (b[i]) {
311 corr[j] = b[i];
312 eras_pos[j++] = loc[i] - pad;
313 }
314 }
315 } else if (data && par) {
316 /* Apply error to data and parity */
317 for (i = 0; i < count; i++) {
318 if (loc[i] < (nn - nroots))
319 data[loc[i] - pad] ^= b[i];
320 else
321 par[loc[i] - pad - len] ^= b[i];
322 }
268 } 323 }
269 return count;
270 324
325 return num_corrected;
271} 326}
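
A minimal caller sketch of the reworked contract (illustrative only, not part of the patch; rs, data, par and len are assumed to come from an earlier init_rs() setup). Passing a NULL syndrome, NULL eras_pos and NULL correction buffer selects the in-place path added above; the return value now counts corrected symbols, parity included, and -EBADMSG means the received block was left untouched:

	int n = decode_rs8(rs, data, par, len, NULL, 0, NULL, 0, NULL);
	if (n < 0)
		pr_err("rs: uncorrectable block (%d)\n", n);
	else
		pr_debug("rs: corrected %d symbols (data + parity)\n", n);
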
diff --git a/lib/reed_solomon/reed_solomon.c b/lib/reed_solomon/reed_solomon.c
index e5fdc8b9e856..bbc01bad3053 100644
--- a/lib/reed_solomon/reed_solomon.c
+++ b/lib/reed_solomon/reed_solomon.c
@@ -340,7 +340,8 @@ EXPORT_SYMBOL_GPL(encode_rs8);
340 * @data: data field of a given type 340 * @data: data field of a given type
341 * @par: received parity data field 341 * @par: received parity data field
342 * @len: data length 342 * @len: data length
343 * @s: syndrome data field (if NULL, syndrome is calculated) 343 * @s: syndrome data field, must be in index form
344 * (if NULL, syndrome is calculated)
344 * @no_eras: number of erasures 345 * @no_eras: number of erasures
345 * @eras_pos: position of erasures, can be NULL 346 * @eras_pos: position of erasures, can be NULL
346 * @invmsk: invert data mask (will be xored on data, not on parity!) 347 * @invmsk: invert data mask (will be xored on data, not on parity!)
@@ -354,7 +355,8 @@ EXPORT_SYMBOL_GPL(encode_rs8);
354 * decoding, so the caller has to ensure that decoder invocations are 355 * decoding, so the caller has to ensure that decoder invocations are
355 * serialized. 356 * serialized.
356 * 357 *
357 * Returns the number of corrected bits or -EBADMSG for uncorrectable errors. 358 * Returns the number of corrected symbols or -EBADMSG for uncorrectable
359 * errors. The count includes errors in the parity.
358 */ 360 */
359int decode_rs8(struct rs_control *rsc, uint8_t *data, uint16_t *par, int len, 361int decode_rs8(struct rs_control *rsc, uint8_t *data, uint16_t *par, int len,
360 uint16_t *s, int no_eras, int *eras_pos, uint16_t invmsk, 362 uint16_t *s, int no_eras, int *eras_pos, uint16_t invmsk,
@@ -391,7 +393,8 @@ EXPORT_SYMBOL_GPL(encode_rs16);
391 * @data: data field of a given type 393 * @data: data field of a given type
392 * @par: received parity data field 394 * @par: received parity data field
393 * @len: data length 395 * @len: data length
394 * @s: syndrome data field (if NULL, syndrome is calculated) 396 * @s: syndrome data field, must be in index form
397 * (if NULL, syndrome is calculated)
395 * @no_eras: number of erasures 398 * @no_eras: number of erasures
396 * @eras_pos: position of erasures, can be NULL 399 * @eras_pos: position of erasures, can be NULL
397 * @invmsk: invert data mask (will be xored on data, not on parity!) 400 * @invmsk: invert data mask (will be xored on data, not on parity!)
@@ -403,7 +406,8 @@ EXPORT_SYMBOL_GPL(encode_rs16);
403 * decoding, so the caller has to ensure that decoder invocations are 406 * decoding, so the caller has to ensure that decoder invocations are
404 * serialized. 407 * serialized.
405 * 408 *
406 * Returns the number of corrected bits or -EBADMSG for uncorrectable errors. 409 * Returns the number of corrected symbols or -EBADMSG for uncorrectable
410 * errors. The count includes errors in the parity.
407 */ 411 */
408int decode_rs16(struct rs_control *rsc, uint16_t *data, uint16_t *par, int len, 412int decode_rs16(struct rs_control *rsc, uint16_t *data, uint16_t *par, int len,
409 uint16_t *s, int no_eras, int *eras_pos, uint16_t invmsk, 413 uint16_t *s, int no_eras, int *eras_pos, uint16_t invmsk,
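
Since the kernel-doc now requires caller-supplied syndromes in index form, here is a hedged sketch of the conversion, mirroring compute_syndrome() in the new test module below (rsc, syn, dlen, errlocs and corr are assumed caller state, not part of the patch):

	/* syn[] holds the nroots syndrome values in polynomial form */
	for (i = 0; i < rsc->codec->nroots; i++)
		syn[i] = rsc->codec->index_of[syn[i]];

	/* data/par may be NULL when only the correction pattern is wanted */
	n = decode_rs16(rsc, NULL, NULL, dlen, syn, 0, errlocs, 0, corr);
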
diff --git a/lib/reed_solomon/test_rslib.c b/lib/reed_solomon/test_rslib.c
new file mode 100644
index 000000000000..4eb29f365ece
--- /dev/null
+++ b/lib/reed_solomon/test_rslib.c
@@ -0,0 +1,518 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Tests for Generic Reed Solomon encoder / decoder library
4 *
5 * Written by Ferdinand Blomqvist
6 * Based on previous work by Phil Karn, KA9Q
7 */
8#include <linux/rslib.h>
9#include <linux/kernel.h>
10#include <linux/module.h>
11#include <linux/moduleparam.h>
12#include <linux/random.h>
13#include <linux/slab.h>
14
15enum verbosity {
16 V_SILENT,
17 V_PROGRESS,
18 V_CSUMMARY
19};
20
21enum method {
22 CORR_BUFFER,
23 CALLER_SYNDROME,
24 IN_PLACE
25};
26
27#define __param(type, name, init, msg) \
28 static type name = init; \
29 module_param(name, type, 0444); \
30 MODULE_PARM_DESC(name, msg)
31
32__param(int, v, V_PROGRESS, "Verbosity level");
33__param(int, ewsc, 1, "Erasures without symbol corruption");
34__param(int, bc, 1, "Test for correct behaviour beyond error correction capacity");
35
36struct etab {
37 int symsize;
38 int genpoly;
39 int fcs;
40 int prim;
41 int nroots;
42 int ntrials;
43};
44
45/* List of codes to test */
46static struct etab Tab[] = {
47 {2, 0x7, 1, 1, 1, 100000 },
48 {3, 0xb, 1, 1, 2, 100000 },
49 {3, 0xb, 1, 1, 3, 100000 },
50 {3, 0xb, 2, 1, 4, 100000 },
51 {4, 0x13, 1, 1, 4, 10000 },
52 {5, 0x25, 1, 1, 6, 1000 },
53 {6, 0x43, 3, 1, 8, 1000 },
54 {7, 0x89, 1, 1, 14, 500 },
55 {8, 0x11d, 1, 1, 30, 100 },
56 {8, 0x187, 112, 11, 32, 100 },
57 {9, 0x211, 1, 1, 33, 80 },
58 {0, 0, 0, 0, 0, 0},
59};
60
61
62struct estat {
63 int dwrong;
64 int irv;
65 int wepos;
66 int nwords;
67};
68
69struct bcstat {
70 int rfail;
71 int rsuccess;
72 int noncw;
73 int nwords;
74};
75
76struct wspace {
77 uint16_t *c; /* sent codeword */
78 uint16_t *r; /* received word */
79 uint16_t *s; /* syndrome */
80 uint16_t *corr; /* correction buffer */
81 int *errlocs;
82 int *derrlocs;
83};
84
85struct pad {
86 int mult;
87 int shift;
88};
89
90static struct pad pad_coef[] = {
91 { 0, 0 },
92 { 1, 2 },
93 { 1, 1 },
94 { 3, 2 },
95 { 1, 0 },
96};
97
98static void free_ws(struct wspace *ws)
99{
100 if (!ws)
101 return;
102
103 kfree(ws->errlocs);
104 kfree(ws->c);
105 kfree(ws);
106}
107
108static struct wspace *alloc_ws(struct rs_codec *rs)
109{
110 int nroots = rs->nroots;
111 struct wspace *ws;
112 int nn = rs->nn;
113
114 ws = kzalloc(sizeof(*ws), GFP_KERNEL);
115 if (!ws)
116 return NULL;
117
118 ws->c = kmalloc_array(2 * (nn + nroots),
119 sizeof(uint16_t), GFP_KERNEL);
120 if (!ws->c)
121 goto err;
122
123 ws->r = ws->c + nn;
124 ws->s = ws->r + nn;
125 ws->corr = ws->s + nroots;
126
127 ws->errlocs = kmalloc_array(nn + nroots, sizeof(int), GFP_KERNEL);
128 if (!ws->errlocs)
129 goto err;
130
131 ws->derrlocs = ws->errlocs + nn;
132 return ws;
133
134err:
135 free_ws(ws);
136 return NULL;
137}
138
139
140/*
141 * Generates a random codeword and stores it in c. Generates random errors and
142 * erasures, and stores the random word with errors in r. Erasure positions are
143 * stored in derrlocs, while errlocs has one of three values in every position:
144 *
145 * 0 if there is no error in this position;
146 * 1 if there is a symbol error in this position;
147 * 2 if there is an erasure without symbol corruption.
148 *
149 * Returns the number of corrupted symbols.
150 */
151static int get_rcw_we(struct rs_control *rs, struct wspace *ws,
152 int len, int errs, int eras)
153{
154 int nroots = rs->codec->nroots;
155 int *derrlocs = ws->derrlocs;
156 int *errlocs = ws->errlocs;
157 int dlen = len - nroots;
158 int nn = rs->codec->nn;
159 uint16_t *c = ws->c;
160 uint16_t *r = ws->r;
161 int errval;
162 int errloc;
163 int i;
164
165 /* Load c with random data and encode */
166 for (i = 0; i < dlen; i++)
167 c[i] = prandom_u32() & nn;
168
169 memset(c + dlen, 0, nroots * sizeof(*c));
170 encode_rs16(rs, c, dlen, c + dlen, 0);
171
172	/* Make copy and add errors and erasures */

173 memcpy(r, c, len * sizeof(*r));
174 memset(errlocs, 0, len * sizeof(*errlocs));
175 memset(derrlocs, 0, nroots * sizeof(*derrlocs));
176
177 /* Generating random errors */
178 for (i = 0; i < errs; i++) {
179 do {
180 /* Error value must be nonzero */
181 errval = prandom_u32() & nn;
182 } while (errval == 0);
183
184 do {
185 /* Must not choose the same location twice */
186 errloc = prandom_u32() % len;
187 } while (errlocs[errloc] != 0);
188
189 errlocs[errloc] = 1;
190 r[errloc] ^= errval;
191 }
192
193 /* Generating random erasures */
194 for (i = 0; i < eras; i++) {
195 do {
196 /* Must not choose the same location twice */
197 errloc = prandom_u32() % len;
198 } while (errlocs[errloc] != 0);
199
200 derrlocs[i] = errloc;
201
202 if (ewsc && (prandom_u32() & 1)) {
203 /* Erasure with the symbol intact */
204 errlocs[errloc] = 2;
205 } else {
206 /* Erasure with corrupted symbol */
207 do {
208 /* Error value must be nonzero */
209 errval = prandom_u32() & nn;
210 } while (errval == 0);
211
212 errlocs[errloc] = 1;
213 r[errloc] ^= errval;
214 errs++;
215 }
216 }
217
218 return errs;
219}
220
221static void fix_err(uint16_t *data, int nerrs, uint16_t *corr, int *errlocs)
222{
223 int i;
224
225 for (i = 0; i < nerrs; i++)
226 data[errlocs[i]] ^= corr[i];
227}
228
229static void compute_syndrome(struct rs_control *rsc, uint16_t *data,
230 int len, uint16_t *syn)
231{
232 struct rs_codec *rs = rsc->codec;
233 uint16_t *alpha_to = rs->alpha_to;
234 uint16_t *index_of = rs->index_of;
235 int nroots = rs->nroots;
236 int prim = rs->prim;
237 int fcr = rs->fcr;
238 int i, j;
239
240 /* Calculating syndrome */
241 for (i = 0; i < nroots; i++) {
242 syn[i] = data[0];
243 for (j = 1; j < len; j++) {
244 if (syn[i] == 0) {
245 syn[i] = data[j];
246 } else {
247 syn[i] = data[j] ^
248 alpha_to[rs_modnn(rs, index_of[syn[i]]
249 + (fcr + i) * prim)];
250 }
251 }
252 }
253
254 /* Convert to index form */
255 for (i = 0; i < nroots; i++)
256 syn[i] = rs->index_of[syn[i]];
257}
258
259/* Test up to error correction capacity */
260static void test_uc(struct rs_control *rs, int len, int errs,
261 int eras, int trials, struct estat *stat,
262 struct wspace *ws, int method)
263{
264 int dlen = len - rs->codec->nroots;
265 int *derrlocs = ws->derrlocs;
266 int *errlocs = ws->errlocs;
267 uint16_t *corr = ws->corr;
268 uint16_t *c = ws->c;
269 uint16_t *r = ws->r;
270 uint16_t *s = ws->s;
271 int derrs, nerrs;
272 int i, j;
273
274 for (j = 0; j < trials; j++) {
275 nerrs = get_rcw_we(rs, ws, len, errs, eras);
276
277 switch (method) {
278 case CORR_BUFFER:
279 derrs = decode_rs16(rs, r, r + dlen, dlen,
280 NULL, eras, derrlocs, 0, corr);
281 fix_err(r, derrs, corr, derrlocs);
282 break;
283 case CALLER_SYNDROME:
284 compute_syndrome(rs, r, len, s);
285 derrs = decode_rs16(rs, NULL, NULL, dlen,
286 s, eras, derrlocs, 0, corr);
287 fix_err(r, derrs, corr, derrlocs);
288 break;
289 case IN_PLACE:
290 derrs = decode_rs16(rs, r, r + dlen, dlen,
291 NULL, eras, derrlocs, 0, NULL);
292 break;
293 default:
294 continue;
295 }
296
297 if (derrs != nerrs)
298 stat->irv++;
299
300 if (method != IN_PLACE) {
301 for (i = 0; i < derrs; i++) {
302 if (errlocs[derrlocs[i]] != 1)
303 stat->wepos++;
304 }
305 }
306
307 if (memcmp(r, c, len * sizeof(*r)))
308 stat->dwrong++;
309 }
310 stat->nwords += trials;
311}
312
313static int ex_rs_helper(struct rs_control *rs, struct wspace *ws,
314 int len, int trials, int method)
315{
316 static const char * const desc[] = {
317 "Testing correction buffer interface...",
318 "Testing with caller provided syndrome...",
319 "Testing in-place interface..."
320 };
321
322 struct estat stat = {0, 0, 0, 0};
323 int nroots = rs->codec->nroots;
324 int errs, eras, retval;
325
326 if (v >= V_PROGRESS)
327 pr_info(" %s\n", desc[method]);
328
329 for (errs = 0; errs <= nroots / 2; errs++)
330 for (eras = 0; eras <= nroots - 2 * errs; eras++)
331 test_uc(rs, len, errs, eras, trials, &stat, ws, method);
332
333 if (v >= V_CSUMMARY) {
334 pr_info(" Decodes wrong: %d / %d\n",
335 stat.dwrong, stat.nwords);
336 pr_info(" Wrong return value: %d / %d\n",
337 stat.irv, stat.nwords);
338 if (method != IN_PLACE)
339 pr_info(" Wrong error position: %d\n", stat.wepos);
340 }
341
342 retval = stat.dwrong + stat.wepos + stat.irv;
343 if (retval && v >= V_PROGRESS)
344 pr_warn(" FAIL: %d decoding failures!\n", retval);
345
346 return retval;
347}
348
349static int exercise_rs(struct rs_control *rs, struct wspace *ws,
350 int len, int trials)
351{
352
353 int retval = 0;
354 int i;
355
356 if (v >= V_PROGRESS)
357 pr_info("Testing up to error correction capacity...\n");
358
359 for (i = 0; i <= IN_PLACE; i++)
360 retval |= ex_rs_helper(rs, ws, len, trials, i);
361
362 return retval;
363}
364
365/* Tests for correct behaviour beyond error correction capacity */
366static void test_bc(struct rs_control *rs, int len, int errs,
367 int eras, int trials, struct bcstat *stat,
368 struct wspace *ws)
369{
370 int nroots = rs->codec->nroots;
371 int dlen = len - nroots;
372 int *derrlocs = ws->derrlocs;
373 uint16_t *corr = ws->corr;
374 uint16_t *r = ws->r;
375 int derrs, j;
376
377 for (j = 0; j < trials; j++) {
378 get_rcw_we(rs, ws, len, errs, eras);
379 derrs = decode_rs16(rs, r, r + dlen, dlen,
380 NULL, eras, derrlocs, 0, corr);
381 fix_err(r, derrs, corr, derrlocs);
382
383 if (derrs >= 0) {
384 stat->rsuccess++;
385
386 /*
387 * We check that the returned word is actually a
388 * codeword. The obvious way to do this would be to
389 * compute the syndrome, but we don't want to replicate
390 * that code here. However, all the codes are in
391 * systematic form, and therefore we can encode the
392 * returned word, and see whether the parity changes or
393 * not.
394 */
395 memset(corr, 0, nroots * sizeof(*corr));
396 encode_rs16(rs, r, dlen, corr, 0);
397
398 if (memcmp(r + dlen, corr, nroots * sizeof(*corr)))
399 stat->noncw++;
400 } else {
401 stat->rfail++;
402 }
403 }
404 stat->nwords += trials;
405}
406
407static int exercise_rs_bc(struct rs_control *rs, struct wspace *ws,
408 int len, int trials)
409{
410 struct bcstat stat = {0, 0, 0, 0};
411 int nroots = rs->codec->nroots;
412 int errs, eras, cutoff;
413
414 if (v >= V_PROGRESS)
415 pr_info("Testing beyond error correction capacity...\n");
416
417 for (errs = 1; errs <= nroots; errs++) {
418 eras = nroots - 2 * errs + 1;
419 if (eras < 0)
420 eras = 0;
421
422 cutoff = nroots <= len - errs ? nroots : len - errs;
423 for (; eras <= cutoff; eras++)
424 test_bc(rs, len, errs, eras, trials, &stat, ws);
425 }
426
427 if (v >= V_CSUMMARY) {
428 pr_info(" decoder gives up: %d / %d\n",
429 stat.rfail, stat.nwords);
430 pr_info(" decoder returns success: %d / %d\n",
431 stat.rsuccess, stat.nwords);
432 pr_info(" not a codeword: %d / %d\n",
433 stat.noncw, stat.rsuccess);
434 }
435
436 if (stat.noncw && v >= V_PROGRESS)
437 pr_warn(" FAIL: %d silent failures!\n", stat.noncw);
438
439 return stat.noncw;
440}
441
442static int run_exercise(struct etab *e)
443{
444 int nn = (1 << e->symsize) - 1;
445 int kk = nn - e->nroots;
446 struct rs_control *rsc;
447 int retval = -ENOMEM;
448 int max_pad = kk - 1;
449 int prev_pad = -1;
450 struct wspace *ws;
451 int i;
452
453 rsc = init_rs(e->symsize, e->genpoly, e->fcs, e->prim, e->nroots);
454 if (!rsc)
455 return retval;
456
457 ws = alloc_ws(rsc->codec);
458 if (!ws)
459 goto err;
460
461 retval = 0;
462 for (i = 0; i < ARRAY_SIZE(pad_coef); i++) {
463 int pad = (pad_coef[i].mult * max_pad) >> pad_coef[i].shift;
464 int len = nn - pad;
465
466 if (pad == prev_pad)
467 continue;
468
469 prev_pad = pad;
470 if (v >= V_PROGRESS) {
471 pr_info("Testing (%d,%d)_%d code...\n",
472 len, kk - pad, nn + 1);
473 }
474
475 retval |= exercise_rs(rsc, ws, len, e->ntrials);
476 if (bc)
477 retval |= exercise_rs_bc(rsc, ws, len, e->ntrials);
478 }
479
480 free_ws(ws);
481
482err:
483 free_rs(rsc);
484 return retval;
485}
486
487static int __init test_rslib_init(void)
488{
489 int i, fail = 0;
490
491 for (i = 0; Tab[i].symsize != 0 ; i++) {
492 int retval;
493
494 retval = run_exercise(Tab + i);
495 if (retval < 0)
496 return -ENOMEM;
497
498 fail |= retval;
499 }
500
501 if (fail)
502 pr_warn("rslib: test failed\n");
503 else
504 pr_info("rslib: test ok\n");
505
506	return -EAGAIN; /* Failing init unloads the module immediately */
507}
508
509static void __exit test_rslib_exit(void)
510{
511}
512
513module_init(test_rslib_init)
514module_exit(test_rslib_exit)
515
516MODULE_LICENSE("GPL");
517MODULE_AUTHOR("Ferdinand Blomqvist");
518MODULE_DESCRIPTION("Reed-Solomon library test");
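
The re-encode check in test_bc() generalizes to any caller that wants to detect silent miscorrection: because all these codes are systematic, re-encoding the corrected data must reproduce the corrected parity held in r + dlen. A hedged sketch (chk[] is an assumed scratch buffer of nroots symbols, not part of the patch):

	memset(chk, 0, nroots * sizeof(*chk));
	encode_rs16(rs, r, dlen, chk, 0);
	if (memcmp(r + dlen, chk, nroots * sizeof(*chk)))
		pr_warn("rs: decoder returned a non-codeword\n");
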
diff --git a/lib/test_xarray.c b/lib/test_xarray.c
index 5d4bad8bd96a..9d631a7b6a70 100644
--- a/lib/test_xarray.c
+++ b/lib/test_xarray.c
@@ -38,6 +38,12 @@ static void *xa_store_index(struct xarray *xa, unsigned long index, gfp_t gfp)
38 return xa_store(xa, index, xa_mk_index(index), gfp); 38 return xa_store(xa, index, xa_mk_index(index), gfp);
39} 39}
40 40
41static void xa_insert_index(struct xarray *xa, unsigned long index)
42{
43 XA_BUG_ON(xa, xa_insert(xa, index, xa_mk_index(index),
44 GFP_KERNEL) != 0);
45}
46
41static void xa_alloc_index(struct xarray *xa, unsigned long index, gfp_t gfp) 47static void xa_alloc_index(struct xarray *xa, unsigned long index, gfp_t gfp)
42{ 48{
43 u32 id; 49 u32 id;
@@ -338,6 +344,37 @@ static noinline void check_xa_shrink(struct xarray *xa)
338 } 344 }
339} 345}
340 346
347static noinline void check_insert(struct xarray *xa)
348{
349 unsigned long i;
350
351 for (i = 0; i < 1024; i++) {
352 xa_insert_index(xa, i);
353 XA_BUG_ON(xa, xa_load(xa, i - 1) != NULL);
354 XA_BUG_ON(xa, xa_load(xa, i + 1) != NULL);
355 xa_erase_index(xa, i);
356 }
357
358 for (i = 10; i < BITS_PER_LONG; i++) {
359 xa_insert_index(xa, 1UL << i);
360 XA_BUG_ON(xa, xa_load(xa, (1UL << i) - 1) != NULL);
361 XA_BUG_ON(xa, xa_load(xa, (1UL << i) + 1) != NULL);
362 xa_erase_index(xa, 1UL << i);
363
364 xa_insert_index(xa, (1UL << i) - 1);
365 XA_BUG_ON(xa, xa_load(xa, (1UL << i) - 2) != NULL);
366 XA_BUG_ON(xa, xa_load(xa, 1UL << i) != NULL);
367 xa_erase_index(xa, (1UL << i) - 1);
368 }
369
370 xa_insert_index(xa, ~0UL);
371 XA_BUG_ON(xa, xa_load(xa, 0UL) != NULL);
372 XA_BUG_ON(xa, xa_load(xa, ~1UL) != NULL);
373 xa_erase_index(xa, ~0UL);
374
375 XA_BUG_ON(xa, !xa_empty(xa));
376}
377
341static noinline void check_cmpxchg(struct xarray *xa) 378static noinline void check_cmpxchg(struct xarray *xa)
342{ 379{
343 void *FIVE = xa_mk_value(5); 380 void *FIVE = xa_mk_value(5);
@@ -1527,6 +1564,7 @@ static int xarray_checks(void)
1527 check_xa_mark(&array); 1564 check_xa_mark(&array);
1528 check_xa_shrink(&array); 1565 check_xa_shrink(&array);
1529 check_xas_erase(&array); 1566 check_xas_erase(&array);
1567 check_insert(&array);
1530 check_cmpxchg(&array); 1568 check_cmpxchg(&array);
1531 check_reserve(&array); 1569 check_reserve(&array);
1532 check_reserve(&xa0); 1570 check_reserve(&xa0);
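
check_insert() relies on xa_insert() refusing to overwrite an occupied slot; a hedged reminder of that contract, mirroring xa_insert_index() above (array, index and the stored value are placeholders):

	/* xa_insert() only stores into an empty slot; an occupied index
	 * fails with -EBUSY instead of silently replacing the entry. */
	err = xa_insert(&array, index, xa_mk_index(index), GFP_KERNEL);
	if (err == -EBUSY)
		pr_debug("index %lu already present\n", index);
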
diff --git a/lib/vdso/Kconfig b/lib/vdso/Kconfig
new file mode 100644
index 000000000000..cc00364bd2c2
--- /dev/null
+++ b/lib/vdso/Kconfig
@@ -0,0 +1,36 @@
1# SPDX-License-Identifier: GPL-2.0
2
3config HAVE_GENERIC_VDSO
4 bool
5
6if HAVE_GENERIC_VDSO
7
8config GENERIC_GETTIMEOFDAY
9 bool
10 help
11 This is a generic implementation of gettimeofday vdso.
12 Each architecture that enables this feature has to
13 provide the fallback implementation.
14
15config GENERIC_VDSO_32
16 bool
17 depends on GENERIC_GETTIMEOFDAY && !64BIT
18 help
19 This config option helps to avoid possible performance issues
20 in 32 bit only architectures.
21
22config GENERIC_COMPAT_VDSO
23 bool
24 help
25 This config option enables the compat VDSO layer.
26
27config CROSS_COMPILE_COMPAT_VDSO
28 string "32 bit Toolchain prefix for compat vDSO"
29 default ""
30 depends on GENERIC_COMPAT_VDSO
31 help
32 Defines the cross-compiler prefix for compiling compat vDSO.
33 If a 64 bit compiler (i.e. x86_64) can compile the VDSO for
34 32 bit, it does not need to define this parameter.
35
36endif
diff --git a/lib/vdso/Makefile b/lib/vdso/Makefile
new file mode 100644
index 000000000000..c415a685d61b
--- /dev/null
+++ b/lib/vdso/Makefile
@@ -0,0 +1,22 @@
1# SPDX-License-Identifier: GPL-2.0
2
3GENERIC_VDSO_MK_PATH := $(abspath $(lastword $(MAKEFILE_LIST)))
4GENERIC_VDSO_DIR := $(dir $(GENERIC_VDSO_MK_PATH))
5
6c-gettimeofday-$(CONFIG_GENERIC_GETTIMEOFDAY) := $(addprefix $(GENERIC_VDSO_DIR), gettimeofday.c)
7
8# This cmd checks that the vdso library does not contain absolute relocation
9# It has to be called after the linking of the vdso library and requires it
10# as a parameter.
11#
12# $(ARCH_REL_TYPE_ABS) is defined in the arch specific makefile and corresponds
13# to the absolute relocation types printed by "objdump -R" and accepted by the
14# dynamic linker.
15ifndef ARCH_REL_TYPE_ABS
16$(error ARCH_REL_TYPE_ABS is not set)
17endif
18
19quiet_cmd_vdso_check = VDSOCHK $@
20 cmd_vdso_check = if $(OBJDUMP) -R $@ | egrep -h "$(ARCH_REL_TYPE_ABS)"; \
21 then (echo >&2 "$@: dynamic relocations are not supported"; \
22 rm -f $@; /bin/false); fi
diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c
new file mode 100644
index 000000000000..2d1c1f241fd9
--- /dev/null
+++ b/lib/vdso/gettimeofday.c
@@ -0,0 +1,239 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Generic userspace implementations of gettimeofday() and similar.
4 */
5#include <linux/compiler.h>
6#include <linux/math64.h>
7#include <linux/time.h>
8#include <linux/kernel.h>
9#include <linux/hrtimer_defs.h>
10#include <vdso/datapage.h>
11#include <vdso/helpers.h>
12
13/*
14 * The generic vDSO implementation requires that gettimeofday.h
15 * provides:
16 * - __arch_get_vdso_data(): to get the vdso datapage.
17 * - __arch_get_hw_counter(): to get the hw counter based on the
18 * clock_mode.
19 * - gettimeofday_fallback(): fallback for gettimeofday.
20 * - clock_gettime_fallback(): fallback for clock_gettime.
21 * - clock_getres_fallback(): fallback for clock_getres.
22 */
23#ifdef ENABLE_COMPAT_VDSO
24#include <asm/vdso/compat_gettimeofday.h>
25#else
26#include <asm/vdso/gettimeofday.h>
27#endif /* ENABLE_COMPAT_VDSO */
28
29#ifndef vdso_calc_delta
30/*
31 * Default implementation which works for all sane clocksources. That
32 * obviously excludes x86/TSC.
33 */
34static __always_inline
35u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
36{
37 return ((cycles - last) & mask) * mult;
38}
39#endif
40
41static int do_hres(const struct vdso_data *vd, clockid_t clk,
42 struct __kernel_timespec *ts)
43{
44 const struct vdso_timestamp *vdso_ts = &vd->basetime[clk];
45 u64 cycles, last, sec, ns;
46 u32 seq;
47
48 do {
49 seq = vdso_read_begin(vd);
50 cycles = __arch_get_hw_counter(vd->clock_mode);
51 ns = vdso_ts->nsec;
52 last = vd->cycle_last;
53 if (unlikely((s64)cycles < 0))
54 return clock_gettime_fallback(clk, ts);
55
56 ns += vdso_calc_delta(cycles, last, vd->mask, vd->mult);
57 ns >>= vd->shift;
58 sec = vdso_ts->sec;
59 } while (unlikely(vdso_read_retry(vd, seq)));
60
61 /*
62 * Do this outside the loop: a race inside the loop could result
63 * in __iter_div_u64_rem() being extremely slow.
64 */
65 ts->tv_sec = sec + __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
66 ts->tv_nsec = ns;
67
68 return 0;
69}
70
71static void do_coarse(const struct vdso_data *vd, clockid_t clk,
72 struct __kernel_timespec *ts)
73{
74 const struct vdso_timestamp *vdso_ts = &vd->basetime[clk];
75 u32 seq;
76
77 do {
78 seq = vdso_read_begin(vd);
79 ts->tv_sec = vdso_ts->sec;
80 ts->tv_nsec = vdso_ts->nsec;
81 } while (unlikely(vdso_read_retry(vd, seq)));
82}
83
84static __maybe_unused int
85__cvdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts)
86{
87 const struct vdso_data *vd = __arch_get_vdso_data();
88 u32 msk;
89
90 /* Check for negative values or invalid clocks */
91 if (unlikely((u32) clock >= MAX_CLOCKS))
92 goto fallback;
93
94 /*
95 * Convert the clockid to a bitmask and use it to check which
96 * clocks are handled in the VDSO directly.
97 */
98 msk = 1U << clock;
99 if (likely(msk & VDSO_HRES)) {
100 return do_hres(&vd[CS_HRES_COARSE], clock, ts);
101 } else if (msk & VDSO_COARSE) {
102 do_coarse(&vd[CS_HRES_COARSE], clock, ts);
103 return 0;
104 } else if (msk & VDSO_RAW) {
105 return do_hres(&vd[CS_RAW], clock, ts);
106 }
107
108fallback:
109 return clock_gettime_fallback(clock, ts);
110}
111
112static __maybe_unused int
113__cvdso_clock_gettime32(clockid_t clock, struct old_timespec32 *res)
114{
115 struct __kernel_timespec ts;
116 int ret;
117
118 if (res == NULL)
119 goto fallback;
120
121 ret = __cvdso_clock_gettime(clock, &ts);
122
123 if (ret == 0) {
124 res->tv_sec = ts.tv_sec;
125 res->tv_nsec = ts.tv_nsec;
126 }
127
128 return ret;
129
130fallback:
131 return clock_gettime_fallback(clock, (struct __kernel_timespec *)res);
132}
133
134static __maybe_unused int
135__cvdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz)
136{
137 const struct vdso_data *vd = __arch_get_vdso_data();
138
139 if (likely(tv != NULL)) {
140 struct __kernel_timespec ts;
141
142 if (do_hres(&vd[CS_HRES_COARSE], CLOCK_REALTIME, &ts))
143 return gettimeofday_fallback(tv, tz);
144
145 tv->tv_sec = ts.tv_sec;
146 tv->tv_usec = (u32)ts.tv_nsec / NSEC_PER_USEC;
147 }
148
149 if (unlikely(tz != NULL)) {
150 tz->tz_minuteswest = vd[CS_HRES_COARSE].tz_minuteswest;
151 tz->tz_dsttime = vd[CS_HRES_COARSE].tz_dsttime;
152 }
153
154 return 0;
155}
156
157#ifdef VDSO_HAS_TIME
158static __maybe_unused time_t __cvdso_time(time_t *time)
159{
160 const struct vdso_data *vd = __arch_get_vdso_data();
161 time_t t = READ_ONCE(vd[CS_HRES_COARSE].basetime[CLOCK_REALTIME].sec);
162
163 if (time)
164 *time = t;
165
166 return t;
167}
168#endif /* VDSO_HAS_TIME */
169
170#ifdef VDSO_HAS_CLOCK_GETRES
171static __maybe_unused
172int __cvdso_clock_getres(clockid_t clock, struct __kernel_timespec *res)
173{
174 const struct vdso_data *vd = __arch_get_vdso_data();
175 u64 ns;
176 u32 msk;
177 u64 hrtimer_res = READ_ONCE(vd[CS_HRES_COARSE].hrtimer_res);
178
179 /* Check for negative values or invalid clocks */
180 if (unlikely((u32) clock >= MAX_CLOCKS))
181 goto fallback;
182
183 /*
184 * Convert the clockid to a bitmask and use it to check which
185 * clocks are handled in the VDSO directly.
186 */
187 msk = 1U << clock;
188 if (msk & VDSO_HRES) {
189 /*
190 * Preserves the behaviour of posix_get_hrtimer_res().
191 */
192 ns = hrtimer_res;
193 } else if (msk & VDSO_COARSE) {
194 /*
195 * Preserves the behaviour of posix_get_coarse_res().
196 */
197 ns = LOW_RES_NSEC;
198 } else if (msk & VDSO_RAW) {
199 /*
200 * Preserves the behaviour of posix_get_hrtimer_res().
201 */
202 ns = hrtimer_res;
203 } else {
204 goto fallback;
205 }
206
207 if (res) {
208 res->tv_sec = 0;
209 res->tv_nsec = ns;
210 }
211
212 return 0;
213
214fallback:
215 return clock_getres_fallback(clock, res);
216}
217
218static __maybe_unused int
219__cvdso_clock_getres_time32(clockid_t clock, struct old_timespec32 *res)
220{
221 struct __kernel_timespec ts;
222 int ret;
223
224 if (res == NULL)
225 goto fallback;
226
227 ret = __cvdso_clock_getres(clock, &ts);
228
229 if (ret == 0) {
230 res->tv_sec = ts.tv_sec;
231 res->tv_nsec = ts.tv_nsec;
232 }
233
234 return ret;
235
236fallback:
237 return clock_getres_fallback(clock, (struct __kernel_timespec *)res);
238}
239#endif /* VDSO_HAS_CLOCK_GETRES */
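
The header comment above names the hooks each architecture must supply; a hedged outline of the shape they take (bodies are placeholders, and _vdso_data / __my_counter_read() are hypothetical arch symbols, not defined by this patch):

	static __always_inline const struct vdso_data *__arch_get_vdso_data(void)
	{
		return _vdso_data;		/* arch-exported vDSO data page */
	}

	static __always_inline u64 __arch_get_hw_counter(s32 clock_mode)
	{
		return __my_counter_read();	/* raw hardware counter */
	}

	/* gettimeofday_fallback(), clock_gettime_fallback() and
	 * clock_getres_fallback() are expected to issue the real syscalls. */
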
diff --git a/lib/xarray.c b/lib/xarray.c
index 6be3acbb861f..446b956c9188 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -298,6 +298,8 @@ bool xas_nomem(struct xa_state *xas, gfp_t gfp)
298 xas_destroy(xas); 298 xas_destroy(xas);
299 return false; 299 return false;
300 } 300 }
301 if (xas->xa->xa_flags & XA_FLAGS_ACCOUNT)
302 gfp |= __GFP_ACCOUNT;
301 xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp); 303 xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp);
302 if (!xas->xa_alloc) 304 if (!xas->xa_alloc)
303 return false; 305 return false;
@@ -325,6 +327,8 @@ static bool __xas_nomem(struct xa_state *xas, gfp_t gfp)
325 xas_destroy(xas); 327 xas_destroy(xas);
326 return false; 328 return false;
327 } 329 }
330 if (xas->xa->xa_flags & XA_FLAGS_ACCOUNT)
331 gfp |= __GFP_ACCOUNT;
328 if (gfpflags_allow_blocking(gfp)) { 332 if (gfpflags_allow_blocking(gfp)) {
329 xas_unlock_type(xas, lock_type); 333 xas_unlock_type(xas, lock_type);
330 xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp); 334 xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp);
@@ -358,8 +362,12 @@ static void *xas_alloc(struct xa_state *xas, unsigned int shift)
358 if (node) { 362 if (node) {
359 xas->xa_alloc = NULL; 363 xas->xa_alloc = NULL;
360 } else { 364 } else {
361 node = kmem_cache_alloc(radix_tree_node_cachep, 365 gfp_t gfp = GFP_NOWAIT | __GFP_NOWARN;
362 GFP_NOWAIT | __GFP_NOWARN); 366
367 if (xas->xa->xa_flags & XA_FLAGS_ACCOUNT)
368 gfp |= __GFP_ACCOUNT;
369
370 node = kmem_cache_alloc(radix_tree_node_cachep, gfp);
363 if (!node) { 371 if (!node) {
364 xas_set_err(xas, -ENOMEM); 372 xas_set_err(xas, -ENOMEM);
365 return NULL; 373 return NULL;
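
With these hunks, an XArray created with XA_FLAGS_ACCOUNT has its internal node allocations charged to the current memory cgroup; a hedged usage sketch (names are placeholders, not part of the patch):

	static DEFINE_XARRAY_FLAGS(accounted_xa, XA_FLAGS_ACCOUNT);

	/* node allocations triggered by this store are now memcg-charged */
	err = xa_err(xa_store(&accounted_xa, index, entry, GFP_KERNEL));
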
diff --git a/mm/filemap.c b/mm/filemap.c
index df2006ba0cfa..6dd9a2274c80 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -281,11 +281,11 @@ EXPORT_SYMBOL(delete_from_page_cache);
281 * @pvec: pagevec with pages to delete 281 * @pvec: pagevec with pages to delete
282 * 282 *
283 * The function walks over mapping->i_pages and removes pages passed in @pvec 283 * The function walks over mapping->i_pages and removes pages passed in @pvec
284 * from the mapping. The function expects @pvec to be sorted by page index 284 * from the mapping. The function expects @pvec to be sorted by page index.
285 * and is optimised for it to be dense.
286 * It tolerates holes in @pvec (mapping entries at those indices are not 285 * It tolerates holes in @pvec (mapping entries at those indices are not
287 * modified). The function expects only THP head pages to be present in the 286 * modified). The function expects only THP head pages to be present in the
288 * @pvec. 287 * @pvec and takes care to delete all corresponding tail pages from the
288 * mapping as well.
289 * 289 *
290 * The function expects the i_pages lock to be held. 290 * The function expects the i_pages lock to be held.
291 */ 291 */
@@ -294,44 +294,40 @@ static void page_cache_delete_batch(struct address_space *mapping,
294{ 294{
295 XA_STATE(xas, &mapping->i_pages, pvec->pages[0]->index); 295 XA_STATE(xas, &mapping->i_pages, pvec->pages[0]->index);
296 int total_pages = 0; 296 int total_pages = 0;
297 int i = 0; 297 int i = 0, tail_pages = 0;
298 struct page *page; 298 struct page *page;
299 299
300 mapping_set_update(&xas, mapping); 300 mapping_set_update(&xas, mapping);
301 xas_for_each(&xas, page, ULONG_MAX) { 301 xas_for_each(&xas, page, ULONG_MAX) {
302 if (i >= pagevec_count(pvec)) 302 if (i >= pagevec_count(pvec) && !tail_pages)
303 break; 303 break;
304
305 /* A swap/dax/shadow entry got inserted? Skip it. */
306 if (xa_is_value(page)) 304 if (xa_is_value(page))
307 continue; 305 continue;
308 /* 306 if (!tail_pages) {
309 * A page got inserted in our range? Skip it. We have our 307 /*
310 * pages locked so they are protected from being removed. 308 * Some page got inserted in our range? Skip it. We
311 * If we see a page whose index is higher than ours, it 309 * have our pages locked so they are protected from
312 * means our page has been removed, which shouldn't be 310 * being removed.
313 * possible because we're holding the PageLock. 311 */
314 */ 312 if (page != pvec->pages[i]) {
315 if (page != pvec->pages[i]) { 313 VM_BUG_ON_PAGE(page->index >
316 VM_BUG_ON_PAGE(page->index > pvec->pages[i]->index, 314 pvec->pages[i]->index, page);
317 page); 315 continue;
318 continue; 316 }
319 } 317 WARN_ON_ONCE(!PageLocked(page));
320 318 if (PageTransHuge(page) && !PageHuge(page))
321 WARN_ON_ONCE(!PageLocked(page)); 319 tail_pages = HPAGE_PMD_NR - 1;
322
323 if (page->index == xas.xa_index)
324 page->mapping = NULL; 320 page->mapping = NULL;
325 /* Leave page->index set: truncation lookup relies on it */ 321 /*
326 322 * Leave page->index set: truncation lookup relies
327 /* 323 * upon it
328 * Move to the next page in the vector if this is a regular 324 */
329 * page or the index is of the last sub-page of this compound
330 * page.
331 */
332 if (page->index + (1UL << compound_order(page)) - 1 ==
333 xas.xa_index)
334 i++; 325 i++;
326 } else {
327 VM_BUG_ON_PAGE(page->index + HPAGE_PMD_NR - tail_pages
328 != pvec->pages[i]->index, page);
329 tail_pages--;
330 }
335 xas_store(&xas, NULL); 331 xas_store(&xas, NULL);
336 total_pages++; 332 total_pages++;
337 } 333 }
@@ -1498,7 +1494,7 @@ EXPORT_SYMBOL(page_cache_prev_miss);
1498struct page *find_get_entry(struct address_space *mapping, pgoff_t offset) 1494struct page *find_get_entry(struct address_space *mapping, pgoff_t offset)
1499{ 1495{
1500 XA_STATE(xas, &mapping->i_pages, offset); 1496 XA_STATE(xas, &mapping->i_pages, offset);
1501 struct page *page; 1497 struct page *head, *page;
1502 1498
1503 rcu_read_lock(); 1499 rcu_read_lock();
1504repeat: 1500repeat:
@@ -1513,19 +1509,25 @@ repeat:
1513 if (!page || xa_is_value(page)) 1509 if (!page || xa_is_value(page))
1514 goto out; 1510 goto out;
1515 1511
1516 if (!page_cache_get_speculative(page)) 1512 head = compound_head(page);
1513 if (!page_cache_get_speculative(head))
1514 goto repeat;
1515
1516 /* The page was split under us? */
1517 if (compound_head(page) != head) {
1518 put_page(head);
1517 goto repeat; 1519 goto repeat;
1520 }
1518 1521
1519 /* 1522 /*
1520 * Has the page moved or been split? 1523 * Has the page moved?
1521 * This is part of the lockless pagecache protocol. See 1524 * This is part of the lockless pagecache protocol. See
1522 * include/linux/pagemap.h for details. 1525 * include/linux/pagemap.h for details.
1523 */ 1526 */
1524 if (unlikely(page != xas_reload(&xas))) { 1527 if (unlikely(page != xas_reload(&xas))) {
1525 put_page(page); 1528 put_page(head);
1526 goto repeat; 1529 goto repeat;
1527 } 1530 }
1528 page = find_subpage(page, offset);
1529out: 1531out:
1530 rcu_read_unlock(); 1532 rcu_read_unlock();
1531 1533
@@ -1707,6 +1709,7 @@ unsigned find_get_entries(struct address_space *mapping,
1707 1709
1708 rcu_read_lock(); 1710 rcu_read_lock();
1709 xas_for_each(&xas, page, ULONG_MAX) { 1711 xas_for_each(&xas, page, ULONG_MAX) {
1712 struct page *head;
1710 if (xas_retry(&xas, page)) 1713 if (xas_retry(&xas, page))
1711 continue; 1714 continue;
1712 /* 1715 /*
@@ -1717,13 +1720,17 @@ unsigned find_get_entries(struct address_space *mapping,
1717 if (xa_is_value(page)) 1720 if (xa_is_value(page))
1718 goto export; 1721 goto export;
1719 1722
1720 if (!page_cache_get_speculative(page)) 1723 head = compound_head(page);
1724 if (!page_cache_get_speculative(head))
1721 goto retry; 1725 goto retry;
1722 1726
1723 /* Has the page moved or been split? */ 1727 /* The page was split under us? */
1728 if (compound_head(page) != head)
1729 goto put_page;
1730
1731 /* Has the page moved? */
1724 if (unlikely(page != xas_reload(&xas))) 1732 if (unlikely(page != xas_reload(&xas)))
1725 goto put_page; 1733 goto put_page;
1726 page = find_subpage(page, xas.xa_index);
1727 1734
1728export: 1735export:
1729 indices[ret] = xas.xa_index; 1736 indices[ret] = xas.xa_index;
@@ -1732,7 +1739,7 @@ export:
1732 break; 1739 break;
1733 continue; 1740 continue;
1734put_page: 1741put_page:
1735 put_page(page); 1742 put_page(head);
1736retry: 1743retry:
1737 xas_reset(&xas); 1744 xas_reset(&xas);
1738 } 1745 }
@@ -1774,27 +1781,33 @@ unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start,
1774 1781
1775 rcu_read_lock(); 1782 rcu_read_lock();
1776 xas_for_each(&xas, page, end) { 1783 xas_for_each(&xas, page, end) {
1784 struct page *head;
1777 if (xas_retry(&xas, page)) 1785 if (xas_retry(&xas, page))
1778 continue; 1786 continue;
1779 /* Skip over shadow, swap and DAX entries */ 1787 /* Skip over shadow, swap and DAX entries */
1780 if (xa_is_value(page)) 1788 if (xa_is_value(page))
1781 continue; 1789 continue;
1782 1790
1783 if (!page_cache_get_speculative(page)) 1791 head = compound_head(page);
1792 if (!page_cache_get_speculative(head))
1784 goto retry; 1793 goto retry;
1785 1794
1786 /* Has the page moved or been split? */ 1795 /* The page was split under us? */
1796 if (compound_head(page) != head)
1797 goto put_page;
1798
1799 /* Has the page moved? */
1787 if (unlikely(page != xas_reload(&xas))) 1800 if (unlikely(page != xas_reload(&xas)))
1788 goto put_page; 1801 goto put_page;
1789 1802
1790 pages[ret] = find_subpage(page, xas.xa_index); 1803 pages[ret] = page;
1791 if (++ret == nr_pages) { 1804 if (++ret == nr_pages) {
1792 *start = xas.xa_index + 1; 1805 *start = xas.xa_index + 1;
1793 goto out; 1806 goto out;
1794 } 1807 }
1795 continue; 1808 continue;
1796put_page: 1809put_page:
1797 put_page(page); 1810 put_page(head);
1798retry: 1811retry:
1799 xas_reset(&xas); 1812 xas_reset(&xas);
1800 } 1813 }
@@ -1839,6 +1852,7 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
1839 1852
1840 rcu_read_lock(); 1853 rcu_read_lock();
1841 for (page = xas_load(&xas); page; page = xas_next(&xas)) { 1854 for (page = xas_load(&xas); page; page = xas_next(&xas)) {
1855 struct page *head;
1842 if (xas_retry(&xas, page)) 1856 if (xas_retry(&xas, page))
1843 continue; 1857 continue;
1844 /* 1858 /*
@@ -1848,19 +1862,24 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
1848 if (xa_is_value(page)) 1862 if (xa_is_value(page))
1849 break; 1863 break;
1850 1864
1851 if (!page_cache_get_speculative(page)) 1865 head = compound_head(page);
1866 if (!page_cache_get_speculative(head))
1852 goto retry; 1867 goto retry;
1853 1868
1854 /* Has the page moved or been split? */ 1869 /* The page was split under us? */
1870 if (compound_head(page) != head)
1871 goto put_page;
1872
1873 /* Has the page moved? */
1855 if (unlikely(page != xas_reload(&xas))) 1874 if (unlikely(page != xas_reload(&xas)))
1856 goto put_page; 1875 goto put_page;
1857 1876
1858 pages[ret] = find_subpage(page, xas.xa_index); 1877 pages[ret] = page;
1859 if (++ret == nr_pages) 1878 if (++ret == nr_pages)
1860 break; 1879 break;
1861 continue; 1880 continue;
1862put_page: 1881put_page:
1863 put_page(page); 1882 put_page(head);
1864retry: 1883retry:
1865 xas_reset(&xas); 1884 xas_reset(&xas);
1866 } 1885 }
@@ -1896,6 +1915,7 @@ unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index,
1896 1915
1897 rcu_read_lock(); 1916 rcu_read_lock();
1898 xas_for_each_marked(&xas, page, end, tag) { 1917 xas_for_each_marked(&xas, page, end, tag) {
1918 struct page *head;
1899 if (xas_retry(&xas, page)) 1919 if (xas_retry(&xas, page))
1900 continue; 1920 continue;
1901 /* 1921 /*
@@ -1906,21 +1926,26 @@ unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index,
1906 if (xa_is_value(page)) 1926 if (xa_is_value(page))
1907 continue; 1927 continue;
1908 1928
1909 if (!page_cache_get_speculative(page)) 1929 head = compound_head(page);
1930 if (!page_cache_get_speculative(head))
1910 goto retry; 1931 goto retry;
1911 1932
1912 /* Has the page moved or been split? */ 1933 /* The page was split under us? */
1934 if (compound_head(page) != head)
1935 goto put_page;
1936
1937 /* Has the page moved? */
1913 if (unlikely(page != xas_reload(&xas))) 1938 if (unlikely(page != xas_reload(&xas)))
1914 goto put_page; 1939 goto put_page;
1915 1940
1916 pages[ret] = find_subpage(page, xas.xa_index); 1941 pages[ret] = page;
1917 if (++ret == nr_pages) { 1942 if (++ret == nr_pages) {
1918 *index = xas.xa_index + 1; 1943 *index = xas.xa_index + 1;
1919 goto out; 1944 goto out;
1920 } 1945 }
1921 continue; 1946 continue;
1922put_page: 1947put_page:
1923 put_page(page); 1948 put_page(head);
1924retry: 1949retry:
1925 xas_reset(&xas); 1950 xas_reset(&xas);
1926 } 1951 }
@@ -2603,7 +2628,7 @@ void filemap_map_pages(struct vm_fault *vmf,
2603 pgoff_t last_pgoff = start_pgoff; 2628 pgoff_t last_pgoff = start_pgoff;
2604 unsigned long max_idx; 2629 unsigned long max_idx;
2605 XA_STATE(xas, &mapping->i_pages, start_pgoff); 2630 XA_STATE(xas, &mapping->i_pages, start_pgoff);
2606 struct page *page; 2631 struct page *head, *page;
2607 2632
2608 rcu_read_lock(); 2633 rcu_read_lock();
2609 xas_for_each(&xas, page, end_pgoff) { 2634 xas_for_each(&xas, page, end_pgoff) {
@@ -2612,19 +2637,24 @@ void filemap_map_pages(struct vm_fault *vmf,
2612 if (xa_is_value(page)) 2637 if (xa_is_value(page))
2613 goto next; 2638 goto next;
2614 2639
2640 head = compound_head(page);
2641
2615 /* 2642 /*
2616 * Check for a locked page first, as a speculative 2643 * Check for a locked page first, as a speculative
2617 * reference may adversely influence page migration. 2644 * reference may adversely influence page migration.
2618 */ 2645 */
2619 if (PageLocked(page)) 2646 if (PageLocked(head))
2620 goto next; 2647 goto next;
2621 if (!page_cache_get_speculative(page)) 2648 if (!page_cache_get_speculative(head))
2622 goto next; 2649 goto next;
2623 2650
2624 /* Has the page moved or been split? */ 2651 /* The page was split under us? */
2652 if (compound_head(page) != head)
2653 goto skip;
2654
2655 /* Has the page moved? */
2625 if (unlikely(page != xas_reload(&xas))) 2656 if (unlikely(page != xas_reload(&xas)))
2626 goto skip; 2657 goto skip;
2627 page = find_subpage(page, xas.xa_index);
2628 2658
2629 if (!PageUptodate(page) || 2659 if (!PageUptodate(page) ||
2630 PageReadahead(page) || 2660 PageReadahead(page) ||
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index bb8b617e34ed..885642c82aaa 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2496,9 +2496,6 @@ static void __split_huge_page(struct page *page, struct list_head *list,
2496 if (IS_ENABLED(CONFIG_SHMEM) && PageSwapBacked(head)) 2496 if (IS_ENABLED(CONFIG_SHMEM) && PageSwapBacked(head))
2497 shmem_uncharge(head->mapping->host, 1); 2497 shmem_uncharge(head->mapping->host, 1);
2498 put_page(head + i); 2498 put_page(head + i);
2499 } else if (!PageAnon(page)) {
2500 __xa_store(&head->mapping->i_pages, head[i].index,
2501 head + i, 0);
2502 } 2499 }
2503 } 2500 }
2504 2501
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index ac843d32b019..ede7e7f5d1ab 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1510,16 +1510,29 @@ static int free_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed,
1510 1510
1511/* 1511/*
1512 * Dissolve a given free hugepage into free buddy pages. This function does 1512 * Dissolve a given free hugepage into free buddy pages. This function does
1513 * nothing for in-use (including surplus) hugepages. Returns -EBUSY if the 1513 * nothing for in-use hugepages and non-hugepages.
1514 * dissolution fails because a give page is not a free hugepage, or because 1514 * This function returns values like below:
1515 * free hugepages are fully reserved. 1515 *
1516 * -EBUSY: failed to dissolve free hugepages or the hugepage is in-use
1517 * (allocated or reserved.)
1518 * 0: successfully dissolved free hugepages or the page is not a
1519 * hugepage (considered as already dissolved)
1516 */ 1520 */
1517int dissolve_free_huge_page(struct page *page) 1521int dissolve_free_huge_page(struct page *page)
1518{ 1522{
1519 int rc = -EBUSY; 1523 int rc = -EBUSY;
1520 1524
1525	/* Avoid disturbing the normal path by needlessly taking hugetlb_lock */
1526 if (!PageHuge(page))
1527 return 0;
1528
1521 spin_lock(&hugetlb_lock); 1529 spin_lock(&hugetlb_lock);
1522 if (PageHuge(page) && !page_count(page)) { 1530 if (!PageHuge(page)) {
1531 rc = 0;
1532 goto out;
1533 }
1534
1535 if (!page_count(page)) {
1523 struct page *head = compound_head(page); 1536 struct page *head = compound_head(page);
1524 struct hstate *h = page_hstate(head); 1537 struct hstate *h = page_hstate(head);
1525 int nid = page_to_nid(head); 1538 int nid = page_to_nid(head);
@@ -1564,11 +1577,9 @@ int dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn)
1564 1577
1565 for (pfn = start_pfn; pfn < end_pfn; pfn += 1 << minimum_order) { 1578 for (pfn = start_pfn; pfn < end_pfn; pfn += 1 << minimum_order) {
1566 page = pfn_to_page(pfn); 1579 page = pfn_to_page(pfn);
1567 if (PageHuge(page) && !page_count(page)) { 1580 rc = dissolve_free_huge_page(page);
1568 rc = dissolve_free_huge_page(page); 1581 if (rc)
1569 if (rc) 1582 break;
1570 break;
1571 }
1572 } 1583 }
1573 1584
1574 return rc; 1585 return rc;
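
A hedged caller sketch of the new contract (pfn is a placeholder): any page may now be passed without a PageHuge() pre-check, since non-hugepages are reported as already dissolved:

	rc = dissolve_free_huge_page(pfn_to_page(pfn));
	if (rc == -EBUSY)
		pr_debug("pfn %lx: hugepage in use or not dissolvable\n", pfn);
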
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 0f7419938008..eaaa21b23215 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1378,7 +1378,7 @@ static void collapse_shmem(struct mm_struct *mm,
1378 result = SCAN_FAIL; 1378 result = SCAN_FAIL;
1379 goto xa_locked; 1379 goto xa_locked;
1380 } 1380 }
1381 xas_store(&xas, new_page); 1381 xas_store(&xas, new_page + (index % HPAGE_PMD_NR));
1382 nr_none++; 1382 nr_none++;
1383 continue; 1383 continue;
1384 } 1384 }
@@ -1454,7 +1454,7 @@ static void collapse_shmem(struct mm_struct *mm,
1454 list_add_tail(&page->lru, &pagelist); 1454 list_add_tail(&page->lru, &pagelist);
1455 1455
1456 /* Finally, replace with the new page. */ 1456 /* Finally, replace with the new page. */
1457 xas_store(&xas, new_page); 1457 xas_store(&xas, new_page + (index % HPAGE_PMD_NR));
1458 continue; 1458 continue;
1459out_unlock: 1459out_unlock:
1460 unlock_page(page); 1460 unlock_page(page);
diff --git a/mm/memfd.c b/mm/memfd.c
index 2647c898990c..650e65a46b9c 100644
--- a/mm/memfd.c
+++ b/mm/memfd.c
@@ -39,7 +39,6 @@ static void memfd_tag_pins(struct xa_state *xas)
39 xas_for_each(xas, page, ULONG_MAX) { 39 xas_for_each(xas, page, ULONG_MAX) {
40 if (xa_is_value(page)) 40 if (xa_is_value(page))
41 continue; 41 continue;
42 page = find_subpage(page, xas->xa_index);
43 if (page_count(page) - page_mapcount(page) > 1) 42 if (page_count(page) - page_mapcount(page) > 1)
44 xas_set_mark(xas, MEMFD_TAG_PINNED); 43 xas_set_mark(xas, MEMFD_TAG_PINNED);
45 44
@@ -89,7 +88,6 @@ static int memfd_wait_for_pins(struct address_space *mapping)
89 bool clear = true; 88 bool clear = true;
90 if (xa_is_value(page)) 89 if (xa_is_value(page))
91 continue; 90 continue;
92 page = find_subpage(page, xas.xa_index);
93 if (page_count(page) - page_mapcount(page) != 1) { 91 if (page_count(page) - page_mapcount(page) != 1) {
94 /* 92 /*
95 * On the last scan, we clean up all those tags 93 * On the last scan, we clean up all those tags
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 8da0334b9ca0..d9cc6606f409 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1730,6 +1730,8 @@ static int soft_offline_huge_page(struct page *page, int flags)
1730 if (!ret) { 1730 if (!ret) {
1731 if (set_hwpoison_free_buddy_page(page)) 1731 if (set_hwpoison_free_buddy_page(page))
1732 num_poisoned_pages_inc(); 1732 num_poisoned_pages_inc();
1733 else
1734 ret = -EBUSY;
1733 } 1735 }
1734 } 1736 }
1735 return ret; 1737 return ret;
@@ -1854,11 +1856,8 @@ static int soft_offline_in_use_page(struct page *page, int flags)
1854 1856
1855static int soft_offline_free_page(struct page *page) 1857static int soft_offline_free_page(struct page *page)
1856{ 1858{
1857 int rc = 0; 1859 int rc = dissolve_free_huge_page(page);
1858 struct page *head = compound_head(page);
1859 1860
1860 if (PageHuge(head))
1861 rc = dissolve_free_huge_page(page);
1862 if (!rc) { 1861 if (!rc) {
1863 if (set_hwpoison_free_buddy_page(page)) 1862 if (set_hwpoison_free_buddy_page(page))
1864 num_poisoned_pages_inc(); 1863 num_poisoned_pages_inc();
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 01600d80ae01..fdcb73536319 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -306,7 +306,7 @@ static void mpol_rebind_nodemask(struct mempolicy *pol, const nodemask_t *nodes)
306 else { 306 else {
307 nodes_remap(tmp, pol->v.nodes,pol->w.cpuset_mems_allowed, 307 nodes_remap(tmp, pol->v.nodes,pol->w.cpuset_mems_allowed,
308 *nodes); 308 *nodes);
309 pol->w.cpuset_mems_allowed = tmp; 309 pol->w.cpuset_mems_allowed = *nodes;
310 } 310 }
311 311
312 if (nodes_empty(tmp)) 312 if (nodes_empty(tmp))
diff --git a/mm/migrate.c b/mm/migrate.c
index f2ecc2855a12..e9594bc0d406 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -463,7 +463,7 @@ int migrate_page_move_mapping(struct address_space *mapping,
463 463
464 for (i = 1; i < HPAGE_PMD_NR; i++) { 464 for (i = 1; i < HPAGE_PMD_NR; i++) {
465 xas_next(&xas); 465 xas_next(&xas);
466 xas_store(&xas, newpage); 466 xas_store(&xas, newpage + i);
467 } 467 }
468 } 468 }
469 469
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 5a58778c91d4..f719b64741d6 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -987,8 +987,7 @@ static void oom_kill_process(struct oom_control *oc, const char *message)
987/* 987/*
988 * Determines whether the kernel must panic because of the panic_on_oom sysctl. 988 * Determines whether the kernel must panic because of the panic_on_oom sysctl.
989 */ 989 */
990static void check_panic_on_oom(struct oom_control *oc, 990static void check_panic_on_oom(struct oom_control *oc)
991 enum oom_constraint constraint)
992{ 991{
993 if (likely(!sysctl_panic_on_oom)) 992 if (likely(!sysctl_panic_on_oom))
994 return; 993 return;
@@ -998,7 +997,7 @@ static void check_panic_on_oom(struct oom_control *oc,
998 * does not panic for cpuset, mempolicy, or memcg allocation 997 * does not panic for cpuset, mempolicy, or memcg allocation
999 * failures. 998 * failures.
1000 */ 999 */
1001 if (constraint != CONSTRAINT_NONE) 1000 if (oc->constraint != CONSTRAINT_NONE)
1002 return; 1001 return;
1003 } 1002 }
1004 /* Do not panic for oom kills triggered by sysrq */ 1003 /* Do not panic for oom kills triggered by sysrq */
@@ -1035,7 +1034,6 @@ EXPORT_SYMBOL_GPL(unregister_oom_notifier);
1035bool out_of_memory(struct oom_control *oc) 1034bool out_of_memory(struct oom_control *oc)
1036{ 1035{
1037 unsigned long freed = 0; 1036 unsigned long freed = 0;
1038 enum oom_constraint constraint = CONSTRAINT_NONE;
1039 1037
1040 if (oom_killer_disabled) 1038 if (oom_killer_disabled)
1041 return false; 1039 return false;
@@ -1071,10 +1069,10 @@ bool out_of_memory(struct oom_control *oc)
1071 * Check if there were limitations on the allocation (only relevant for 1069 * Check if there were limitations on the allocation (only relevant for
1072 * NUMA and memcg) that may require different handling. 1070 * NUMA and memcg) that may require different handling.
1073 */ 1071 */
1074 constraint = constrained_alloc(oc); 1072 oc->constraint = constrained_alloc(oc);
1075 if (constraint != CONSTRAINT_MEMORY_POLICY) 1073 if (oc->constraint != CONSTRAINT_MEMORY_POLICY)
1076 oc->nodemask = NULL; 1074 oc->nodemask = NULL;
1077 check_panic_on_oom(oc, constraint); 1075 check_panic_on_oom(oc);
1078 1076
1079 if (!is_memcg_oom(oc) && sysctl_oom_kill_allocating_task && 1077 if (!is_memcg_oom(oc) && sysctl_oom_kill_allocating_task &&
1080 current->mm && !oom_unkillable_task(current, NULL, oc->nodemask) && 1078 current->mm && !oom_unkillable_task(current, NULL, oc->nodemask) &&
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d66bc8abe0af..8e3bc949ebcc 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1826,7 +1826,8 @@ deferred_grow_zone(struct zone *zone, unsigned int order)
1826 first_deferred_pfn)) { 1826 first_deferred_pfn)) {
1827 pgdat->first_deferred_pfn = ULONG_MAX; 1827 pgdat->first_deferred_pfn = ULONG_MAX;
1828 pgdat_resize_unlock(pgdat, &flags); 1828 pgdat_resize_unlock(pgdat, &flags);
1829 return true; 1829 /* Retry only once. */
1830 return first_deferred_pfn != ULONG_MAX;
1830 } 1831 }
1831 1832
1832 /* 1833 /*
diff --git a/mm/page_idle.c b/mm/page_idle.c
index 0b39ec0c945c..295512465065 100644
--- a/mm/page_idle.c
+++ b/mm/page_idle.c
@@ -136,7 +136,7 @@ static ssize_t page_idle_bitmap_read(struct file *file, struct kobject *kobj,
136 136
137 end_pfn = pfn + count * BITS_PER_BYTE; 137 end_pfn = pfn + count * BITS_PER_BYTE;
138 if (end_pfn > max_pfn) 138 if (end_pfn > max_pfn)
139 end_pfn = ALIGN(max_pfn, BITMAP_CHUNK_BITS); 139 end_pfn = max_pfn;
140 140
141 for (; pfn < end_pfn; pfn++) { 141 for (; pfn < end_pfn; pfn++) {
142 bit = pfn % BITMAP_CHUNK_BITS; 142 bit = pfn % BITMAP_CHUNK_BITS;
@@ -181,7 +181,7 @@ static ssize_t page_idle_bitmap_write(struct file *file, struct kobject *kobj,
181 181
182 end_pfn = pfn + count * BITS_PER_BYTE; 182 end_pfn = pfn + count * BITS_PER_BYTE;
183 if (end_pfn > max_pfn) 183 if (end_pfn > max_pfn)
184 end_pfn = ALIGN(max_pfn, BITMAP_CHUNK_BITS); 184 end_pfn = max_pfn;
185 185
186 for (; pfn < end_pfn; pfn++) { 186 for (; pfn < end_pfn; pfn++) {
187 bit = pfn % BITMAP_CHUNK_BITS; 187 bit = pfn % BITMAP_CHUNK_BITS;
diff --git a/mm/page_io.c b/mm/page_io.c
index 2e8019d0e048..a39aac2f8c8d 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -29,10 +29,9 @@
29static struct bio *get_swap_bio(gfp_t gfp_flags, 29static struct bio *get_swap_bio(gfp_t gfp_flags,
30 struct page *page, bio_end_io_t end_io) 30 struct page *page, bio_end_io_t end_io)
31{ 31{
32 int i, nr = hpage_nr_pages(page);
33 struct bio *bio; 32 struct bio *bio;
34 33
35 bio = bio_alloc(gfp_flags, nr); 34 bio = bio_alloc(gfp_flags, 1);
36 if (bio) { 35 if (bio) {
37 struct block_device *bdev; 36 struct block_device *bdev;
38 37
@@ -41,9 +40,7 @@ static struct bio *get_swap_bio(gfp_t gfp_flags,
41 bio->bi_iter.bi_sector <<= PAGE_SHIFT - 9; 40 bio->bi_iter.bi_sector <<= PAGE_SHIFT - 9;
42 bio->bi_end_io = end_io; 41 bio->bi_end_io = end_io;
43 42
44 for (i = 0; i < nr; i++) 43 bio_add_page(bio, page, PAGE_SIZE * hpage_nr_pages(page), 0);
45 bio_add_page(bio, page + i, PAGE_SIZE, 0);
46 VM_BUG_ON(bio->bi_iter.bi_size != PAGE_SIZE * nr);
47 } 44 }
48 return bio; 45 return bio;
49} 46}
@@ -140,8 +137,10 @@ out:
140 unlock_page(page); 137 unlock_page(page);
141 WRITE_ONCE(bio->bi_private, NULL); 138 WRITE_ONCE(bio->bi_private, NULL);
142 bio_put(bio); 139 bio_put(bio);
143 blk_wake_io_task(waiter); 140 if (waiter) {
144 put_task_struct(waiter); 141 blk_wake_io_task(waiter);
142 put_task_struct(waiter);
143 }
145} 144}
146 145
147int generic_swapfile_activate(struct swap_info_struct *sis, 146int generic_swapfile_activate(struct swap_info_struct *sis,
@@ -398,11 +397,12 @@ int swap_readpage(struct page *page, bool synchronous)
398 * Keep this task valid during swap readpage because the oom killer may 397 * Keep this task valid during swap readpage because the oom killer may
399 * attempt to access it in the page fault retry time check. 398 * attempt to access it in the page fault retry time check.
400 */ 399 */
401 get_task_struct(current);
402 bio->bi_private = current;
403 bio_set_op_attrs(bio, REQ_OP_READ, 0); 400 bio_set_op_attrs(bio, REQ_OP_READ, 0);
404 if (synchronous) 401 if (synchronous) {
405 bio->bi_opf |= REQ_HIPRI; 402 bio->bi_opf |= REQ_HIPRI;
403 get_task_struct(current);
404 bio->bi_private = current;
405 }
406 count_vm_event(PSWPIN); 406 count_vm_event(PSWPIN);
407 bio_get(bio); 407 bio_get(bio);
408 qc = submit_bio(bio); 408 qc = submit_bio(bio);
diff --git a/mm/shmem.c b/mm/shmem.c
index 1bb3b8dc8bb2..f4dce9c8670d 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -614,7 +614,7 @@ static int shmem_add_to_page_cache(struct page *page,
614 if (xas_error(&xas)) 614 if (xas_error(&xas))
615 goto unlock; 615 goto unlock;
616next: 616next:
617 xas_store(&xas, page); 617 xas_store(&xas, page + i);
618 if (++i < nr) { 618 if (++i < nr) {
619 xas_next(&xas); 619 xas_next(&xas);
620 goto next; 620 goto next;
diff --git a/mm/swap_state.c b/mm/swap_state.c
index eb714165afd2..85245fdec8d9 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -132,7 +132,7 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp)
132 for (i = 0; i < nr; i++) { 132 for (i = 0; i < nr; i++) {
133 VM_BUG_ON_PAGE(xas.xa_index != idx + i, page); 133 VM_BUG_ON_PAGE(xas.xa_index != idx + i, page);
134 set_page_private(page + i, entry.val + i); 134 set_page_private(page + i, entry.val + i);
135 xas_store(&xas, page); 135 xas_store(&xas, page + i);
136 xas_next(&xas); 136 xas_next(&xas);
137 } 137 }
138 address_space->nrpages += nr; 138 address_space->nrpages += nr;
@@ -167,7 +167,7 @@ void __delete_from_swap_cache(struct page *page, swp_entry_t entry)
167 167
168 for (i = 0; i < nr; i++) { 168 for (i = 0; i < nr; i++) {
169 void *entry = xas_store(&xas, NULL); 169 void *entry = xas_store(&xas, NULL);
170 VM_BUG_ON_PAGE(entry != page, entry); 170 VM_BUG_ON_PAGE(entry != page + i, entry);
171 set_page_private(page + i, 0); 171 set_page_private(page + i, 0);
172 xas_next(&xas); 172 xas_next(&xas);
173 } 173 }
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 4c9e150e5ad3..030a544e6602 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -913,7 +913,7 @@ adjust_va_to_fit_type(struct vmap_area *va,
913 unsigned long nva_start_addr, unsigned long size, 913 unsigned long nva_start_addr, unsigned long size,
914 enum fit_type type) 914 enum fit_type type)
915{ 915{
916 struct vmap_area *lva; 916 struct vmap_area *lva = NULL;
917 917
918 if (type == FL_FIT_TYPE) { 918 if (type == FL_FIT_TYPE) {
919 /* 919 /*
@@ -972,7 +972,7 @@ adjust_va_to_fit_type(struct vmap_area *va,
972 if (type != FL_FIT_TYPE) { 972 if (type != FL_FIT_TYPE) {
973 augment_tree_propagate_from(va); 973 augment_tree_propagate_from(va);
974 974
975 if (type == NE_FIT_TYPE) 975 if (lva) /* type == NE_FIT_TYPE */
976 insert_vmap_area_augment(lva, &va->rb_node, 976 insert_vmap_area_augment(lva, &va->rb_node,
977 &free_vmap_area_root, &free_vmap_area_list); 977 &free_vmap_area_root, &free_vmap_area_list);
978 } 978 }
@@ -2128,17 +2128,6 @@ static void vm_remove_mappings(struct vm_struct *area, int deallocate_pages)
2128 int flush_dmap = 0; 2128 int flush_dmap = 0;
2129 int i; 2129 int i;
2130 2130
2131 /*
2132 * The below block can be removed when all architectures that have
2133 * direct map permissions also have set_direct_map_() implementations.
2134 * This is concerned with resetting the direct map any an vm alias with
2135 * execute permissions, without leaving a RW+X window.
2136 */
2137 if (flush_reset && !IS_ENABLED(CONFIG_ARCH_HAS_SET_DIRECT_MAP)) {
2138 set_memory_nx((unsigned long)area->addr, area->nr_pages);
2139 set_memory_rw((unsigned long)area->addr, area->nr_pages);
2140 }
2141
2142 remove_vm_area(area->addr); 2131 remove_vm_area(area->addr);
2143 2132
2144 /* If this is not VM_FLUSH_RESET_PERMS memory, no need for the below. */ 2133 /* If this is not VM_FLUSH_RESET_PERMS memory, no need for the below. */
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 7889f583ced9..910e02c793ff 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3644,19 +3644,18 @@ out:
3644} 3644}
3645 3645
3646/* 3646/*
3647 * pgdat->kswapd_classzone_idx is the highest zone index that a recent 3647 * The pgdat->kswapd_classzone_idx is used to pass the highest zone index to be
3648 * allocation request woke kswapd for. When kswapd has not woken recently, 3648 * reclaimed by kswapd from the waker. If the value is MAX_NR_ZONES which is not
3649 * the value is MAX_NR_ZONES which is not a valid index. This compares a 3649 * a valid index then either kswapd runs for first time or kswapd couldn't sleep
3650 * given classzone and returns it or the highest classzone index kswapd 3650 * after previous reclaim attempt (node is still unbalanced). In that case
3651 * was recently woke for. 3651 * return the zone index of the previous kswapd reclaim cycle.
3652 */ 3652 */
3653static enum zone_type kswapd_classzone_idx(pg_data_t *pgdat, 3653static enum zone_type kswapd_classzone_idx(pg_data_t *pgdat,
3654 enum zone_type classzone_idx) 3654 enum zone_type prev_classzone_idx)
3655{ 3655{
3656 if (pgdat->kswapd_classzone_idx == MAX_NR_ZONES) 3656 if (pgdat->kswapd_classzone_idx == MAX_NR_ZONES)
3657 return classzone_idx; 3657 return prev_classzone_idx;
3658 3658 return pgdat->kswapd_classzone_idx;
3659 return max(pgdat->kswapd_classzone_idx, classzone_idx);
3660} 3659}
3661 3660
3662static void kswapd_try_to_sleep(pg_data_t *pgdat, int alloc_order, int reclaim_order, 3661static void kswapd_try_to_sleep(pg_data_t *pgdat, int alloc_order, int reclaim_order,
@@ -3797,7 +3796,7 @@ kswapd_try_sleep:
3797 3796
3798 /* Read the new order and classzone_idx */ 3797 /* Read the new order and classzone_idx */
3799 alloc_order = reclaim_order = pgdat->kswapd_order; 3798 alloc_order = reclaim_order = pgdat->kswapd_order;
3800 classzone_idx = kswapd_classzone_idx(pgdat, 0); 3799 classzone_idx = kswapd_classzone_idx(pgdat, classzone_idx);
3801 pgdat->kswapd_order = 0; 3800 pgdat->kswapd_order = 0;
3802 pgdat->kswapd_classzone_idx = MAX_NR_ZONES; 3801 pgdat->kswapd_classzone_idx = MAX_NR_ZONES;
3803 3802
@@ -3851,8 +3850,12 @@ void wakeup_kswapd(struct zone *zone, gfp_t gfp_flags, int order,
3851 if (!cpuset_zone_allowed(zone, gfp_flags)) 3850 if (!cpuset_zone_allowed(zone, gfp_flags))
3852 return; 3851 return;
3853 pgdat = zone->zone_pgdat; 3852 pgdat = zone->zone_pgdat;
3854 pgdat->kswapd_classzone_idx = kswapd_classzone_idx(pgdat, 3853
3855 classzone_idx); 3854 if (pgdat->kswapd_classzone_idx == MAX_NR_ZONES)
3855 pgdat->kswapd_classzone_idx = classzone_idx;
3856 else
3857 pgdat->kswapd_classzone_idx = max(pgdat->kswapd_classzone_idx,
3858 classzone_idx);
3856 pgdat->kswapd_order = max(pgdat->kswapd_order, order); 3859 pgdat->kswapd_order = max(pgdat->kswapd_order, order);
3857 if (!waitqueue_active(&pgdat->kswapd_wait)) 3860 if (!waitqueue_active(&pgdat->kswapd_wait))
3858 return; 3861 return;
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index 19d27bee285e..1555b0c6f7ec 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -160,10 +160,10 @@ static inline struct lowpan_peer *peer_lookup_dst(struct lowpan_btle_dev *dev,
160 struct in6_addr *daddr, 160 struct in6_addr *daddr,
161 struct sk_buff *skb) 161 struct sk_buff *skb)
162{ 162{
163 struct lowpan_peer *peer;
164 struct in6_addr *nexthop;
165 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); 163 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
166 int count = atomic_read(&dev->peer_count); 164 int count = atomic_read(&dev->peer_count);
165 const struct in6_addr *nexthop;
166 struct lowpan_peer *peer;
167 167
168 BT_DBG("peers %d addr %pI6c rt %p", count, daddr, rt); 168 BT_DBG("peers %d addr %pI6c rt %p", count, daddr, rt);
169 169
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 9f77432dbe38..5406d7cd46ad 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -1353,7 +1353,7 @@ static bool l2cap_check_enc_key_size(struct hci_conn *hcon)
1353 * actually encrypted before enforcing a key size. 1353 * actually encrypted before enforcing a key size.
1354 */ 1354 */
1355 return (!test_bit(HCI_CONN_ENCRYPT, &hcon->flags) || 1355 return (!test_bit(HCI_CONN_ENCRYPT, &hcon->flags) ||
1356 hcon->enc_key_size > HCI_MIN_ENC_KEY_SIZE); 1356 hcon->enc_key_size >= HCI_MIN_ENC_KEY_SIZE);
1357} 1357}
1358 1358
1359static void l2cap_do_start(struct l2cap_chan *chan) 1359static void l2cap_do_start(struct l2cap_chan *chan)
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 16f9159234a2..8c2ec35b6512 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -318,6 +318,7 @@ static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *sk
318static int ip_mc_finish_output(struct net *net, struct sock *sk, 318static int ip_mc_finish_output(struct net *net, struct sock *sk,
319 struct sk_buff *skb) 319 struct sk_buff *skb)
320{ 320{
321 struct rtable *new_rt;
321 int ret; 322 int ret;
322 323
323 ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb); 324 ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
@@ -326,6 +327,17 @@ static int ip_mc_finish_output(struct net *net, struct sock *sk,
326 return ret; 327 return ret;
327 } 328 }
328 329
330 /* Reset rt_iif so that inet_iif() will return skb->skb_iif. Setting
331 * this to non-zero causes ipi_ifindex in in_pktinfo to be overwritten,
332 * see ipv4_pktinfo_prepare().
333 */
334 new_rt = rt_dst_clone(net->loopback_dev, skb_rtable(skb));
335 if (new_rt) {
336 new_rt->rt_iif = 0;
337 skb_dst_drop(skb);
338 skb_dst_set(skb, &new_rt->dst);
339 }
340
329 return dev_loopback_xmit(net, sk, skb); 341 return dev_loopback_xmit(net, sk, skb);
330} 342}
331 343
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 0b8e06ca75d6..40a6abbc9cf6 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -197,7 +197,7 @@ static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash)
197 } 197 }
198 sk = __raw_v4_lookup(net, sk_next(sk), iph->protocol, 198 sk = __raw_v4_lookup(net, sk_next(sk), iph->protocol,
199 iph->saddr, iph->daddr, 199 iph->saddr, iph->daddr,
200 skb->dev->ifindex, sdif); 200 dif, sdif);
201 } 201 }
202out: 202out:
203 read_unlock(&raw_v4_hashinfo.lock); 203 read_unlock(&raw_v4_hashinfo.lock);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 6cb7cff22db9..8ea0735a6754 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1647,6 +1647,39 @@ struct rtable *rt_dst_alloc(struct net_device *dev,
1647} 1647}
1648EXPORT_SYMBOL(rt_dst_alloc); 1648EXPORT_SYMBOL(rt_dst_alloc);
1649 1649
1650struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt)
1651{
1652 struct rtable *new_rt;
1653
1654 new_rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
1655 rt->dst.flags);
1656
1657 if (new_rt) {
1658 new_rt->rt_genid = rt_genid_ipv4(dev_net(dev));
1659 new_rt->rt_flags = rt->rt_flags;
1660 new_rt->rt_type = rt->rt_type;
1661 new_rt->rt_is_input = rt->rt_is_input;
1662 new_rt->rt_iif = rt->rt_iif;
1663 new_rt->rt_pmtu = rt->rt_pmtu;
1664 new_rt->rt_mtu_locked = rt->rt_mtu_locked;
1665 new_rt->rt_gw_family = rt->rt_gw_family;
1666 if (rt->rt_gw_family == AF_INET)
1667 new_rt->rt_gw4 = rt->rt_gw4;
1668 else if (rt->rt_gw_family == AF_INET6)
1669 new_rt->rt_gw6 = rt->rt_gw6;
1670 INIT_LIST_HEAD(&new_rt->rt_uncached);
1671
1672 new_rt->dst.flags |= DST_HOST;
1673 new_rt->dst.input = rt->dst.input;
1674 new_rt->dst.output = rt->dst.output;
1675 new_rt->dst.error = rt->dst.error;
1676 new_rt->dst.lastuse = jiffies;
1677 new_rt->dst.lwtstate = lwtstate_get(rt->dst.lwtstate);
1678 }
1679 return new_rt;
1680}
1681EXPORT_SYMBOL(rt_dst_clone);
1682
1650/* called in rcu_read_lock() section */ 1683/* called in rcu_read_lock() section */
1651int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr, 1684int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1652 u8 tos, struct net_device *dev, 1685 u8 tos, struct net_device *dev,
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 834475717110..21efcd02f337 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -59,8 +59,8 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
59{ 59{
60 struct dst_entry *dst = skb_dst(skb); 60 struct dst_entry *dst = skb_dst(skb);
61 struct net_device *dev = dst->dev; 61 struct net_device *dev = dst->dev;
62 const struct in6_addr *nexthop;
62 struct neighbour *neigh; 63 struct neighbour *neigh;
63 struct in6_addr *nexthop;
64 int ret; 64 int ret;
65 65
66 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) { 66 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 11ad62effd56..97a843cf164c 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -218,7 +218,8 @@ static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
218{ 218{
219 const struct rt6_info *rt = container_of(dst, struct rt6_info, dst); 219 const struct rt6_info *rt = container_of(dst, struct rt6_info, dst);
220 220
221 return ip6_neigh_lookup(&rt->rt6i_gateway, dst->dev, skb, daddr); 221 return ip6_neigh_lookup(rt6_nexthop(rt, &in6addr_any),
222 dst->dev, skb, daddr);
222} 223}
223 224
224static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr) 225static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
@@ -5281,7 +5282,7 @@ static struct ctl_table ipv6_route_table_template[] = {
5281 .data = &init_net.ipv6.sysctl.skip_notify_on_dev_down, 5282 .data = &init_net.ipv6.sysctl.skip_notify_on_dev_down,
5282 .maxlen = sizeof(int), 5283 .maxlen = sizeof(int),
5283 .mode = 0644, 5284 .mode = 0644,
5284 .proc_handler = proc_dointvec, 5285 .proc_handler = proc_dointvec_minmax,
5285 .extra1 = &zero, 5286 .extra1 = &zero,
5286 .extra2 = &one, 5287 .extra2 = &one,
5287 }, 5288 },
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 241317473114..cdfc33517e85 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -439,9 +439,9 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
439 struct nf_flowtable *flow_table = priv; 439 struct nf_flowtable *flow_table = priv;
440 struct flow_offload_tuple tuple = {}; 440 struct flow_offload_tuple tuple = {};
441 enum flow_offload_tuple_dir dir; 441 enum flow_offload_tuple_dir dir;
442 const struct in6_addr *nexthop;
442 struct flow_offload *flow; 443 struct flow_offload *flow;
443 struct net_device *outdev; 444 struct net_device *outdev;
444 struct in6_addr *nexthop;
445 struct ipv6hdr *ip6h; 445 struct ipv6hdr *ip6h;
446 struct rt6_info *rt; 446 struct rt6_info *rt;
447 447
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index a29d66da7394..5f78df080573 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2401,6 +2401,9 @@ static void tpacket_destruct_skb(struct sk_buff *skb)
2401 2401
2402 ts = __packet_set_timestamp(po, ph, skb); 2402 ts = __packet_set_timestamp(po, ph, skb);
2403 __packet_set_status(po, ph, TP_STATUS_AVAILABLE | ts); 2403 __packet_set_status(po, ph, TP_STATUS_AVAILABLE | ts);
2404
2405 if (!packet_read_pending(&po->tx_ring))
2406 complete(&po->skb_completion);
2404 } 2407 }
2405 2408
2406 sock_wfree(skb); 2409 sock_wfree(skb);
@@ -2585,7 +2588,7 @@ static int tpacket_parse_header(struct packet_sock *po, void *frame,
2585 2588
2586static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) 2589static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
2587{ 2590{
2588 struct sk_buff *skb; 2591 struct sk_buff *skb = NULL;
2589 struct net_device *dev; 2592 struct net_device *dev;
2590 struct virtio_net_hdr *vnet_hdr = NULL; 2593 struct virtio_net_hdr *vnet_hdr = NULL;
2591 struct sockcm_cookie sockc; 2594 struct sockcm_cookie sockc;
@@ -2600,6 +2603,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
2600 int len_sum = 0; 2603 int len_sum = 0;
2601 int status = TP_STATUS_AVAILABLE; 2604 int status = TP_STATUS_AVAILABLE;
2602 int hlen, tlen, copylen = 0; 2605 int hlen, tlen, copylen = 0;
2606 long timeo = 0;
2603 2607
2604 mutex_lock(&po->pg_vec_lock); 2608 mutex_lock(&po->pg_vec_lock);
2605 2609
@@ -2646,12 +2650,21 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
2646 if ((size_max > dev->mtu + reserve + VLAN_HLEN) && !po->has_vnet_hdr) 2650 if ((size_max > dev->mtu + reserve + VLAN_HLEN) && !po->has_vnet_hdr)
2647 size_max = dev->mtu + reserve + VLAN_HLEN; 2651 size_max = dev->mtu + reserve + VLAN_HLEN;
2648 2652
2653 reinit_completion(&po->skb_completion);
2654
2649 do { 2655 do {
2650 ph = packet_current_frame(po, &po->tx_ring, 2656 ph = packet_current_frame(po, &po->tx_ring,
2651 TP_STATUS_SEND_REQUEST); 2657 TP_STATUS_SEND_REQUEST);
2652 if (unlikely(ph == NULL)) { 2658 if (unlikely(ph == NULL)) {
2653 if (need_wait && need_resched()) 2659 if (need_wait && skb) {
2654 schedule(); 2660 timeo = sock_sndtimeo(&po->sk, msg->msg_flags & MSG_DONTWAIT);
2661 timeo = wait_for_completion_interruptible_timeout(&po->skb_completion, timeo);
2662 if (timeo <= 0) {
2663 err = !timeo ? -ETIMEDOUT : -ERESTARTSYS;
2664 goto out_put;
2665 }
2666 }
2667 /* check for additional frames */
2655 continue; 2668 continue;
2656 } 2669 }
2657 2670
@@ -3207,6 +3220,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
3207 sock_init_data(sock, sk); 3220 sock_init_data(sock, sk);
3208 3221
3209 po = pkt_sk(sk); 3222 po = pkt_sk(sk);
3223 init_completion(&po->skb_completion);
3210 sk->sk_family = PF_PACKET; 3224 sk->sk_family = PF_PACKET;
3211 po->num = proto; 3225 po->num = proto;
3212 po->xmit = dev_queue_xmit; 3226 po->xmit = dev_queue_xmit;
@@ -4314,7 +4328,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
4314 req3->tp_sizeof_priv || 4328 req3->tp_sizeof_priv ||
4315 req3->tp_feature_req_word) { 4329 req3->tp_feature_req_word) {
4316 err = -EINVAL; 4330 err = -EINVAL;
4317 goto out; 4331 goto out_free_pg_vec;
4318 } 4332 }
4319 } 4333 }
4320 break; 4334 break;
@@ -4378,6 +4392,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
4378 prb_shutdown_retire_blk_timer(po, rb_queue); 4392 prb_shutdown_retire_blk_timer(po, rb_queue);
4379 } 4393 }
4380 4394
4395out_free_pg_vec:
4381 if (pg_vec) 4396 if (pg_vec)
4382 free_pg_vec(pg_vec, order, req->tp_block_nr); 4397 free_pg_vec(pg_vec, order, req->tp_block_nr);
4383out: 4398out:
diff --git a/net/packet/internal.h b/net/packet/internal.h
index 3bb7c5fb3bff..c70a2794456f 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -128,6 +128,7 @@ struct packet_sock {
128 unsigned int tp_hdrlen; 128 unsigned int tp_hdrlen;
129 unsigned int tp_reserve; 129 unsigned int tp_reserve;
130 unsigned int tp_tstamp; 130 unsigned int tp_tstamp;
131 struct completion skb_completion;
131 struct net_device __rcu *cached_dev; 132 struct net_device __rcu *cached_dev;
132 int (*xmit)(struct sk_buff *skb); 133 int (*xmit)(struct sk_buff *skb);
133 struct packet_type prot_hook ____cacheline_aligned_in_smp; 134 struct packet_type prot_hook ____cacheline_aligned_in_smp;
diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c
index e16a3d37d2bc..732e109c3055 100644
--- a/net/sched/sch_cbs.c
+++ b/net/sched/sch_cbs.c
@@ -549,12 +549,17 @@ static struct notifier_block cbs_device_notifier = {
549 549
550static int __init cbs_module_init(void) 550static int __init cbs_module_init(void)
551{ 551{
552 int err = register_netdevice_notifier(&cbs_device_notifier); 552 int err;
553 553
554 err = register_netdevice_notifier(&cbs_device_notifier);
554 if (err) 555 if (err)
555 return err; 556 return err;
556 557
557 return register_qdisc(&cbs_qdisc_ops); 558 err = register_qdisc(&cbs_qdisc_ops);
559 if (err)
560 unregister_netdevice_notifier(&cbs_device_notifier);
561
562 return err;
558} 563}
559 564
560static void __exit cbs_module_exit(void) 565static void __exit cbs_module_exit(void)
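The sch_cbs hunk above is the usual init-time unwinding fix: when the second registration fails, the first one must be undone before returning the error. A hedged userspace sketch of the same shape, with stand-in register/unregister functions rather than the kernel's APIs:

    #include <stdio.h>

    /* Stand-ins for the two registrations; not the kernel's interfaces. */
    static int register_a(void)    { return 0; }
    static void unregister_a(void) { }
    static int register_b(void)    { return -1; /* pretend this one fails */ }

    static int init_sketch(void)
    {
        int err = register_a();

        if (err)
            return err;

        err = register_b();
        if (err)
            unregister_a();   /* unwind the first step on failure */
        return err;
    }

    int main(void)
    {
        printf("init_sketch() = %d\n", init_sketch());
        return 0;
    }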
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index e358437ba29b..69cebb2c998b 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -118,10 +118,6 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
118 /* Initialize the bind addr area */ 118 /* Initialize the bind addr area */
119 sctp_bind_addr_init(&ep->base.bind_addr, 0); 119 sctp_bind_addr_init(&ep->base.bind_addr, 0);
120 120
121 /* Remember who we are attached to. */
122 ep->base.sk = sk;
123 sock_hold(ep->base.sk);
124
125 /* Create the lists of associations. */ 121 /* Create the lists of associations. */
126 INIT_LIST_HEAD(&ep->asocs); 122 INIT_LIST_HEAD(&ep->asocs);
127 123
@@ -154,6 +150,10 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
154 ep->prsctp_enable = net->sctp.prsctp_enable; 150 ep->prsctp_enable = net->sctp.prsctp_enable;
155 ep->reconf_enable = net->sctp.reconf_enable; 151 ep->reconf_enable = net->sctp.reconf_enable;
156 152
153 /* Remember who we are attached to. */
154 ep->base.sk = sk;
155 sock_hold(ep->base.sk);
156
157 return ep; 157 return ep;
158 158
159nomem_shkey: 159nomem_shkey:
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 0c874e996f85..7621ec2f539c 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -2029,7 +2029,7 @@ static int __init smc_init(void)
2029 2029
2030 rc = smc_pnet_init(); 2030 rc = smc_pnet_init();
2031 if (rc) 2031 if (rc)
2032 return rc; 2032 goto out_pernet_subsys;
2033 2033
2034 rc = smc_llc_init(); 2034 rc = smc_llc_init();
2035 if (rc) { 2035 if (rc) {
@@ -2080,6 +2080,9 @@ out_proto:
2080 proto_unregister(&smc_proto); 2080 proto_unregister(&smc_proto);
2081out_pnet: 2081out_pnet:
2082 smc_pnet_exit(); 2082 smc_pnet_exit();
2083out_pernet_subsys:
2084 unregister_pernet_subsys(&smc_net_ops);
2085
2083 return rc; 2086 return rc;
2084} 2087}
2085 2088
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 2d2850adc2a3..4ca50ddf8d16 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -652,7 +652,10 @@ create:
652 rc = smc_lgr_create(smc, ini); 652 rc = smc_lgr_create(smc, ini);
653 if (rc) 653 if (rc)
654 goto out; 654 goto out;
655 lgr = conn->lgr;
656 write_lock_bh(&lgr->conns_lock);
655 smc_lgr_register_conn(conn); /* add smc conn to lgr */ 657 smc_lgr_register_conn(conn); /* add smc conn to lgr */
658 write_unlock_bh(&lgr->conns_lock);
656 } 659 }
657 conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE; 660 conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
658 conn->local_tx_ctrl.len = SMC_WR_TX_SIZE; 661 conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 027a3b07d329..0004535c0188 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -211,9 +211,14 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id,
211 /* Save client advertised inbound read limit for use later in accept. */ 211 /* Save client advertised inbound read limit for use later in accept. */
212 newxprt->sc_ord = param->initiator_depth; 212 newxprt->sc_ord = param->initiator_depth;
213 213
214 /* Set the local and remote addresses in the transport */
215 sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr; 214 sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr;
216 svc_xprt_set_remote(&newxprt->sc_xprt, sa, svc_addr_len(sa)); 215 svc_xprt_set_remote(&newxprt->sc_xprt, sa, svc_addr_len(sa));
216 /* The remote port is arbitrary and not under the control of the
217 * client ULP. Set it to a fixed value so that the DRC continues
218 * to be effective after a reconnect.
219 */
220 rpc_set_port((struct sockaddr *)&newxprt->sc_xprt.xpt_remote, 0);
221
217 sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr; 222 sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr;
218 svc_xprt_set_local(&newxprt->sc_xprt, sa, svc_addr_len(sa)); 223 svc_xprt_set_local(&newxprt->sc_xprt, sa, svc_addr_len(sa));
219 224
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index c69951ed2ebc..36652352a38c 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -950,6 +950,8 @@ static int xs_local_send_request(struct rpc_rqst *req)
950 struct sock_xprt *transport = 950 struct sock_xprt *transport =
951 container_of(xprt, struct sock_xprt, xprt); 951 container_of(xprt, struct sock_xprt, xprt);
952 struct xdr_buf *xdr = &req->rq_snd_buf; 952 struct xdr_buf *xdr = &req->rq_snd_buf;
953 rpc_fraghdr rm = xs_stream_record_marker(xdr);
954 unsigned int msglen = rm ? req->rq_slen + sizeof(rm) : req->rq_slen;
953 int status; 955 int status;
954 int sent = 0; 956 int sent = 0;
955 957
@@ -964,9 +966,7 @@ static int xs_local_send_request(struct rpc_rqst *req)
964 966
965 req->rq_xtime = ktime_get(); 967 req->rq_xtime = ktime_get();
966 status = xs_sendpages(transport->sock, NULL, 0, xdr, 968 status = xs_sendpages(transport->sock, NULL, 0, xdr,
967 transport->xmit.offset, 969 transport->xmit.offset, rm, &sent);
968 xs_stream_record_marker(xdr),
969 &sent);
970 dprintk("RPC: %s(%u) = %d\n", 970 dprintk("RPC: %s(%u) = %d\n",
971 __func__, xdr->len - transport->xmit.offset, status); 971 __func__, xdr->len - transport->xmit.offset, status);
972 972
@@ -976,7 +976,7 @@ static int xs_local_send_request(struct rpc_rqst *req)
976 if (likely(sent > 0) || status == 0) { 976 if (likely(sent > 0) || status == 0) {
977 transport->xmit.offset += sent; 977 transport->xmit.offset += sent;
978 req->rq_bytes_sent = transport->xmit.offset; 978 req->rq_bytes_sent = transport->xmit.offset;
979 if (likely(req->rq_bytes_sent >= req->rq_slen)) { 979 if (likely(req->rq_bytes_sent >= msglen)) {
980 req->rq_xmit_bytes_sent += transport->xmit.offset; 980 req->rq_xmit_bytes_sent += transport->xmit.offset;
981 transport->xmit.offset = 0; 981 transport->xmit.offset = 0;
982 return 0; 982 return 0;
@@ -1097,6 +1097,8 @@ static int xs_tcp_send_request(struct rpc_rqst *req)
1097 struct rpc_xprt *xprt = req->rq_xprt; 1097 struct rpc_xprt *xprt = req->rq_xprt;
1098 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); 1098 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
1099 struct xdr_buf *xdr = &req->rq_snd_buf; 1099 struct xdr_buf *xdr = &req->rq_snd_buf;
1100 rpc_fraghdr rm = xs_stream_record_marker(xdr);
1101 unsigned int msglen = rm ? req->rq_slen + sizeof(rm) : req->rq_slen;
1100 bool vm_wait = false; 1102 bool vm_wait = false;
1101 int status; 1103 int status;
1102 int sent; 1104 int sent;
@@ -1122,9 +1124,7 @@ static int xs_tcp_send_request(struct rpc_rqst *req)
1122 while (1) { 1124 while (1) {
1123 sent = 0; 1125 sent = 0;
1124 status = xs_sendpages(transport->sock, NULL, 0, xdr, 1126 status = xs_sendpages(transport->sock, NULL, 0, xdr,
1125 transport->xmit.offset, 1127 transport->xmit.offset, rm, &sent);
1126 xs_stream_record_marker(xdr),
1127 &sent);
1128 1128
1129 dprintk("RPC: xs_tcp_send_request(%u) = %d\n", 1129 dprintk("RPC: xs_tcp_send_request(%u) = %d\n",
1130 xdr->len - transport->xmit.offset, status); 1130 xdr->len - transport->xmit.offset, status);
@@ -1133,7 +1133,7 @@ static int xs_tcp_send_request(struct rpc_rqst *req)
1133 * reset the count of bytes sent. */ 1133 * reset the count of bytes sent. */
1134 transport->xmit.offset += sent; 1134 transport->xmit.offset += sent;
1135 req->rq_bytes_sent = transport->xmit.offset; 1135 req->rq_bytes_sent = transport->xmit.offset;
1136 if (likely(req->rq_bytes_sent >= req->rq_slen)) { 1136 if (likely(req->rq_bytes_sent >= msglen)) {
1137 req->rq_xmit_bytes_sent += transport->xmit.offset; 1137 req->rq_xmit_bytes_sent += transport->xmit.offset;
1138 transport->xmit.offset = 0; 1138 transport->xmit.offset = 0;
1139 return 0; 1139 return 0;
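The xprtsock hunks above account for the 4-byte stream record marker when deciding whether a request has been fully transmitted; comparing against the payload length alone declared the send complete one fragment header too early. A small illustrative calculation (the sizes are made up):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t marker = 0x80000000u | 1024;   /* illustrative fragment header */
        size_t payload  = 1024;
        size_t msglen   = marker ? payload + sizeof(marker) : payload;
        size_t sent     = 1024;                 /* payload went out, marker did not */

        printf("done (payload-only test): %d\n", sent >= payload);  /* wrongly yes */
        printf("done (marker-aware test): %d\n", sent >= msglen);   /* correctly no */
        return 0;
    }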
diff --git a/net/tipc/core.c b/net/tipc/core.c
index ed536c05252a..c8370722f0bb 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -134,7 +134,7 @@ static int __init tipc_init(void)
134 if (err) 134 if (err)
135 goto out_sysctl; 135 goto out_sysctl;
136 136
137 err = register_pernet_subsys(&tipc_net_ops); 137 err = register_pernet_device(&tipc_net_ops);
138 if (err) 138 if (err)
139 goto out_pernet; 139 goto out_pernet;
140 140
@@ -142,7 +142,7 @@ static int __init tipc_init(void)
142 if (err) 142 if (err)
143 goto out_socket; 143 goto out_socket;
144 144
145 err = register_pernet_subsys(&tipc_topsrv_net_ops); 145 err = register_pernet_device(&tipc_topsrv_net_ops);
146 if (err) 146 if (err)
147 goto out_pernet_topsrv; 147 goto out_pernet_topsrv;
148 148
@@ -153,11 +153,11 @@ static int __init tipc_init(void)
153 pr_info("Started in single node mode\n"); 153 pr_info("Started in single node mode\n");
154 return 0; 154 return 0;
155out_bearer: 155out_bearer:
156 unregister_pernet_subsys(&tipc_topsrv_net_ops); 156 unregister_pernet_device(&tipc_topsrv_net_ops);
157out_pernet_topsrv: 157out_pernet_topsrv:
158 tipc_socket_stop(); 158 tipc_socket_stop();
159out_socket: 159out_socket:
160 unregister_pernet_subsys(&tipc_net_ops); 160 unregister_pernet_device(&tipc_net_ops);
161out_pernet: 161out_pernet:
162 tipc_unregister_sysctl(); 162 tipc_unregister_sysctl();
163out_sysctl: 163out_sysctl:
@@ -172,9 +172,9 @@ out_netlink:
172static void __exit tipc_exit(void) 172static void __exit tipc_exit(void)
173{ 173{
174 tipc_bearer_cleanup(); 174 tipc_bearer_cleanup();
175 unregister_pernet_subsys(&tipc_topsrv_net_ops); 175 unregister_pernet_device(&tipc_topsrv_net_ops);
176 tipc_socket_stop(); 176 tipc_socket_stop();
177 unregister_pernet_subsys(&tipc_net_ops); 177 unregister_pernet_device(&tipc_net_ops);
178 tipc_netlink_stop(); 178 tipc_netlink_stop();
179 tipc_netlink_compat_stop(); 179 tipc_netlink_compat_stop();
180 tipc_unregister_sysctl(); 180 tipc_unregister_sysctl();
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index c6a04c09d075..cf155061c472 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -445,7 +445,11 @@ static int tipc_nl_compat_bearer_disable(struct tipc_nl_compat_cmd_doit *cmd,
445 if (!bearer) 445 if (!bearer)
446 return -EMSGSIZE; 446 return -EMSGSIZE;
447 447
448 len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_BEARER_NAME); 448 len = TLV_GET_DATA_LEN(msg->req);
449 if (len <= 0)
450 return -EINVAL;
451
452 len = min_t(int, len, TIPC_MAX_BEARER_NAME);
449 if (!string_is_valid(name, len)) 453 if (!string_is_valid(name, len))
450 return -EINVAL; 454 return -EINVAL;
451 455
@@ -539,7 +543,11 @@ static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg,
539 543
540 name = (char *)TLV_DATA(msg->req); 544 name = (char *)TLV_DATA(msg->req);
541 545
542 len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_LINK_NAME); 546 len = TLV_GET_DATA_LEN(msg->req);
547 if (len <= 0)
548 return -EINVAL;
549
550 len = min_t(int, len, TIPC_MAX_BEARER_NAME);
543 if (!string_is_valid(name, len)) 551 if (!string_is_valid(name, len))
544 return -EINVAL; 552 return -EINVAL;
545 553
@@ -817,7 +825,11 @@ static int tipc_nl_compat_link_reset_stats(struct tipc_nl_compat_cmd_doit *cmd,
817 if (!link) 825 if (!link)
818 return -EMSGSIZE; 826 return -EMSGSIZE;
819 827
820 len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_LINK_NAME); 828 len = TLV_GET_DATA_LEN(msg->req);
829 if (len <= 0)
830 return -EINVAL;
831
832 len = min_t(int, len, TIPC_MAX_BEARER_NAME);
821 if (!string_is_valid(name, len)) 833 if (!string_is_valid(name, len))
822 return -EINVAL; 834 return -EINVAL;
823 835
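The three netlink_compat hunks above reject a non-positive TLV data length before it is clamped, since feeding such a length straight into min_t() produced a bogus bound for the string check. A stand-alone sketch of that validate-then-clamp order (the cap and helper names are illustrative, not TIPC's):

    #include <string.h>

    #define MAX_NAME_LEN 32   /* illustrative cap, not TIPC's actual limit */

    /* Reject non-positive TLV lengths before clamping, then require a NUL
     * inside the clamped window, similar to string_is_valid(). */
    int copy_tlv_name(char *dst, const char *src, int tlv_len)
    {
        int len;

        if (tlv_len <= 0)
            return -1;                    /* -EINVAL in the kernel */

        len = tlv_len < MAX_NAME_LEN ? tlv_len : MAX_NAME_LEN;
        if (!memchr(src, '\0', (size_t)len))
            return -1;

        strcpy(dst, src);
        return 0;
    }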
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index fc81ae18cc44..e2b69e805d46 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -279,7 +279,8 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
279 goto skip_tx_cleanup; 279 goto skip_tx_cleanup;
280 } 280 }
281 281
282 if (!tls_complete_pending_work(sk, ctx, 0, &timeo)) 282 if (unlikely(sk->sk_write_pending) &&
283 !wait_on_pending_writer(sk, &timeo))
283 tls_handle_open_record(sk, 0); 284 tls_handle_open_record(sk, 0);
284 285
285 /* We need these for tls_sw_fallback handling of other packets */ 286 /* We need these for tls_sw_fallback handling of other packets */
diff --git a/samples/pidfd/pidfd-metadata.c b/samples/pidfd/pidfd-metadata.c
index 14b454448429..c459155daf9a 100644
--- a/samples/pidfd/pidfd-metadata.c
+++ b/samples/pidfd/pidfd-metadata.c
@@ -83,7 +83,7 @@ static int pidfd_metadata_fd(pid_t pid, int pidfd)
83 83
84int main(int argc, char *argv[]) 84int main(int argc, char *argv[])
85{ 85{
86 int pidfd = 0, ret = EXIT_FAILURE; 86 int pidfd = -1, ret = EXIT_FAILURE;
87 char buf[4096] = { 0 }; 87 char buf[4096] = { 0 };
88 pid_t pid; 88 pid_t pid;
89 int procfd, statusfd; 89 int procfd, statusfd;
@@ -91,7 +91,11 @@ int main(int argc, char *argv[])
91 91
92 pid = pidfd_clone(CLONE_PIDFD, &pidfd); 92 pid = pidfd_clone(CLONE_PIDFD, &pidfd);
93 if (pid < 0) 93 if (pid < 0)
94 exit(ret); 94 err(ret, "CLONE_PIDFD");
95 if (pidfd == -1) {
96 warnx("CLONE_PIDFD is not supported by the kernel");
97 goto out;
98 }
95 99
96 procfd = pidfd_metadata_fd(pid, pidfd); 100 procfd = pidfd_metadata_fd(pid, pidfd);
97 close(pidfd); 101 close(pidfd);
diff --git a/scripts/atomic/check-atomics.sh b/scripts/atomic/check-atomics.sh
index cfa0c2f71c84..8378c63a1e09 100755
--- a/scripts/atomic/check-atomics.sh
+++ b/scripts/atomic/check-atomics.sh
@@ -22,7 +22,7 @@ while read header; do
22 OLDSUM="$(tail -n 1 ${LINUXDIR}/include/${header})" 22 OLDSUM="$(tail -n 1 ${LINUXDIR}/include/${header})"
23 OLDSUM="${OLDSUM#// }" 23 OLDSUM="${OLDSUM#// }"
24 24
25 NEWSUM="$(head -n -1 ${LINUXDIR}/include/${header} | sha1sum)" 25 NEWSUM="$(sed '$d' ${LINUXDIR}/include/${header} | sha1sum)"
26 NEWSUM="${NEWSUM%% *}" 26 NEWSUM="${NEWSUM%% *}"
27 27
28 if [ "${OLDSUM}" != "${NEWSUM}" ]; then 28 if [ "${OLDSUM}" != "${NEWSUM}" ]; then
diff --git a/security/apparmor/label.c b/security/apparmor/label.c
index 068e93c5d29c..59f1cc2557a7 100644
--- a/security/apparmor/label.c
+++ b/security/apparmor/label.c
@@ -76,7 +76,7 @@ void __aa_proxy_redirect(struct aa_label *orig, struct aa_label *new)
76 76
77 AA_BUG(!orig); 77 AA_BUG(!orig);
78 AA_BUG(!new); 78 AA_BUG(!new);
79 lockdep_assert_held_exclusive(&labels_set(orig)->lock); 79 lockdep_assert_held_write(&labels_set(orig)->lock);
80 80
81 tmp = rcu_dereference_protected(orig->proxy->label, 81 tmp = rcu_dereference_protected(orig->proxy->label,
82 &labels_ns(orig)->lock); 82 &labels_ns(orig)->lock);
@@ -566,7 +566,7 @@ static bool __label_remove(struct aa_label *label, struct aa_label *new)
566 566
567 AA_BUG(!ls); 567 AA_BUG(!ls);
568 AA_BUG(!label); 568 AA_BUG(!label);
569 lockdep_assert_held_exclusive(&ls->lock); 569 lockdep_assert_held_write(&ls->lock);
570 570
571 if (new) 571 if (new)
572 __aa_proxy_redirect(label, new); 572 __aa_proxy_redirect(label, new);
@@ -603,7 +603,7 @@ static bool __label_replace(struct aa_label *old, struct aa_label *new)
603 AA_BUG(!ls); 603 AA_BUG(!ls);
604 AA_BUG(!old); 604 AA_BUG(!old);
605 AA_BUG(!new); 605 AA_BUG(!new);
606 lockdep_assert_held_exclusive(&ls->lock); 606 lockdep_assert_held_write(&ls->lock);
607 AA_BUG(new->flags & FLAG_IN_TREE); 607 AA_BUG(new->flags & FLAG_IN_TREE);
608 608
609 if (!label_is_stale(old)) 609 if (!label_is_stale(old))
@@ -640,7 +640,7 @@ static struct aa_label *__label_insert(struct aa_labelset *ls,
640 AA_BUG(!ls); 640 AA_BUG(!ls);
641 AA_BUG(!label); 641 AA_BUG(!label);
642 AA_BUG(labels_set(label) != ls); 642 AA_BUG(labels_set(label) != ls);
643 lockdep_assert_held_exclusive(&ls->lock); 643 lockdep_assert_held_write(&ls->lock);
644 AA_BUG(label->flags & FLAG_IN_TREE); 644 AA_BUG(label->flags & FLAG_IN_TREE);
645 645
646 /* Figure out where to put new node */ 646 /* Figure out where to put new node */
diff --git a/sound/core/seq/oss/seq_oss_ioctl.c b/sound/core/seq/oss/seq_oss_ioctl.c
index 96ad01fb668c..ccf682689ec9 100644
--- a/sound/core/seq/oss/seq_oss_ioctl.c
+++ b/sound/core/seq/oss/seq_oss_ioctl.c
@@ -49,7 +49,7 @@ static int snd_seq_oss_oob_user(struct seq_oss_devinfo *dp, void __user *arg)
49 if (copy_from_user(ev, arg, 8)) 49 if (copy_from_user(ev, arg, 8))
50 return -EFAULT; 50 return -EFAULT;
51 memset(&tmpev, 0, sizeof(tmpev)); 51 memset(&tmpev, 0, sizeof(tmpev));
52 snd_seq_oss_fill_addr(dp, &tmpev, dp->addr.port, dp->addr.client); 52 snd_seq_oss_fill_addr(dp, &tmpev, dp->addr.client, dp->addr.port);
53 tmpev.time.tick = 0; 53 tmpev.time.tick = 0;
54 if (! snd_seq_oss_process_event(dp, (union evrec *)ev, &tmpev)) { 54 if (! snd_seq_oss_process_event(dp, (union evrec *)ev, &tmpev)) {
55 snd_seq_oss_dispatch(dp, &tmpev, 0, 0); 55 snd_seq_oss_dispatch(dp, &tmpev, 0, 0);
diff --git a/sound/core/seq/oss/seq_oss_rw.c b/sound/core/seq/oss/seq_oss_rw.c
index 79ef430e56e1..537d5f423e20 100644
--- a/sound/core/seq/oss/seq_oss_rw.c
+++ b/sound/core/seq/oss/seq_oss_rw.c
@@ -161,7 +161,7 @@ insert_queue(struct seq_oss_devinfo *dp, union evrec *rec, struct file *opt)
161 memset(&event, 0, sizeof(event)); 161 memset(&event, 0, sizeof(event));
162 /* set dummy -- to be sure */ 162 /* set dummy -- to be sure */
163 event.type = SNDRV_SEQ_EVENT_NOTEOFF; 163 event.type = SNDRV_SEQ_EVENT_NOTEOFF;
164 snd_seq_oss_fill_addr(dp, &event, dp->addr.port, dp->addr.client); 164 snd_seq_oss_fill_addr(dp, &event, dp->addr.client, dp->addr.port);
165 165
166 if (snd_seq_oss_process_event(dp, rec, &event)) 166 if (snd_seq_oss_process_event(dp, rec, &event))
167 return 0; /* invalid event - no need to insert queue */ 167 return 0; /* invalid event - no need to insert queue */
diff --git a/sound/firewire/amdtp-am824.c b/sound/firewire/amdtp-am824.c
index cc6eb30f03a2..71168728940a 100644
--- a/sound/firewire/amdtp-am824.c
+++ b/sound/firewire/amdtp-am824.c
@@ -320,7 +320,7 @@ static void read_midi_messages(struct amdtp_stream *s,
320 u8 *b; 320 u8 *b;
321 321
322 for (f = 0; f < frames; f++) { 322 for (f = 0; f < frames; f++) {
323 port = (s->data_block_counter + f) % 8; 323 port = (8 - s->tx_first_dbc + s->data_block_counter + f) % 8;
324 b = (u8 *)&buffer[p->midi_position]; 324 b = (u8 *)&buffer[p->midi_position];
325 325
326 len = b[0] - 0x80; 326 len = b[0] - 0x80;
diff --git a/sound/hda/hdac_device.c b/sound/hda/hdac_device.c
index 6907dbefd08c..3842f9d34b7c 100644
--- a/sound/hda/hdac_device.c
+++ b/sound/hda/hdac_device.c
@@ -400,27 +400,33 @@ static void setup_fg_nodes(struct hdac_device *codec)
400int snd_hdac_refresh_widgets(struct hdac_device *codec, bool sysfs) 400int snd_hdac_refresh_widgets(struct hdac_device *codec, bool sysfs)
401{ 401{
402 hda_nid_t start_nid; 402 hda_nid_t start_nid;
403 int nums, err; 403 int nums, err = 0;
404 404
405 /*
406 * Serialize against multiple threads trying to update the sysfs
407 * widgets array.
408 */
409 mutex_lock(&codec->widget_lock);
405 nums = snd_hdac_get_sub_nodes(codec, codec->afg, &start_nid); 410 nums = snd_hdac_get_sub_nodes(codec, codec->afg, &start_nid);
406 if (!start_nid || nums <= 0 || nums >= 0xff) { 411 if (!start_nid || nums <= 0 || nums >= 0xff) {
407 dev_err(&codec->dev, "cannot read sub nodes for FG 0x%02x\n", 412 dev_err(&codec->dev, "cannot read sub nodes for FG 0x%02x\n",
408 codec->afg); 413 codec->afg);
409 return -EINVAL; 414 err = -EINVAL;
415 goto unlock;
410 } 416 }
411 417
412 if (sysfs) { 418 if (sysfs) {
413 mutex_lock(&codec->widget_lock);
414 err = hda_widget_sysfs_reinit(codec, start_nid, nums); 419 err = hda_widget_sysfs_reinit(codec, start_nid, nums);
415 mutex_unlock(&codec->widget_lock);
416 if (err < 0) 420 if (err < 0)
417 return err; 421 goto unlock;
418 } 422 }
419 423
420 codec->num_nodes = nums; 424 codec->num_nodes = nums;
421 codec->start_nid = start_nid; 425 codec->start_nid = start_nid;
422 codec->end_nid = start_nid + nums; 426 codec->end_nid = start_nid + nums;
423 return 0; 427unlock:
428 mutex_unlock(&codec->widget_lock);
429 return err;
424} 430}
425EXPORT_SYMBOL_GPL(snd_hdac_refresh_widgets); 431EXPORT_SYMBOL_GPL(snd_hdac_refresh_widgets);
426 432
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 5b3c26991f26..6f3a35949cdd 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -2448,9 +2448,10 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = {
2448 SND_PCI_QUIRK(0x1558, 0x9501, "Clevo P950HR", ALC1220_FIXUP_CLEVO_P950), 2448 SND_PCI_QUIRK(0x1558, 0x9501, "Clevo P950HR", ALC1220_FIXUP_CLEVO_P950),
2449 SND_PCI_QUIRK(0x1558, 0x95e1, "Clevo P95xER", ALC1220_FIXUP_CLEVO_P950), 2449 SND_PCI_QUIRK(0x1558, 0x95e1, "Clevo P95xER", ALC1220_FIXUP_CLEVO_P950),
2450 SND_PCI_QUIRK(0x1558, 0x95e2, "Clevo P950ER", ALC1220_FIXUP_CLEVO_P950), 2450 SND_PCI_QUIRK(0x1558, 0x95e2, "Clevo P950ER", ALC1220_FIXUP_CLEVO_P950),
2451 SND_PCI_QUIRK(0x1558, 0x96e1, "System76 Oryx Pro (oryp5)", ALC1220_FIXUP_CLEVO_PB51ED_PINS), 2451 SND_PCI_QUIRK(0x1558, 0x96e1, "Clevo P960[ER][CDFN]-K", ALC1220_FIXUP_CLEVO_P950),
2452 SND_PCI_QUIRK(0x1558, 0x97e1, "System76 Oryx Pro (oryp5)", ALC1220_FIXUP_CLEVO_PB51ED_PINS), 2452 SND_PCI_QUIRK(0x1558, 0x97e1, "Clevo P970[ER][CDFN]", ALC1220_FIXUP_CLEVO_P950),
2453 SND_PCI_QUIRK(0x1558, 0x65d1, "Tuxedo Book XC1509", ALC1220_FIXUP_CLEVO_PB51ED_PINS), 2453 SND_PCI_QUIRK(0x1558, 0x65d1, "Clevo PB51[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
2454 SND_PCI_QUIRK(0x1558, 0x67d1, "Clevo PB71[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
2454 SND_PCI_QUIRK_VENDOR(0x1558, "Clevo laptop", ALC882_FIXUP_EAPD), 2455 SND_PCI_QUIRK_VENDOR(0x1558, "Clevo laptop", ALC882_FIXUP_EAPD),
2455 SND_PCI_QUIRK(0x161f, 0x2054, "Medion laptop", ALC883_FIXUP_EAPD), 2456 SND_PCI_QUIRK(0x161f, 0x2054, "Medion laptop", ALC883_FIXUP_EAPD),
2456 SND_PCI_QUIRK(0x17aa, 0x3a0d, "Lenovo Y530", ALC882_FIXUP_LENOVO_Y530), 2457 SND_PCI_QUIRK(0x17aa, 0x3a0d, "Lenovo Y530", ALC882_FIXUP_LENOVO_Y530),
@@ -7074,6 +7075,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
7074 SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), 7075 SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
7075 SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), 7076 SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
7076 SND_PCI_QUIRK(0x17aa, 0x310c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), 7077 SND_PCI_QUIRK(0x17aa, 0x310c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION),
7078 SND_PCI_QUIRK(0x17aa, 0x3111, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION),
7077 SND_PCI_QUIRK(0x17aa, 0x312a, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), 7079 SND_PCI_QUIRK(0x17aa, 0x312a, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION),
7078 SND_PCI_QUIRK(0x17aa, 0x312f, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), 7080 SND_PCI_QUIRK(0x17aa, 0x312f, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION),
7079 SND_PCI_QUIRK(0x17aa, 0x313c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), 7081 SND_PCI_QUIRK(0x17aa, 0x313c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION),
diff --git a/sound/usb/line6/pcm.c b/sound/usb/line6/pcm.c
index 21127e4958b2..2c03e0f6bf72 100644
--- a/sound/usb/line6/pcm.c
+++ b/sound/usb/line6/pcm.c
@@ -556,6 +556,11 @@ int line6_init_pcm(struct usb_line6 *line6,
556 line6pcm->max_packet_size_out = 556 line6pcm->max_packet_size_out =
557 usb_maxpacket(line6->usbdev, 557 usb_maxpacket(line6->usbdev,
558 usb_sndisocpipe(line6->usbdev, ep_write), 1); 558 usb_sndisocpipe(line6->usbdev, ep_write), 1);
559 if (!line6pcm->max_packet_size_in || !line6pcm->max_packet_size_out) {
560 dev_err(line6pcm->line6->ifcdev,
561 "cannot get proper max packet size\n");
562 return -EINVAL;
563 }
559 564
560 spin_lock_init(&line6pcm->out.lock); 565 spin_lock_init(&line6pcm->out.lock);
561 spin_lock_init(&line6pcm->in.lock); 566 spin_lock_init(&line6pcm->in.lock);
diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c
index 1f6011f36bb0..199fa157a411 100644
--- a/sound/usb/mixer_quirks.c
+++ b/sound/usb/mixer_quirks.c
@@ -741,7 +741,7 @@ static int snd_ni_control_init_val(struct usb_mixer_interface *mixer,
741 return err; 741 return err;
742 } 742 }
743 743
744 kctl->private_value |= (value << 24); 744 kctl->private_value |= ((unsigned int)value << 24);
745 return 0; 745 return 0;
746} 746}
747 747
@@ -902,7 +902,7 @@ static int snd_ftu_eff_switch_init(struct usb_mixer_interface *mixer,
902 if (err < 0) 902 if (err < 0)
903 return err; 903 return err;
904 904
905 kctl->private_value |= value[0] << 24; 905 kctl->private_value |= (unsigned int)value[0] << 24;
906 return 0; 906 return 0;
907} 907}
908 908
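The mixer_quirks hunks above cast the operand to unsigned int before the 24-bit shift. Assuming the value can arrive in a signed 8-bit type (an assumption about the surrounding code), the promoted-to-int operand would otherwise sign-extend into the upper bits of the wider private_value field, and left-shifting a negative int is undefined. A small demonstration of the difference:

    #include <stdio.h>

    int main(void)
    {
        signed char value = -1;        /* pretend the device returned 0xff */
        unsigned long priv = 0;

        /* Force the operand unsigned before the shift so only the intended
         * byte lands in bits 24-31 of the wider field. */
        priv |= (unsigned int)value << 24;

        printf("0x%lx\n", priv);       /* 0xff000000, no sign extension */
        return 0;
    }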
diff --git a/tools/arch/x86/include/uapi/asm/perf_regs.h b/tools/arch/x86/include/uapi/asm/perf_regs.h
index ac67bbea10ca..7c9d2bb3833b 100644
--- a/tools/arch/x86/include/uapi/asm/perf_regs.h
+++ b/tools/arch/x86/include/uapi/asm/perf_regs.h
@@ -52,4 +52,7 @@ enum perf_event_x86_regs {
52 /* These include both GPRs and XMMX registers */ 52 /* These include both GPRs and XMMX registers */
53 PERF_REG_X86_XMM_MAX = PERF_REG_X86_XMM15 + 2, 53 PERF_REG_X86_XMM_MAX = PERF_REG_X86_XMM15 + 2,
54}; 54};
55
56#define PERF_REG_EXTENDED_MASK (~((1ULL << PERF_REG_X86_XMM0) - 1))
57
55#endif /* _ASM_X86_PERF_REGS_H */ 58#endif /* _ASM_X86_PERF_REGS_H */
diff --git a/tools/include/linux/rcu.h b/tools/include/linux/rcu.h
index 7d02527e5bce..9554d3fa54f3 100644
--- a/tools/include/linux/rcu.h
+++ b/tools/include/linux/rcu.h
@@ -19,7 +19,7 @@ static inline bool rcu_is_watching(void)
19 return false; 19 return false;
20} 20}
21 21
22#define rcu_assign_pointer(p, v) ((p) = (v)) 22#define rcu_assign_pointer(p, v) do { (p) = (v); } while (0)
23#define RCU_INIT_POINTER(p, v) p=(v) 23#define RCU_INIT_POINTER(p, v) do { (p) = (v); } while (0)
24 24
25#endif 25#endif
diff --git a/tools/memory-model/linux-kernel.bell b/tools/memory-model/linux-kernel.bell
index def9131d3d8e..5be86b1025e8 100644
--- a/tools/memory-model/linux-kernel.bell
+++ b/tools/memory-model/linux-kernel.bell
@@ -24,6 +24,7 @@ instructions RMW[{'once,'acquire,'release}]
24enum Barriers = 'wmb (*smp_wmb*) || 24enum Barriers = 'wmb (*smp_wmb*) ||
25 'rmb (*smp_rmb*) || 25 'rmb (*smp_rmb*) ||
26 'mb (*smp_mb*) || 26 'mb (*smp_mb*) ||
27 'barrier (*barrier*) ||
27 'rcu-lock (*rcu_read_lock*) || 28 'rcu-lock (*rcu_read_lock*) ||
28 'rcu-unlock (*rcu_read_unlock*) || 29 'rcu-unlock (*rcu_read_unlock*) ||
29 'sync-rcu (*synchronize_rcu*) || 30 'sync-rcu (*synchronize_rcu*) ||
@@ -76,3 +77,8 @@ flag ~empty rcu-rscs & (po ; [Sync-srcu] ; po) as invalid-sleep
76 77
77(* Validate SRCU dynamic match *) 78(* Validate SRCU dynamic match *)
78flag ~empty different-values(srcu-rscs) as srcu-bad-nesting 79flag ~empty different-values(srcu-rscs) as srcu-bad-nesting
80
81(* Compute marked and plain memory accesses *)
82let Marked = (~M) | IW | Once | Release | Acquire | domain(rmw) | range(rmw) |
83 LKR | LKW | UL | LF | RL | RU
84let Plain = M \ Marked
diff --git a/tools/memory-model/linux-kernel.cat b/tools/memory-model/linux-kernel.cat
index 8dcb37835b61..ea2ff4b94074 100644
--- a/tools/memory-model/linux-kernel.cat
+++ b/tools/memory-model/linux-kernel.cat
@@ -24,8 +24,14 @@ include "lock.cat"
24(* Basic relations *) 24(* Basic relations *)
25(*******************) 25(*******************)
26 26
27(* Release Acquire *)
28let acq-po = [Acquire] ; po ; [M]
29let po-rel = [M] ; po ; [Release]
30let po-unlock-rf-lock-po = po ; [UL] ; rf ; [LKR] ; po
31
27(* Fences *) 32(* Fences *)
28let rmb = [R \ Noreturn] ; fencerel(Rmb) ; [R \ Noreturn] 33let R4rmb = R \ Noreturn (* Reads for which rmb works *)
34let rmb = [R4rmb] ; fencerel(Rmb) ; [R4rmb]
29let wmb = [W] ; fencerel(Wmb) ; [W] 35let wmb = [W] ; fencerel(Wmb) ; [W]
30let mb = ([M] ; fencerel(Mb) ; [M]) | 36let mb = ([M] ; fencerel(Mb) ; [M]) |
31 ([M] ; fencerel(Before-atomic) ; [RMW] ; po? ; [M]) | 37 ([M] ; fencerel(Before-atomic) ; [RMW] ; po? ; [M]) |
@@ -34,13 +40,14 @@ let mb = ([M] ; fencerel(Mb) ; [M]) |
34 ([M] ; po ; [UL] ; (co | po) ; [LKW] ; 40 ([M] ; po ; [UL] ; (co | po) ; [LKW] ;
35 fencerel(After-unlock-lock) ; [M]) 41 fencerel(After-unlock-lock) ; [M])
36let gp = po ; [Sync-rcu | Sync-srcu] ; po? 42let gp = po ; [Sync-rcu | Sync-srcu] ; po?
37
38let strong-fence = mb | gp 43let strong-fence = mb | gp
39 44
40(* Release Acquire *) 45let nonrw-fence = strong-fence | po-rel | acq-po
41let acq-po = [Acquire] ; po ; [M] 46let fence = nonrw-fence | wmb | rmb
42let po-rel = [M] ; po ; [Release] 47let barrier = fencerel(Barrier | Rmb | Wmb | Mb | Sync-rcu | Sync-srcu |
43let po-unlock-rf-lock-po = po ; [UL] ; rf ; [LKR] ; po 48 Before-atomic | After-atomic | Acquire | Release |
49 Rcu-lock | Rcu-unlock | Srcu-lock | Srcu-unlock) |
50 (po ; [Release]) | ([Acquire] ; po)
44 51
45(**********************************) 52(**********************************)
46(* Fundamental coherence ordering *) 53(* Fundamental coherence ordering *)
@@ -61,21 +68,22 @@ empty rmw & (fre ; coe) as atomic
61let dep = addr | data 68let dep = addr | data
62let rwdep = (dep | ctrl) ; [W] 69let rwdep = (dep | ctrl) ; [W]
63let overwrite = co | fr 70let overwrite = co | fr
64let to-w = rwdep | (overwrite & int) 71let to-w = rwdep | (overwrite & int) | (addr ; [Plain] ; wmb)
65let to-r = addr | (dep ; rfi) 72let to-r = addr | (dep ; [Marked] ; rfi)
66let fence = strong-fence | wmb | po-rel | rmb | acq-po
67let ppo = to-r | to-w | fence | (po-unlock-rf-lock-po & int) 73let ppo = to-r | to-w | fence | (po-unlock-rf-lock-po & int)
68 74
69(* Propagation: Ordering from release operations and strong fences. *) 75(* Propagation: Ordering from release operations and strong fences. *)
70let A-cumul(r) = rfe? ; r 76let A-cumul(r) = (rfe ; [Marked])? ; r
71let cumul-fence = A-cumul(strong-fence | po-rel) | wmb | po-unlock-rf-lock-po 77let cumul-fence = [Marked] ; (A-cumul(strong-fence | po-rel) | wmb |
72let prop = (overwrite & ext)? ; cumul-fence* ; rfe? 78 po-unlock-rf-lock-po) ; [Marked]
79let prop = [Marked] ; (overwrite & ext)? ; cumul-fence* ;
80 [Marked] ; rfe? ; [Marked]
73 81
74(* 82(*
75 * Happens Before: Ordering from the passage of time. 83 * Happens Before: Ordering from the passage of time.
76 * No fences needed here for prop because relation confined to one process. 84 * No fences needed here for prop because relation confined to one process.
77 *) 85 *)
78let hb = ppo | rfe | ((prop \ id) & int) 86let hb = [Marked] ; (ppo | rfe | ((prop \ id) & int)) ; [Marked]
79acyclic hb as happens-before 87acyclic hb as happens-before
80 88
81(****************************************) 89(****************************************)
@@ -83,7 +91,7 @@ acyclic hb as happens-before
83(****************************************) 91(****************************************)
84 92
85(* Propagation: Each non-rf link needs a strong fence. *) 93(* Propagation: Each non-rf link needs a strong fence. *)
86let pb = prop ; strong-fence ; hb* 94let pb = prop ; strong-fence ; hb* ; [Marked]
87acyclic pb as propagation 95acyclic pb as propagation
88 96
89(*******) 97(*******)
@@ -114,24 +122,28 @@ let rcu-link = po? ; hb* ; pb* ; prop ; po
114 122
115(* 123(*
116 * Any sequence containing at least as many grace periods as RCU read-side 124 * Any sequence containing at least as many grace periods as RCU read-side
117 * critical sections (joined by rcu-link) acts as a generalized strong fence. 125 * critical sections (joined by rcu-link) induces order like a generalized
126 * inter-CPU strong fence.
118 * Likewise for SRCU grace periods and read-side critical sections, provided 127 * Likewise for SRCU grace periods and read-side critical sections, provided
119 * the synchronize_srcu() and srcu_read_[un]lock() calls refer to the same 128 * the synchronize_srcu() and srcu_read_[un]lock() calls refer to the same
120 * struct srcu_struct location. 129 * struct srcu_struct location.
121 *) 130 *)
122let rec rcu-fence = rcu-gp | srcu-gp | 131let rec rcu-order = rcu-gp | srcu-gp |
123 (rcu-gp ; rcu-link ; rcu-rscsi) | 132 (rcu-gp ; rcu-link ; rcu-rscsi) |
124 ((srcu-gp ; rcu-link ; srcu-rscsi) & loc) | 133 ((srcu-gp ; rcu-link ; srcu-rscsi) & loc) |
125 (rcu-rscsi ; rcu-link ; rcu-gp) | 134 (rcu-rscsi ; rcu-link ; rcu-gp) |
126 ((srcu-rscsi ; rcu-link ; srcu-gp) & loc) | 135 ((srcu-rscsi ; rcu-link ; srcu-gp) & loc) |
127 (rcu-gp ; rcu-link ; rcu-fence ; rcu-link ; rcu-rscsi) | 136 (rcu-gp ; rcu-link ; rcu-order ; rcu-link ; rcu-rscsi) |
128 ((srcu-gp ; rcu-link ; rcu-fence ; rcu-link ; srcu-rscsi) & loc) | 137 ((srcu-gp ; rcu-link ; rcu-order ; rcu-link ; srcu-rscsi) & loc) |
129 (rcu-rscsi ; rcu-link ; rcu-fence ; rcu-link ; rcu-gp) | 138 (rcu-rscsi ; rcu-link ; rcu-order ; rcu-link ; rcu-gp) |
130 ((srcu-rscsi ; rcu-link ; rcu-fence ; rcu-link ; srcu-gp) & loc) | 139 ((srcu-rscsi ; rcu-link ; rcu-order ; rcu-link ; srcu-gp) & loc) |
131 (rcu-fence ; rcu-link ; rcu-fence) 140 (rcu-order ; rcu-link ; rcu-order)
141let rcu-fence = po ; rcu-order ; po?
142let fence = fence | rcu-fence
143let strong-fence = strong-fence | rcu-fence
132 144
133(* rb orders instructions just as pb does *) 145(* rb orders instructions just as pb does *)
134let rb = prop ; po ; rcu-fence ; po? ; hb* ; pb* 146let rb = prop ; rcu-fence ; hb* ; pb* ; [Marked]
135 147
136irreflexive rb as rcu 148irreflexive rb as rcu
137 149
@@ -143,3 +155,49 @@ irreflexive rb as rcu
143 * let xb = hb | pb | rb 155 * let xb = hb | pb | rb
144 * acyclic xb as executes-before 156 * acyclic xb as executes-before
145 *) 157 *)
158
159(*********************************)
160(* Plain accesses and data races *)
161(*********************************)
162
163(* Warn about plain writes and marked accesses in the same region *)
164let mixed-accesses = ([Plain & W] ; (po-loc \ barrier) ; [Marked]) |
165 ([Marked] ; (po-loc \ barrier) ; [Plain & W])
166flag ~empty mixed-accesses as mixed-accesses
167
168(* Executes-before and visibility *)
169let xbstar = (hb | pb | rb)*
170let vis = cumul-fence* ; rfe? ; [Marked] ;
171 ((strong-fence ; [Marked] ; xbstar) | (xbstar & int))
172
173(* Boundaries for lifetimes of plain accesses *)
174let w-pre-bounded = [Marked] ; (addr | fence)?
175let r-pre-bounded = [Marked] ; (addr | nonrw-fence |
176 ([R4rmb] ; fencerel(Rmb) ; [~Noreturn]))?
177let w-post-bounded = fence? ; [Marked]
178let r-post-bounded = (nonrw-fence | ([~Noreturn] ; fencerel(Rmb) ; [R4rmb]))? ;
179 [Marked]
180
181(* Visibility and executes-before for plain accesses *)
182let ww-vis = fence | (strong-fence ; xbstar ; w-pre-bounded) |
183 (w-post-bounded ; vis ; w-pre-bounded)
184let wr-vis = fence | (strong-fence ; xbstar ; r-pre-bounded) |
185 (w-post-bounded ; vis ; r-pre-bounded)
186let rw-xbstar = fence | (r-post-bounded ; xbstar ; w-pre-bounded)
187
188(* Potential races *)
189let pre-race = ext & ((Plain * M) | ((M \ IW) * Plain))
190
191(* Coherence requirements for plain accesses *)
192let wr-incoh = pre-race & rf & rw-xbstar^-1
193let rw-incoh = pre-race & fr & wr-vis^-1
194let ww-incoh = pre-race & co & ww-vis^-1
195empty (wr-incoh | rw-incoh | ww-incoh) as plain-coherence
196
197(* Actual races *)
198let ww-nonrace = ww-vis & ((Marked * W) | rw-xbstar) & ((W * Marked) | wr-vis)
199let ww-race = (pre-race & co) \ ww-nonrace
200let wr-race = (pre-race & (co? ; rf)) \ wr-vis
201let rw-race = (pre-race & fr) \ rw-xbstar
202
203flag ~empty (ww-race | wr-race | rw-race) as data-race
diff --git a/tools/memory-model/linux-kernel.def b/tools/memory-model/linux-kernel.def
index 551eeaa389d4..ef0f3c1850de 100644
--- a/tools/memory-model/linux-kernel.def
+++ b/tools/memory-model/linux-kernel.def
@@ -24,6 +24,7 @@ smp_mb__before_atomic() { __fence{before-atomic}; }
24smp_mb__after_atomic() { __fence{after-atomic}; } 24smp_mb__after_atomic() { __fence{after-atomic}; }
25smp_mb__after_spinlock() { __fence{after-spinlock}; } 25smp_mb__after_spinlock() { __fence{after-spinlock}; }
26smp_mb__after_unlock_lock() { __fence{after-unlock-lock}; } 26smp_mb__after_unlock_lock() { __fence{after-unlock-lock}; }
27barrier() { __fence{barrier}; }
27 28
28// Exchange 29// Exchange
29xchg(X,V) __xchg{mb}(X,V) 30xchg(X,V) __xchg{mb}(X,V)
diff --git a/tools/memory-model/litmus-tests/MP+poonceonces.litmus b/tools/memory-model/litmus-tests/MP+poonceonces.litmus
index b2b60b84fb9d..172f0145301c 100644
--- a/tools/memory-model/litmus-tests/MP+poonceonces.litmus
+++ b/tools/memory-model/litmus-tests/MP+poonceonces.litmus
@@ -1,7 +1,7 @@
1C MP+poonceonces 1C MP+poonceonces
2 2
3(* 3(*
4 * Result: Maybe 4 * Result: Sometimes
5 * 5 *
6 * Can the counter-intuitive message-passing outcome be prevented with 6 * Can the counter-intuitive message-passing outcome be prevented with
7 * no ordering at all? 7 * no ordering at all?
diff --git a/tools/memory-model/litmus-tests/README b/tools/memory-model/litmus-tests/README
index 5ee08f129094..681f9067fa9e 100644
--- a/tools/memory-model/litmus-tests/README
+++ b/tools/memory-model/litmus-tests/README
@@ -244,7 +244,7 @@ produce the name:
244Adding the ".litmus" suffix: SB+rfionceonce-poonceonces.litmus 244Adding the ".litmus" suffix: SB+rfionceonce-poonceonces.litmus
245 245
246The descriptors that describe connections between consecutive accesses 246The descriptors that describe connections between consecutive accesses
247within the cycle through a given litmus test can be provided by the herd 247within the cycle through a given litmus test can be provided by the herd7
248tool (Rfi, Po, Fre, and so on) or by the linux-kernel.bell file (Once, 248tool (Rfi, Po, Fre, and so on) or by the linux-kernel.bell file (Once,
249Release, Acquire, and so on). 249Release, Acquire, and so on).
250 250
diff --git a/tools/memory-model/lock.cat b/tools/memory-model/lock.cat
index a059d1a6d8a2..6b52f365d73a 100644
--- a/tools/memory-model/lock.cat
+++ b/tools/memory-model/lock.cat
@@ -11,7 +11,7 @@
11include "cross.cat" 11include "cross.cat"
12 12
13(* 13(*
14 * The lock-related events generated by herd are as follows: 14 * The lock-related events generated by herd7 are as follows:
15 * 15 *
16 * LKR Lock-Read: the read part of a spin_lock() or successful 16 * LKR Lock-Read: the read part of a spin_lock() or successful
17 * spin_trylock() read-modify-write event pair 17 * spin_trylock() read-modify-write event pair
diff --git a/tools/memory-model/scripts/README b/tools/memory-model/scripts/README
index 29375a1fbbfa..095c7eb36f9f 100644
--- a/tools/memory-model/scripts/README
+++ b/tools/memory-model/scripts/README
@@ -22,7 +22,7 @@ checklitmushist.sh
22 22
23 Run all litmus tests having .litmus.out files from previous 23 Run all litmus tests having .litmus.out files from previous
24 initlitmushist.sh or newlitmushist.sh runs, comparing the 24 initlitmushist.sh or newlitmushist.sh runs, comparing the
25 herd output to that of the original runs. 25 herd7 output to that of the original runs.
26 26
27checklitmus.sh 27checklitmus.sh
28 28
@@ -43,7 +43,7 @@ initlitmushist.sh
43 43
44judgelitmus.sh 44judgelitmus.sh
45 45
46 Given a .litmus file and its .litmus.out herd output, check the 46 Given a .litmus file and its .litmus.out herd7 output, check the
47 .litmus.out file against the .litmus file's "Result:" comment to 47 .litmus.out file against the .litmus file's "Result:" comment to
48 judge whether the test ran correctly. Not normally run manually, 48 judge whether the test ran correctly. Not normally run manually,
49 provided instead for use by other scripts. 49 provided instead for use by other scripts.
diff --git a/tools/memory-model/scripts/checkalllitmus.sh b/tools/memory-model/scripts/checkalllitmus.sh
index b35fcd61ecf6..3c0c7fbbd223 100755
--- a/tools/memory-model/scripts/checkalllitmus.sh
+++ b/tools/memory-model/scripts/checkalllitmus.sh
@@ -1,7 +1,7 @@
1#!/bin/sh 1#!/bin/sh
2# SPDX-License-Identifier: GPL-2.0+ 2# SPDX-License-Identifier: GPL-2.0+
3# 3#
4# Run herd tests on all .litmus files in the litmus-tests directory 4# Run herd7 tests on all .litmus files in the litmus-tests directory
5# and check each file's result against a "Result:" comment within that 5# and check each file's result against a "Result:" comment within that
6# litmus test. If the verification result does not match that specified 6# litmus test. If the verification result does not match that specified
7# in the litmus test, this script prints an error message prefixed with 7# in the litmus test, this script prints an error message prefixed with
diff --git a/tools/memory-model/scripts/checklitmus.sh b/tools/memory-model/scripts/checklitmus.sh
index dd08801a30b0..11461ed40b5e 100755
--- a/tools/memory-model/scripts/checklitmus.sh
+++ b/tools/memory-model/scripts/checklitmus.sh
@@ -1,7 +1,7 @@
1#!/bin/sh 1#!/bin/sh
2# SPDX-License-Identifier: GPL-2.0+ 2# SPDX-License-Identifier: GPL-2.0+
3# 3#
4# Run a herd test and invokes judgelitmus.sh to check the result against 4# Run a herd7 test and invokes judgelitmus.sh to check the result against
5# a "Result:" comment within the litmus test. It also outputs verification 5# a "Result:" comment within the litmus test. It also outputs verification
6# results to a file whose name is that of the specified litmus test, but 6# results to a file whose name is that of the specified litmus test, but
7# with ".out" appended. 7# with ".out" appended.
diff --git a/tools/memory-model/scripts/parseargs.sh b/tools/memory-model/scripts/parseargs.sh
index 859e1d581e05..40f52080fdbd 100644
--- a/tools/memory-model/scripts/parseargs.sh
+++ b/tools/memory-model/scripts/parseargs.sh
@@ -91,7 +91,7 @@ do
91 shift 91 shift
92 ;; 92 ;;
93 --herdopts|--herdopt) 93 --herdopts|--herdopt)
94 checkarg --destdir "(herd options)" "$#" "$2" '.*' '^--' 94 checkarg --destdir "(herd7 options)" "$#" "$2" '.*' '^--'
95 LKMM_HERD_OPTIONS="$2" 95 LKMM_HERD_OPTIONS="$2"
96 shift 96 shift
97 ;; 97 ;;
diff --git a/tools/memory-model/scripts/runlitmushist.sh b/tools/memory-model/scripts/runlitmushist.sh
index e507f5f933d5..6ed376f495bb 100644
--- a/tools/memory-model/scripts/runlitmushist.sh
+++ b/tools/memory-model/scripts/runlitmushist.sh
@@ -79,7 +79,7 @@ then
79 echo ' ---' Summary: 1>&2 79 echo ' ---' Summary: 1>&2
80 grep '!!!' $T/*.sh.out 1>&2 80 grep '!!!' $T/*.sh.out 1>&2
81 nfail="`grep '!!!' $T/*.sh.out | wc -l`" 81 nfail="`grep '!!!' $T/*.sh.out | wc -l`"
82 echo 'Number of failed herd runs (e.g., timeout): ' $nfail 1>&2 82 echo 'Number of failed herd7 runs (e.g., timeout): ' $nfail 1>&2
83 exit 1 83 exit 1
84else 84else
85 echo All runs completed successfully. 1>&2 85 echo All runs completed successfully. 1>&2
diff --git a/tools/perf/arch/x86/include/perf_regs.h b/tools/perf/arch/x86/include/perf_regs.h
index b7cd91a9014f..b7321337d100 100644
--- a/tools/perf/arch/x86/include/perf_regs.h
+++ b/tools/perf/arch/x86/include/perf_regs.h
@@ -9,7 +9,6 @@
9void perf_regs_load(u64 *regs); 9void perf_regs_load(u64 *regs);
10 10
11#define PERF_REGS_MAX PERF_REG_X86_XMM_MAX 11#define PERF_REGS_MAX PERF_REG_X86_XMM_MAX
12#define PERF_XMM_REGS_MASK (~((1ULL << PERF_REG_X86_XMM0) - 1))
13#ifndef HAVE_ARCH_X86_64_SUPPORT 12#ifndef HAVE_ARCH_X86_64_SUPPORT
14#define PERF_REGS_MASK ((1ULL << PERF_REG_X86_32_MAX) - 1) 13#define PERF_REGS_MASK ((1ULL << PERF_REG_X86_32_MAX) - 1)
15#define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_32 14#define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_32
diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c
index 7886ca5263e3..3666c0076df9 100644
--- a/tools/perf/arch/x86/util/perf_regs.c
+++ b/tools/perf/arch/x86/util/perf_regs.c
@@ -277,7 +277,7 @@ uint64_t arch__intr_reg_mask(void)
277 .type = PERF_TYPE_HARDWARE, 277 .type = PERF_TYPE_HARDWARE,
278 .config = PERF_COUNT_HW_CPU_CYCLES, 278 .config = PERF_COUNT_HW_CPU_CYCLES,
279 .sample_type = PERF_SAMPLE_REGS_INTR, 279 .sample_type = PERF_SAMPLE_REGS_INTR,
280 .sample_regs_intr = PERF_XMM_REGS_MASK, 280 .sample_regs_intr = PERF_REG_EXTENDED_MASK,
281 .precise_ip = 1, 281 .precise_ip = 1,
282 .disabled = 1, 282 .disabled = 1,
283 .exclude_kernel = 1, 283 .exclude_kernel = 1,
@@ -293,7 +293,7 @@ uint64_t arch__intr_reg_mask(void)
293 fd = sys_perf_event_open(&attr, 0, -1, -1, 0); 293 fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
294 if (fd != -1) { 294 if (fd != -1) {
295 close(fd); 295 close(fd);
296 return (PERF_XMM_REGS_MASK | PERF_REGS_MASK); 296 return (PERF_REG_EXTENDED_MASK | PERF_REGS_MASK);
297 } 297 }
298 298
299 return PERF_REGS_MASK; 299 return PERF_REGS_MASK;
diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c
index 698c08f851b8..8995092d541e 100644
--- a/tools/testing/radix-tree/idr-test.c
+++ b/tools/testing/radix-tree/idr-test.c
@@ -279,6 +279,51 @@ static void idr_align_test(struct idr *idr)
279 } 279 }
280} 280}
281 281
282DEFINE_IDR(find_idr);
283
284static void *idr_throbber(void *arg)
285{
286 time_t start = time(NULL);
287 int id = *(int *)arg;
288
289 rcu_register_thread();
290 do {
291 idr_alloc(&find_idr, xa_mk_value(id), id, id + 1, GFP_KERNEL);
292 idr_remove(&find_idr, id);
293 } while (time(NULL) < start + 10);
294 rcu_unregister_thread();
295
296 return NULL;
297}
298
299void idr_find_test_1(int anchor_id, int throbber_id)
300{
301 pthread_t throbber;
302 time_t start = time(NULL);
303
304 pthread_create(&throbber, NULL, idr_throbber, &throbber_id);
305
306 BUG_ON(idr_alloc(&find_idr, xa_mk_value(anchor_id), anchor_id,
307 anchor_id + 1, GFP_KERNEL) != anchor_id);
308
309 do {
310 int id = 0;
311 void *entry = idr_get_next(&find_idr, &id);
312 BUG_ON(entry != xa_mk_value(id));
313 } while (time(NULL) < start + 11);
314
315 pthread_join(throbber, NULL);
316
317 idr_remove(&find_idr, anchor_id);
318 BUG_ON(!idr_is_empty(&find_idr));
319}
320
321void idr_find_test(void)
322{
323 idr_find_test_1(100000, 0);
324 idr_find_test_1(0, 100000);
325}
326
282void idr_checks(void) 327void idr_checks(void)
283{ 328{
284 unsigned long i; 329 unsigned long i;
@@ -360,6 +405,7 @@ void idr_checks(void)
360 idr_u32_test(1); 405 idr_u32_test(1);
361 idr_u32_test(0); 406 idr_u32_test(0);
362 idr_align_test(&idr); 407 idr_align_test(&idr);
408 idr_find_test();
363} 409}
364 410
365#define module_init(x) 411#define module_init(x)
diff --git a/tools/testing/radix-tree/linux/rcupdate.h b/tools/testing/radix-tree/linux/rcupdate.h
index fd280b070fdb..fed468fb0c78 100644
--- a/tools/testing/radix-tree/linux/rcupdate.h
+++ b/tools/testing/radix-tree/linux/rcupdate.h
@@ -7,6 +7,6 @@
7#define rcu_dereference_raw(p) rcu_dereference(p) 7#define rcu_dereference_raw(p) rcu_dereference(p)
8#define rcu_dereference_protected(p, cond) rcu_dereference(p) 8#define rcu_dereference_protected(p, cond) rcu_dereference(p)
9#define rcu_dereference_check(p, cond) rcu_dereference(p) 9#define rcu_dereference_check(p, cond) rcu_dereference(p)
10#define RCU_INIT_POINTER(p, v) (p) = (v) 10#define RCU_INIT_POINTER(p, v) do { (p) = (v); } while (0)
11 11
12#endif 12#endif
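
(Aside, not part of the patch: the do { ... } while (0) wrapper is the usual guard for statement-like macros. A hypothetical sketch of the hazard it prevents:)

#include <stddef.h>

#define CLEAR_BAD(p, f)	 (p) = NULL; (f) = 0
#define CLEAR_GOOD(p, f) do { (p) = NULL; (f) = 0; } while (0)

void demo(int cond, void **p, int *f)
{
	if (cond)
		CLEAR_GOOD(*p, *f);	/* expands to one statement, fully
					 * guarded by the if */
	else
		*f = 1;
	/*
	 * With CLEAR_BAD, "(f) = 0;" would escape the if and the else above
	 * would not even parse.  Wrapping even a single assignment this way,
	 * as the hunk does for RCU_INIT_POINTER, also keeps the macro from
	 * being misused as an expression.
	 */
}
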
diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
index b38260e29775..241919ef1eac 100644
--- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
@@ -146,6 +146,7 @@ int main(int argc, char *argv[])
146 kvm_vm_restart(vm, O_RDWR); 146 kvm_vm_restart(vm, O_RDWR);
147 vm_vcpu_add(vm, VCPU_ID, 0, 0); 147 vm_vcpu_add(vm, VCPU_ID, 0, 0);
148 vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); 148 vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
149 vcpu_ioctl(vm, VCPU_ID, KVM_ENABLE_CAP, &enable_evmcs_cap);
149 vcpu_load_state(vm, VCPU_ID, state); 150 vcpu_load_state(vm, VCPU_ID, state);
150 run = vcpu_state(vm, VCPU_ID); 151 run = vcpu_state(vm, VCPU_ID);
151 free(state); 152 free(state);
diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore
index ba919308fe30..d503b8764a8e 100644
--- a/tools/testing/selftests/powerpc/mm/.gitignore
+++ b/tools/testing/selftests/powerpc/mm/.gitignore
@@ -3,4 +3,5 @@ subpage_prot
3tempfile 3tempfile
4prot_sao 4prot_sao
5segv_errors 5segv_errors
6wild_bctr \ No newline at end of file 6wild_bctr
7large_vm_fork_separation \ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile
index 43d68420e363..f1fbc15800c4 100644
--- a/tools/testing/selftests/powerpc/mm/Makefile
+++ b/tools/testing/selftests/powerpc/mm/Makefile
@@ -2,7 +2,8 @@
2noarg: 2noarg:
3 $(MAKE) -C ../ 3 $(MAKE) -C ../
4 4
5TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr 5TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr \
6 large_vm_fork_separation
6TEST_GEN_FILES := tempfile 7TEST_GEN_FILES := tempfile
7 8
8top_srcdir = ../../../../.. 9top_srcdir = ../../../../..
@@ -13,6 +14,7 @@ $(TEST_GEN_PROGS): ../harness.c
13$(OUTPUT)/prot_sao: ../utils.c 14$(OUTPUT)/prot_sao: ../utils.c
14 15
15$(OUTPUT)/wild_bctr: CFLAGS += -m64 16$(OUTPUT)/wild_bctr: CFLAGS += -m64
17$(OUTPUT)/large_vm_fork_separation: CFLAGS += -m64
16 18
17$(OUTPUT)/tempfile: 19$(OUTPUT)/tempfile:
18 dd if=/dev/zero of=$@ bs=64k count=1 20 dd if=/dev/zero of=$@ bs=64k count=1
diff --git a/tools/testing/selftests/powerpc/mm/large_vm_fork_separation.c b/tools/testing/selftests/powerpc/mm/large_vm_fork_separation.c
new file mode 100644
index 000000000000..2363a7f3ab0d
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/large_vm_fork_separation.c
@@ -0,0 +1,87 @@
1// SPDX-License-Identifier: GPL-2.0+
2//
3// Copyright 2019, Michael Ellerman, IBM Corp.
4//
5// Test that allocating memory beyond the memory limit and then forking is
6// handled correctly, ie. the child is able to access the mappings beyond the
7// memory limit and the child's writes are not visible to the parent.
8
9#include <stdio.h>
10#include <stdlib.h>
11#include <sys/mman.h>
12#include <sys/types.h>
13#include <sys/wait.h>
14#include <unistd.h>
15
16#include "utils.h"
17
18
19#ifndef MAP_FIXED_NOREPLACE
20#define MAP_FIXED_NOREPLACE MAP_FIXED // "Should be safe" above 512TB
21#endif
22
23
24static int test(void)
25{
26 int p2c[2], c2p[2], rc, status, c, *p;
27 unsigned long page_size;
28 pid_t pid;
29
30 page_size = sysconf(_SC_PAGESIZE);
31 SKIP_IF(page_size != 65536);
32
33 // Create a mapping at 512TB to allocate an extended_id
34 p = mmap((void *)(512ul << 40), page_size, PROT_READ | PROT_WRITE,
35 MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE, -1, 0);
36 if (p == MAP_FAILED) {
37 perror("mmap");
38 printf("Error: couldn't mmap(), confirm kernel has 4TB support?\n");
39 return 1;
40 }
41
42 printf("parent writing %p = 1\n", p);
43 *p = 1;
44
45 FAIL_IF(pipe(p2c) == -1 || pipe(c2p) == -1);
46
47 pid = fork();
48 if (pid == 0) {
49 FAIL_IF(read(p2c[0], &c, 1) != 1);
50
51 pid = getpid();
52 printf("child writing %p = %d\n", p, pid);
53 *p = pid;
54
55 FAIL_IF(write(c2p[1], &c, 1) != 1);
56 FAIL_IF(read(p2c[0], &c, 1) != 1);
57 exit(0);
58 }
59
60 c = 0;
61 FAIL_IF(write(p2c[1], &c, 1) != 1);
62 FAIL_IF(read(c2p[0], &c, 1) != 1);
63
64 // Prevent compiler optimisation
65 barrier();
66
67 rc = 0;
68 printf("parent reading %p = %d\n", p, *p);
69 if (*p != 1) {
70 printf("Error: BUG! parent saw child's write! *p = %d\n", *p);
71 rc = 1;
72 }
73
74 FAIL_IF(write(p2c[1], &c, 1) != 1);
75 FAIL_IF(waitpid(pid, &status, 0) == -1);
76 FAIL_IF(!WIFEXITED(status) || WEXITSTATUS(status));
77
78 if (rc == 0)
79 printf("success: test completed OK\n");
80
81 return rc;
82}
83
84int main(void)
85{
86 return test_harness(test, "large_vm_fork_separation");
87}
diff --git a/tools/testing/selftests/rcutorture/Makefile b/tools/testing/selftests/rcutorture/Makefile
new file mode 100644
index 000000000000..5202dc666206
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/Makefile
@@ -0,0 +1,3 @@
1# SPDX-License-Identifier: GPL-2.0+
2all:
3 ( cd ../../../..; tools/testing/selftests/rcutorture/bin/kvm.sh --duration 10 --configs TREE01 )
diff --git a/tools/testing/selftests/rcutorture/bin/configinit.sh b/tools/testing/selftests/rcutorture/bin/configinit.sh
index 40359486b3a8..93e80a42249a 100755
--- a/tools/testing/selftests/rcutorture/bin/configinit.sh
+++ b/tools/testing/selftests/rcutorture/bin/configinit.sh
@@ -1,7 +1,7 @@
1#!/bin/bash 1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0+ 2# SPDX-License-Identifier: GPL-2.0+
3# 3#
4# Usage: configinit.sh config-spec-file build-output-dir results-dir 4# Usage: configinit.sh config-spec-file results-dir
5# 5#
6# Create a .config file from the spec file. Run from the kernel source tree. 6# Create a .config file from the spec file. Run from the kernel source tree.
7# Exits with 0 if all went well, with 1 if all went well but the config 7# Exits with 0 if all went well, with 1 if all went well but the config
@@ -11,10 +11,6 @@
11# desired settings, for example, "CONFIG_NO_HZ=y". For best results, 11# desired settings, for example, "CONFIG_NO_HZ=y". For best results,
12# this should be a full pathname. 12# this should be a full pathname.
13# 13#
14# The second argument is a optional path to a build output directory,
15# for example, "O=/tmp/foo". If this argument is omitted, the .config
16# file will be generated directly in the current directory.
17#
18# Copyright (C) IBM Corporation, 2013 14# Copyright (C) IBM Corporation, 2013
19# 15#
20# Authors: Paul E. McKenney <paulmck@linux.ibm.com> 16# Authors: Paul E. McKenney <paulmck@linux.ibm.com>
@@ -26,34 +22,23 @@ mkdir $T
26# Capture config spec file. 22# Capture config spec file.
27 23
28c=$1 24c=$1
29buildloc=$2 25resdir=$2
30resdir=$3
31builddir=
32if echo $buildloc | grep -q '^O='
33then
34 builddir=`echo $buildloc | sed -e 's/^O=//'`
35 if test ! -d $builddir
36 then
37 mkdir $builddir
38 fi
39else
40 echo Bad build directory: \"$buildloc\"
41 exit 2
42fi
43 26
44sed -e 's/^\(CONFIG[0-9A-Z_]*\)=.*$/grep -v "^# \1" |/' < $c > $T/u.sh 27sed -e 's/^\(CONFIG[0-9A-Z_]*\)=.*$/grep -v "^# \1" |/' < $c > $T/u.sh
45sed -e 's/^\(CONFIG[0-9A-Z_]*=\).*$/grep -v \1 |/' < $c >> $T/u.sh 28sed -e 's/^\(CONFIG[0-9A-Z_]*=\).*$/grep -v \1 |/' < $c >> $T/u.sh
46grep '^grep' < $T/u.sh > $T/upd.sh 29grep '^grep' < $T/u.sh > $T/upd.sh
47echo "cat - $c" >> $T/upd.sh 30echo "cat - $c" >> $T/upd.sh
48make mrproper 31if test -z "$TORTURE_TRUST_MAKE"
49make $buildloc distclean > $resdir/Make.distclean 2>&1 32then
50make $buildloc $TORTURE_DEFCONFIG > $resdir/Make.defconfig.out 2>&1 33 make clean > $resdir/Make.clean 2>&1
51mv $builddir/.config $builddir/.config.sav 34fi
52sh $T/upd.sh < $builddir/.config.sav > $builddir/.config 35make $TORTURE_DEFCONFIG > $resdir/Make.defconfig.out 2>&1
53cp $builddir/.config $builddir/.config.new 36mv .config .config.sav
54yes '' | make $buildloc oldconfig > $resdir/Make.oldconfig.out 2> $resdir/Make.oldconfig.err 37sh $T/upd.sh < .config.sav > .config
38cp .config .config.new
39yes '' | make oldconfig > $resdir/Make.oldconfig.out 2> $resdir/Make.oldconfig.err
55 40
56# verify new config matches specification. 41# verify new config matches specification.
57configcheck.sh $builddir/.config $c 42configcheck.sh .config $c
58 43
59exit 0 44exit 0
diff --git a/tools/testing/selftests/rcutorture/bin/cpus2use.sh b/tools/testing/selftests/rcutorture/bin/cpus2use.sh
index ff7102212703..4e9485590c10 100755
--- a/tools/testing/selftests/rcutorture/bin/cpus2use.sh
+++ b/tools/testing/selftests/rcutorture/bin/cpus2use.sh
@@ -9,6 +9,11 @@
9# 9#
10# Authors: Paul E. McKenney <paulmck@linux.ibm.com> 10# Authors: Paul E. McKenney <paulmck@linux.ibm.com>
11 11
12if test -n "$TORTURE_ALLOTED_CPUS"
13then
14 echo $TORTURE_ALLOTED_CPUS
15 exit 0
16fi
12ncpus=`grep '^processor' /proc/cpuinfo | wc -l` 17ncpus=`grep '^processor' /proc/cpuinfo | wc -l`
13idlecpus=`mpstat | tail -1 | \ 18idlecpus=`mpstat | tail -1 | \
14 awk -v ncpus=$ncpus '{ print ncpus * ($7 + $NF) / 100 }'` 19 awk -v ncpus=$ncpus '{ print ncpus * ($7 + $NF) / 100 }'`
diff --git a/tools/testing/selftests/rcutorture/bin/functions.sh b/tools/testing/selftests/rcutorture/bin/functions.sh
index 6bcb8b5b2ff2..c3a49fb4d6f6 100644
--- a/tools/testing/selftests/rcutorture/bin/functions.sh
+++ b/tools/testing/selftests/rcutorture/bin/functions.sh
@@ -172,7 +172,7 @@ identify_qemu_append () {
172 local console=ttyS0 172 local console=ttyS0
173 case "$1" in 173 case "$1" in
174 qemu-system-x86_64|qemu-system-i386) 174 qemu-system-x86_64|qemu-system-i386)
175 echo noapic selinux=0 initcall_debug debug 175 echo selinux=0 initcall_debug debug
176 ;; 176 ;;
177 qemu-system-aarch64) 177 qemu-system-aarch64)
178 console=ttyAMA0 178 console=ttyAMA0
@@ -191,8 +191,19 @@ identify_qemu_append () {
191# Output arguments for qemu arguments based on the TORTURE_QEMU_MAC 191# Output arguments for qemu arguments based on the TORTURE_QEMU_MAC
192# and TORTURE_QEMU_INTERACTIVE environment variables. 192# and TORTURE_QEMU_INTERACTIVE environment variables.
193identify_qemu_args () { 193identify_qemu_args () {
194 local KVM_CPU=""
195 case "$1" in
196 qemu-system-x86_64)
197 KVM_CPU=kvm64
198 ;;
199 qemu-system-i386)
200 KVM_CPU=kvm32
201 ;;
202 esac
194 case "$1" in 203 case "$1" in
195 qemu-system-x86_64|qemu-system-i386) 204 qemu-system-x86_64|qemu-system-i386)
205 echo -machine q35,accel=kvm
206 echo -cpu ${KVM_CPU}
196 ;; 207 ;;
197 qemu-system-aarch64) 208 qemu-system-aarch64)
198 echo -machine virt,gic-version=host -cpu host 209 echo -machine virt,gic-version=host -cpu host
diff --git a/tools/testing/selftests/rcutorture/bin/jitter.sh b/tools/testing/selftests/rcutorture/bin/jitter.sh
index 435b60933985..dc49a3ba6111 100755
--- a/tools/testing/selftests/rcutorture/bin/jitter.sh
+++ b/tools/testing/selftests/rcutorture/bin/jitter.sh
@@ -34,10 +34,15 @@ do
34 exit 0; 34 exit 0;
35 fi 35 fi
36 36
37 # Set affinity to randomly selected CPU 37 # Set affinity to randomly selected online CPU
38 cpus=`ls /sys/devices/system/cpu/*/online | 38 cpus=`grep 1 /sys/devices/system/cpu/*/online |
39 sed -e 's,/[^/]*$,,' -e 's/^[^0-9]*//' | 39 sed -e 's,/[^/]*$,,' -e 's/^[^0-9]*//'`
40 grep -v '^0*$'` 40
41 # Do not leave out poor old cpu0 which may not be hot-pluggable
42 if [ ! -f "/sys/devices/system/cpu/cpu0/online" ]; then
43 cpus="0 $cpus"
44 fi
45
41 cpumask=`awk -v cpus="$cpus" -v me=$me -v n=$n 'BEGIN { 46 cpumask=`awk -v cpus="$cpus" -v me=$me -v n=$n 'BEGIN {
42 srand(n + me + systime()); 47 srand(n + me + systime());
43 ncpus = split(cpus, ca); 48 ncpus = split(cpus, ca);
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-build.sh b/tools/testing/selftests/rcutorture/bin/kvm-build.sh
index c27a0bbb9c02..18d6518504ee 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-build.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-build.sh
@@ -3,7 +3,7 @@
3# 3#
4# Build a kvm-ready Linux kernel from the tree in the current directory. 4# Build a kvm-ready Linux kernel from the tree in the current directory.
5# 5#
6# Usage: kvm-build.sh config-template build-dir resdir 6# Usage: kvm-build.sh config-template resdir
7# 7#
8# Copyright (C) IBM Corporation, 2011 8# Copyright (C) IBM Corporation, 2011
9# 9#
@@ -15,8 +15,7 @@ then
15 echo "kvm-build.sh :$config_template: Not a readable file" 15 echo "kvm-build.sh :$config_template: Not a readable file"
16 exit 1 16 exit 1
17fi 17fi
18builddir=${2} 18resdir=${2}
19resdir=${3}
20 19
21T=${TMPDIR-/tmp}/test-linux.sh.$$ 20T=${TMPDIR-/tmp}/test-linux.sh.$$
22trap 'rm -rf $T' 0 21trap 'rm -rf $T' 0
@@ -29,14 +28,14 @@ CONFIG_VIRTIO_PCI=y
29CONFIG_VIRTIO_CONSOLE=y 28CONFIG_VIRTIO_CONSOLE=y
30___EOF___ 29___EOF___
31 30
32configinit.sh $T/config O=$builddir $resdir 31configinit.sh $T/config $resdir
33retval=$? 32retval=$?
34if test $retval -gt 1 33if test $retval -gt 1
35then 34then
36 exit 2 35 exit 2
37fi 36fi
38ncpus=`cpus2use.sh` 37ncpus=`cpus2use.sh`
39make O=$builddir -j$ncpus $TORTURE_KMAKE_ARG > $resdir/Make.out 2>&1 38make -j$ncpus $TORTURE_KMAKE_ARG > $resdir/Make.out 2>&1
40retval=$? 39retval=$?
41if test $retval -ne 0 || grep "rcu[^/]*": < $resdir/Make.out | egrep -q "Stop|Error|error:|warning:" || egrep -q "Stop|Error|error:" < $resdir/Make.out 40if test $retval -ne 0 || grep "rcu[^/]*": < $resdir/Make.out | egrep -q "Stop|Error|error:|warning:" || egrep -q "Stop|Error|error:" < $resdir/Make.out
42then 41then
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
index 8426fe1f15ee..1871d00bccd7 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
@@ -11,6 +11,7 @@
11# 11#
12# The "directory" above should end with the date/time directory, for example, 12# The "directory" above should end with the date/time directory, for example,
13# "tools/testing/selftests/rcutorture/res/2018.02.25-14:27:27". 13# "tools/testing/selftests/rcutorture/res/2018.02.25-14:27:27".
14# Returns error status reflecting the success (or not) of the specified run.
14# 15#
15# Copyright (C) IBM Corporation, 2018 16# Copyright (C) IBM Corporation, 2018
16# 17#
@@ -56,6 +57,8 @@ done
56if test -n "$files" 57if test -n "$files"
57then 58then
58 $editor $files 59 $editor $files
60 exit 1
59else 61else
60 echo No errors in console logs. 62 echo No errors in console logs.
63 exit 0
61fi 64fi
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
index 2adde6aaafdb..e5edd5198725 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
@@ -7,6 +7,8 @@
7# 7#
8# Usage: kvm-recheck.sh resdir ... 8# Usage: kvm-recheck.sh resdir ...
9# 9#
10# Returns status reflecting the success or not of the last run specified.
11#
10# Copyright (C) IBM Corporation, 2011 12# Copyright (C) IBM Corporation, 2011
11# 13#
12# Authors: Paul E. McKenney <paulmck@linux.ibm.com> 14# Authors: Paul E. McKenney <paulmck@linux.ibm.com>
@@ -28,8 +30,16 @@ do
28 TORTURE_SUITE="`cat $i/../TORTURE_SUITE`" 30 TORTURE_SUITE="`cat $i/../TORTURE_SUITE`"
29 rm -f $i/console.log.*.diags 31 rm -f $i/console.log.*.diags
30 kvm-recheck-${TORTURE_SUITE}.sh $i 32 kvm-recheck-${TORTURE_SUITE}.sh $i
31 if test -f "$i/console.log" 33 if test -f "$i/qemu-retval" && test "`cat $i/qemu-retval`" -ne 0 && test "`cat $i/qemu-retval`" -ne 137
34 then
35 echo QEMU error, output:
36 cat $i/qemu-output
37 elif test -f "$i/console.log"
32 then 38 then
39 if test -f "$i/qemu-retval" && test "`cat $i/qemu-retval`" -eq 137
40 then
41 echo QEMU killed
42 fi
33 configcheck.sh $i/.config $i/ConfigFragment 43 configcheck.sh $i/.config $i/ConfigFragment
34 if test -r $i/Make.oldconfig.err 44 if test -r $i/Make.oldconfig.err
35 then 45 then
@@ -58,3 +68,4 @@ do
58 fi 68 fi
59 done 69 done
60done 70done
71EDITOR=echo kvm-find-errors.sh "${@: -1}" > /dev/null 2>&1
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
index 0eb1ec16d78a..27b7b5693ede 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
@@ -36,11 +36,6 @@ config_template=${1}
36config_dir=`echo $config_template | sed -e 's,/[^/]*$,,'` 36config_dir=`echo $config_template | sed -e 's,/[^/]*$,,'`
37title=`echo $config_template | sed -e 's/^.*\///'` 37title=`echo $config_template | sed -e 's/^.*\///'`
38builddir=${2} 38builddir=${2}
39if test -z "$builddir" -o ! -d "$builddir" -o ! -w "$builddir"
40then
41 echo "kvm-test-1-run.sh :$builddir: Not a writable directory, cannot build into it"
42 exit 1
43fi
44resdir=${3} 39resdir=${3}
45if test -z "$resdir" -o ! -d "$resdir" -o ! -w "$resdir" 40if test -z "$resdir" -o ! -d "$resdir" -o ! -w "$resdir"
46then 41then
@@ -85,18 +80,18 @@ then
85 ln -s $base_resdir/.config $resdir # for kvm-recheck.sh 80 ln -s $base_resdir/.config $resdir # for kvm-recheck.sh
86 # Arch-independent indicator 81 # Arch-independent indicator
87 touch $resdir/builtkernel 82 touch $resdir/builtkernel
88elif kvm-build.sh $T/Kc2 $builddir $resdir 83elif kvm-build.sh $T/Kc2 $resdir
89then 84then
90 # Had to build a kernel for this test. 85 # Had to build a kernel for this test.
91 QEMU="`identify_qemu $builddir/vmlinux`" 86 QEMU="`identify_qemu vmlinux`"
92 BOOT_IMAGE="`identify_boot_image $QEMU`" 87 BOOT_IMAGE="`identify_boot_image $QEMU`"
93 cp $builddir/vmlinux $resdir 88 cp vmlinux $resdir
94 cp $builddir/.config $resdir 89 cp .config $resdir
95 cp $builddir/Module.symvers $resdir > /dev/null || : 90 cp Module.symvers $resdir > /dev/null || :
96 cp $builddir/System.map $resdir > /dev/null || : 91 cp System.map $resdir > /dev/null || :
97 if test -n "$BOOT_IMAGE" 92 if test -n "$BOOT_IMAGE"
98 then 93 then
99 cp $builddir/$BOOT_IMAGE $resdir 94 cp $BOOT_IMAGE $resdir
100 KERNEL=$resdir/${BOOT_IMAGE##*/} 95 KERNEL=$resdir/${BOOT_IMAGE##*/}
101 # Arch-independent indicator 96 # Arch-independent indicator
102 touch $resdir/builtkernel 97 touch $resdir/builtkernel
@@ -107,7 +102,7 @@ then
107 parse-build.sh $resdir/Make.out $title 102 parse-build.sh $resdir/Make.out $title
108else 103else
109 # Build failed. 104 # Build failed.
110 cp $builddir/.config $resdir || : 105 cp .config $resdir || :
111 echo Build failed, not running KVM, see $resdir. 106 echo Build failed, not running KVM, see $resdir.
112 if test -f $builddir.wait 107 if test -f $builddir.wait
113 then 108 then
@@ -165,7 +160,7 @@ then
165fi 160fi
166echo "NOTE: $QEMU either did not run or was interactive" > $resdir/console.log 161echo "NOTE: $QEMU either did not run or was interactive" > $resdir/console.log
167echo $QEMU $qemu_args -m $TORTURE_QEMU_MEM -kernel $KERNEL -append \"$qemu_append $boot_args\" > $resdir/qemu-cmd 162echo $QEMU $qemu_args -m $TORTURE_QEMU_MEM -kernel $KERNEL -append \"$qemu_append $boot_args\" > $resdir/qemu-cmd
168( $QEMU $qemu_args -m $TORTURE_QEMU_MEM -kernel $KERNEL -append "$qemu_append $boot_args"& echo $! > $resdir/qemu_pid; wait `cat $resdir/qemu_pid`; echo $? > $resdir/qemu-retval ) & 163( $QEMU $qemu_args -m $TORTURE_QEMU_MEM -kernel $KERNEL -append "$qemu_append $boot_args" > $resdir/qemu-output 2>&1 & echo $! > $resdir/qemu_pid; wait `cat $resdir/qemu_pid`; echo $? > $resdir/qemu-retval ) &
169commandcompleted=0 164commandcompleted=0
170sleep 10 # Give qemu's pid a chance to reach the file 165sleep 10 # Give qemu's pid a chance to reach the file
171if test -s "$resdir/qemu_pid" 166if test -s "$resdir/qemu_pid"
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index 8f1e337b9b54..72518580df23 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -24,6 +24,7 @@ dur=$((30*60))
24dryrun="" 24dryrun=""
25KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM 25KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM
26PATH=${KVM}/bin:$PATH; export PATH 26PATH=${KVM}/bin:$PATH; export PATH
27TORTURE_ALLOTED_CPUS=""
27TORTURE_DEFCONFIG=defconfig 28TORTURE_DEFCONFIG=defconfig
28TORTURE_BOOT_IMAGE="" 29TORTURE_BOOT_IMAGE=""
29TORTURE_INITRD="$KVM/initrd"; export TORTURE_INITRD 30TORTURE_INITRD="$KVM/initrd"; export TORTURE_INITRD
@@ -32,6 +33,7 @@ TORTURE_KMAKE_ARG=""
32TORTURE_QEMU_MEM=512 33TORTURE_QEMU_MEM=512
33TORTURE_SHUTDOWN_GRACE=180 34TORTURE_SHUTDOWN_GRACE=180
34TORTURE_SUITE=rcu 35TORTURE_SUITE=rcu
36TORTURE_TRUST_MAKE=""
35resdir="" 37resdir=""
36configs="" 38configs=""
37cpus=0 39cpus=0
@@ -62,6 +64,7 @@ usage () {
62 echo " --qemu-cmd qemu-system-..." 64 echo " --qemu-cmd qemu-system-..."
63 echo " --results absolute-pathname" 65 echo " --results absolute-pathname"
64 echo " --torture rcu" 66 echo " --torture rcu"
67 echo " --trust-make"
65 exit 1 68 exit 1
66} 69}
67 70
@@ -89,6 +92,7 @@ do
89 --cpus) 92 --cpus)
90 checkarg --cpus "(number)" "$#" "$2" '^[0-9]*$' '^--' 93 checkarg --cpus "(number)" "$#" "$2" '^[0-9]*$' '^--'
91 cpus=$2 94 cpus=$2
95 TORTURE_ALLOTED_CPUS="$2"
92 shift 96 shift
93 ;; 97 ;;
94 --datestamp) 98 --datestamp)
@@ -173,6 +177,9 @@ do
173 jitter=0 177 jitter=0
174 fi 178 fi
175 ;; 179 ;;
180 --trust-make)
181 TORTURE_TRUST_MAKE="y"
182 ;;
176 *) 183 *)
177 echo Unknown argument $1 184 echo Unknown argument $1
178 usage 185 usage
@@ -285,6 +292,7 @@ cat << ___EOF___ > $T/script
285CONFIGFRAG="$CONFIGFRAG"; export CONFIGFRAG 292CONFIGFRAG="$CONFIGFRAG"; export CONFIGFRAG
286KVM="$KVM"; export KVM 293KVM="$KVM"; export KVM
287PATH="$PATH"; export PATH 294PATH="$PATH"; export PATH
295TORTURE_ALLOTED_CPUS="$TORTURE_ALLOTED_CPUS"; export TORTURE_ALLOTED_CPUS
288TORTURE_BOOT_IMAGE="$TORTURE_BOOT_IMAGE"; export TORTURE_BOOT_IMAGE 296TORTURE_BOOT_IMAGE="$TORTURE_BOOT_IMAGE"; export TORTURE_BOOT_IMAGE
289TORTURE_BUILDONLY="$TORTURE_BUILDONLY"; export TORTURE_BUILDONLY 297TORTURE_BUILDONLY="$TORTURE_BUILDONLY"; export TORTURE_BUILDONLY
290TORTURE_DEFCONFIG="$TORTURE_DEFCONFIG"; export TORTURE_DEFCONFIG 298TORTURE_DEFCONFIG="$TORTURE_DEFCONFIG"; export TORTURE_DEFCONFIG
@@ -297,6 +305,7 @@ TORTURE_QEMU_MAC="$TORTURE_QEMU_MAC"; export TORTURE_QEMU_MAC
297TORTURE_QEMU_MEM="$TORTURE_QEMU_MEM"; export TORTURE_QEMU_MEM 305TORTURE_QEMU_MEM="$TORTURE_QEMU_MEM"; export TORTURE_QEMU_MEM
298TORTURE_SHUTDOWN_GRACE="$TORTURE_SHUTDOWN_GRACE"; export TORTURE_SHUTDOWN_GRACE 306TORTURE_SHUTDOWN_GRACE="$TORTURE_SHUTDOWN_GRACE"; export TORTURE_SHUTDOWN_GRACE
299TORTURE_SUITE="$TORTURE_SUITE"; export TORTURE_SUITE 307TORTURE_SUITE="$TORTURE_SUITE"; export TORTURE_SUITE
308TORTURE_TRUST_MAKE="$TORTURE_TRUST_MAKE"; export TORTURE_TRUST_MAKE
300if ! test -e $resdir 309if ! test -e $resdir
301then 310then
302 mkdir -p "$resdir" || : 311 mkdir -p "$resdir" || :
@@ -342,7 +351,7 @@ function dump(first, pastlast, batchnum)
342 print "needqemurun=" 351 print "needqemurun="
343 jn=1 352 jn=1
344 for (j = first; j < pastlast; j++) { 353 for (j = first; j < pastlast; j++) {
345 builddir=KVM "/b1" 354 builddir=KVM "/b" j - first + 1
346 cpusr[jn] = cpus[j]; 355 cpusr[jn] = cpus[j];
347 if (cfrep[cf[j]] == "") { 356 if (cfrep[cf[j]] == "") {
348 cfr[jn] = cf[j]; 357 cfr[jn] = cf[j];
@@ -358,7 +367,6 @@ function dump(first, pastlast, batchnum)
358 print "echo ", cfr[jn], cpusr[jn] ovf ": Starting build. `date` | tee -a " rd "log"; 367 print "echo ", cfr[jn], cpusr[jn] ovf ": Starting build. `date` | tee -a " rd "log";
359 print "rm -f " builddir ".*"; 368 print "rm -f " builddir ".*";
360 print "touch " builddir ".wait"; 369 print "touch " builddir ".wait";
361 print "mkdir " builddir " > /dev/null 2>&1 || :";
362 print "mkdir " rd cfr[jn] " || :"; 370 print "mkdir " rd cfr[jn] " || :";
363 print "kvm-test-1-run.sh " CONFIGDIR cf[j], builddir, rd cfr[jn], dur " \"" TORTURE_QEMU_ARG "\" \"" TORTURE_BOOTARGS "\" > " rd cfr[jn] "/kvm-test-1-run.sh.out 2>&1 &" 371 print "kvm-test-1-run.sh " CONFIGDIR cf[j], builddir, rd cfr[jn], dur " \"" TORTURE_QEMU_ARG "\" \"" TORTURE_BOOTARGS "\" > " rd cfr[jn] "/kvm-test-1-run.sh.out 2>&1 &"
364 print "echo ", cfr[jn], cpusr[jn] ovf ": Waiting for build to complete. `date` | tee -a " rd "log"; 372 print "echo ", cfr[jn], cpusr[jn] ovf ": Waiting for build to complete. `date` | tee -a " rd "log";
@@ -464,3 +472,5 @@ else
464fi 472fi
465 473
466# Tracing: trace_event=rcu:rcu_grace_period,rcu:rcu_future_grace_period,rcu:rcu_grace_period_init,rcu:rcu_nocb_wake,rcu:rcu_preempt_task,rcu:rcu_unlock_preempted_task,rcu:rcu_quiescent_state_report,rcu:rcu_fqs,rcu:rcu_callback,rcu:rcu_kfree_callback,rcu:rcu_batch_start,rcu:rcu_invoke_callback,rcu:rcu_invoke_kfree_callback,rcu:rcu_batch_end,rcu:rcu_torture_read,rcu:rcu_barrier 474# Tracing: trace_event=rcu:rcu_grace_period,rcu:rcu_future_grace_period,rcu:rcu_grace_period_init,rcu:rcu_nocb_wake,rcu:rcu_preempt_task,rcu:rcu_unlock_preempted_task,rcu:rcu_quiescent_state_report,rcu:rcu_fqs,rcu:rcu_callback,rcu:rcu_kfree_callback,rcu:rcu_batch_start,rcu:rcu_invoke_callback,rcu:rcu_invoke_kfree_callback,rcu:rcu_batch_end,rcu:rcu_torture_read,rcu:rcu_barrier
475# Function-graph tracing: ftrace=function_graph ftrace_graph_filter=sched_setaffinity,migration_cpu_stop
476# Also --kconfig "CONFIG_FUNCTION_TRACER=y CONFIG_FUNCTION_GRAPH_TRACER=y"
diff --git a/tools/testing/selftests/rcutorture/bin/parse-build.sh b/tools/testing/selftests/rcutorture/bin/parse-build.sh
index 0701b3bf6ade..09155c15ea65 100755
--- a/tools/testing/selftests/rcutorture/bin/parse-build.sh
+++ b/tools/testing/selftests/rcutorture/bin/parse-build.sh
@@ -21,7 +21,7 @@ mkdir $T
21 21
22. functions.sh 22. functions.sh
23 23
24if grep -q CC < $F 24if grep -q CC < $F || test -n "$TORTURE_TRUST_MAKE"
25then 25then
26 : 26 :
27else 27else
diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh
index 4508373a922f..4bf62d7b1cbc 100755
--- a/tools/testing/selftests/rcutorture/bin/parse-console.sh
+++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh
@@ -106,6 +106,7 @@ fi | tee -a $file.diags
106 106
107egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for' < $file | 107egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for' < $file |
108grep -v 'ODEBUG: ' | 108grep -v 'ODEBUG: ' |
109grep -v 'This means that this is a DEBUG kernel and it is' |
109grep -v 'Warning: unable to open an initial console' > $T.diags 110grep -v 'Warning: unable to open an initial console' > $T.diags
110if test -s $T.diags 111if test -s $T.diags
111then 112then
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/CFcommon b/tools/testing/selftests/rcutorture/configs/rcu/CFcommon
index d2d2a86139db..e19a444a0684 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/CFcommon
+++ b/tools/testing/selftests/rcutorture/configs/rcu/CFcommon
@@ -1,2 +1,5 @@
1CONFIG_RCU_TORTURE_TEST=y 1CONFIG_RCU_TORTURE_TEST=y
2CONFIG_PRINTK_TIME=y 2CONFIG_PRINTK_TIME=y
3CONFIG_HYPERVISOR_GUEST=y
4CONFIG_PARAVIRT=y
5CONFIG_KVM_GUEST=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot
index ea47da95374b..d6da9a61d44a 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot
@@ -3,3 +3,4 @@ rcutree.gp_preinit_delay=3
3rcutree.gp_init_delay=3 3rcutree.gp_init_delay=3
4rcutree.gp_cleanup_delay=3 4rcutree.gp_cleanup_delay=3
5rcu_nocbs=0 5rcu_nocbs=0
6rcutorture.fwd_progress=0
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL b/tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL
new file mode 100644
index 000000000000..4d8eb5bfb6f6
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL
@@ -0,0 +1,14 @@
1CONFIG_SMP=y
2CONFIG_NR_CPUS=8
3CONFIG_PREEMPT_NONE=y
4CONFIG_PREEMPT_VOLUNTARY=n
5CONFIG_PREEMPT=n
6CONFIG_HZ_PERIODIC=n
7CONFIG_NO_HZ_IDLE=y
8CONFIG_NO_HZ_FULL=n
9CONFIG_HOTPLUG_CPU=n
10CONFIG_SUSPEND=n
11CONFIG_HIBERNATION=n
12CONFIG_DEBUG_LOCK_ALLOC=n
13CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
14CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL.boot b/tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL.boot
new file mode 100644
index 000000000000..7017f5f5a55f
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL.boot
@@ -0,0 +1,3 @@
1rcutorture.torture_type=trivial
2rcutorture.onoff_interval=0
3rcutorture.shuffle_interval=0
diff --git a/tools/testing/selftests/timers/freq-step.c b/tools/testing/selftests/timers/freq-step.c
index 8cd10662ffba..4b76450d78d1 100644
--- a/tools/testing/selftests/timers/freq-step.c
+++ b/tools/testing/selftests/timers/freq-step.c
@@ -21,9 +21,9 @@
21#define SAMPLE_READINGS 10 21#define SAMPLE_READINGS 10
22#define MEAN_SAMPLE_INTERVAL 0.1 22#define MEAN_SAMPLE_INTERVAL 0.1
23#define STEP_INTERVAL 1.0 23#define STEP_INTERVAL 1.0
24#define MAX_PRECISION 100e-9 24#define MAX_PRECISION 500e-9
25#define MAX_FREQ_ERROR 10e-6 25#define MAX_FREQ_ERROR 0.02e-6
26#define MAX_STDDEV 1000e-9 26#define MAX_STDDEV 50e-9
27 27
28#ifndef ADJ_SETOFFSET 28#ifndef ADJ_SETOFFSET
29 #define ADJ_SETOFFSET 0x0100 29 #define ADJ_SETOFFSET 0x0100
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 186520198de7..fa07d526fe39 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -12,8 +12,9 @@ CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh $(CC) trivial_program.c -no-pie)
12 12
13TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \ 13TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \
14 check_initial_reg_state sigreturn iopl mpx-mini-test ioperm \ 14 check_initial_reg_state sigreturn iopl mpx-mini-test ioperm \
15 protection_keys test_vdso test_vsyscall mov_ss_trap 15 protection_keys test_vdso test_vsyscall mov_ss_trap \
16TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \ 16 syscall_arg_fault
17TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \
17 test_FCMOV test_FCOMI test_FISTTP \ 18 test_FCMOV test_FCOMI test_FISTTP \
18 vdso_restorer 19 vdso_restorer
19TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip 20TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip
diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c
index af85bd4752a5..5ab4c60c100e 100644
--- a/tools/testing/selftests/x86/fsgsbase.c
+++ b/tools/testing/selftests/x86/fsgsbase.c
@@ -23,6 +23,10 @@
23#include <pthread.h> 23#include <pthread.h>
24#include <asm/ldt.h> 24#include <asm/ldt.h>
25#include <sys/mman.h> 25#include <sys/mman.h>
26#include <stddef.h>
27#include <sys/ptrace.h>
28#include <sys/wait.h>
29#include <setjmp.h>
26 30
27#ifndef __x86_64__ 31#ifndef __x86_64__
28# error This test is 64-bit only 32# error This test is 64-bit only
@@ -31,6 +35,8 @@
31static volatile sig_atomic_t want_segv; 35static volatile sig_atomic_t want_segv;
32static volatile unsigned long segv_addr; 36static volatile unsigned long segv_addr;
33 37
38static unsigned short *shared_scratch;
39
34static int nerrs; 40static int nerrs;
35 41
36static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), 42static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
@@ -71,6 +77,43 @@ static void sigsegv(int sig, siginfo_t *si, void *ctx_void)
71 77
72} 78}
73 79
80static jmp_buf jmpbuf;
81
82static void sigill(int sig, siginfo_t *si, void *ctx_void)
83{
84 siglongjmp(jmpbuf, 1);
85}
86
87static bool have_fsgsbase;
88
89static inline unsigned long rdgsbase(void)
90{
91 unsigned long gsbase;
92
93 asm volatile("rdgsbase %0" : "=r" (gsbase) :: "memory");
94
95 return gsbase;
96}
97
98static inline unsigned long rdfsbase(void)
99{
100 unsigned long fsbase;
101
102 asm volatile("rdfsbase %0" : "=r" (fsbase) :: "memory");
103
104 return fsbase;
105}
106
107static inline void wrgsbase(unsigned long gsbase)
108{
109 asm volatile("wrgsbase %0" :: "r" (gsbase) : "memory");
110}
111
112static inline void wrfsbase(unsigned long fsbase)
113{
114 asm volatile("wrfsbase %0" :: "r" (fsbase) : "memory");
115}
116
74enum which_base { FS, GS }; 117enum which_base { FS, GS };
75 118
76static unsigned long read_base(enum which_base which) 119static unsigned long read_base(enum which_base which)
@@ -199,16 +242,13 @@ static void do_remote_base()
199 to_set, hard_zero ? " and clear gs" : "", sel); 242 to_set, hard_zero ? " and clear gs" : "", sel);
200} 243}
201 244
202void do_unexpected_base(void) 245static __thread int set_thread_area_entry_number = -1;
246
247static unsigned short load_gs(void)
203{ 248{
204 /* 249 /*
205 * The goal here is to try to arrange for GS == 0, GSBASE != 250 * Sets GS != 0 and GSBASE != 0 but arranges for the kernel to think
206 * 0, and for the the kernel the think that GSBASE == 0. 251 * that GSBASE == 0 (i.e. thread.gsbase == 0).
207 *
208 * To make the test as reliable as possible, this uses
209 * explicit descriptorss. (This is not the only way. This
210 * could use ARCH_SET_GS with a low, nonzero base, but the
211 * relevant side effect of ARCH_SET_GS could change.)
212 */ 252 */
213 253
214 /* Step 1: tell the kernel that we have GSBASE == 0. */ 254 /* Step 1: tell the kernel that we have GSBASE == 0. */
@@ -228,8 +268,9 @@ void do_unexpected_base(void)
228 .useable = 0 268 .useable = 0
229 }; 269 };
230 if (syscall(SYS_modify_ldt, 1, &desc, sizeof(desc)) == 0) { 270 if (syscall(SYS_modify_ldt, 1, &desc, sizeof(desc)) == 0) {
231 printf("\tother thread: using LDT slot 0\n"); 271 printf("\tusing LDT slot 0\n");
232 asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0x7)); 272 asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0x7));
273 return 0x7;
233 } else { 274 } else {
234 /* No modify_ldt for us (configured out, perhaps) */ 275 /* No modify_ldt for us (configured out, perhaps) */
235 276
@@ -239,7 +280,7 @@ void do_unexpected_base(void)
239 MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0); 280 MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0);
240 memcpy(low_desc, &desc, sizeof(desc)); 281 memcpy(low_desc, &desc, sizeof(desc));
241 282
242 low_desc->entry_number = -1; 283 low_desc->entry_number = set_thread_area_entry_number;
243 284
244 /* 32-bit set_thread_area */ 285 /* 32-bit set_thread_area */
245 long ret; 286 long ret;
@@ -251,18 +292,43 @@ void do_unexpected_base(void)
251 292
252 if (ret != 0) { 293 if (ret != 0) {
253 printf("[NOTE]\tcould not create a segment -- test won't do anything\n"); 294 printf("[NOTE]\tcould not create a segment -- test won't do anything\n");
254 return; 295 return 0;
255 } 296 }
256 printf("\tother thread: using GDT slot %d\n", desc.entry_number); 297 printf("\tusing GDT slot %d\n", desc.entry_number);
257 asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)((desc.entry_number << 3) | 0x3))); 298 set_thread_area_entry_number = desc.entry_number;
299
300 unsigned short gs = (unsigned short)((desc.entry_number << 3) | 0x3);
301 asm volatile ("mov %0, %%gs" : : "rm" (gs));
302 return gs;
258 } 303 }
304}
259 305
260 /* 306void test_wrbase(unsigned short index, unsigned long base)
261 * Step 3: set the selector back to zero. On AMD chips, this will 307{
262 * preserve GSBASE. 308 unsigned short newindex;
263 */ 309 unsigned long newbase;
264 310
265 asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0)); 311 printf("[RUN]\tGS = 0x%hx, GSBASE = 0x%lx\n", index, base);
312
313 asm volatile ("mov %0, %%gs" : : "rm" (index));
314 wrgsbase(base);
315
316 remote_base = 0;
317 ftx = 1;
318 syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
319 while (ftx != 0)
320 syscall(SYS_futex, &ftx, FUTEX_WAIT, 1, NULL, NULL, 0);
321
322 asm volatile ("mov %%gs, %0" : "=rm" (newindex));
323 newbase = rdgsbase();
324
325 if (newindex == index && newbase == base) {
326 printf("[OK]\tIndex and base were preserved\n");
327 } else {
328 printf("[FAIL]\tAfter switch, GS = 0x%hx and GSBASE = 0x%lx\n",
329 newindex, newbase);
330 nerrs++;
331 }
266} 332}
267 333
268static void *threadproc(void *ctx) 334static void *threadproc(void *ctx)
@@ -273,12 +339,19 @@ static void *threadproc(void *ctx)
273 if (ftx == 3) 339 if (ftx == 3)
274 return NULL; 340 return NULL;
275 341
276 if (ftx == 1) 342 if (ftx == 1) {
277 do_remote_base(); 343 do_remote_base();
278 else if (ftx == 2) 344 } else if (ftx == 2) {
279 do_unexpected_base(); 345 /*
280 else 346 * On AMD chips, this causes GSBASE != 0, GS == 0, and
347 * thread.gsbase == 0.
348 */
349
350 load_gs();
351 asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0));
352 } else {
281 errx(1, "helper thread got bad command"); 353 errx(1, "helper thread got bad command");
354 }
282 355
283 ftx = 0; 356 ftx = 0;
284 syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0); 357 syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
@@ -367,10 +440,99 @@ static void test_unexpected_base(void)
367 } 440 }
368} 441}
369 442
443#define USER_REGS_OFFSET(r) offsetof(struct user_regs_struct, r)
444
445static void test_ptrace_write_gsbase(void)
446{
447 int status;
448 pid_t child = fork();
449
450 if (child < 0)
451 err(1, "fork");
452
453 if (child == 0) {
454 printf("[RUN]\tPTRACE_POKE(), write GSBASE from ptracer\n");
455
456 *shared_scratch = load_gs();
457
458 if (ptrace(PTRACE_TRACEME, 0, NULL, NULL) != 0)
459 err(1, "PTRACE_TRACEME");
460
461 raise(SIGTRAP);
462 _exit(0);
463 }
464
465 wait(&status);
466
467 if (WSTOPSIG(status) == SIGTRAP) {
468 unsigned long gs, base;
469 unsigned long gs_offset = USER_REGS_OFFSET(gs);
470 unsigned long base_offset = USER_REGS_OFFSET(gs_base);
471
472 gs = ptrace(PTRACE_PEEKUSER, child, gs_offset, NULL);
473
474 if (gs != *shared_scratch) {
475 nerrs++;
476 printf("[FAIL]\tGS is not prepared with nonzero\n");
477 goto END;
478 }
479
480 if (ptrace(PTRACE_POKEUSER, child, base_offset, 0xFF) != 0)
481 err(1, "PTRACE_POKEUSER");
482
483 gs = ptrace(PTRACE_PEEKUSER, child, gs_offset, NULL);
484 base = ptrace(PTRACE_PEEKUSER, child, base_offset, NULL);
485
486 /*
487 * In a non-FSGSBASE system, the nonzero selector will load
488 * GSBASE (again). But what is tested here is whether the
489 * selector value is changed or not by the GSBASE write in
490 * a ptracer.
491 */
492 if (gs != *shared_scratch) {
493 nerrs++;
494 printf("[FAIL]\tGS changed to %lx\n", gs);
495
496 /*
497 * On older kernels, poking a nonzero value into the
498 * base would zero the selector. On newer kernels,
499 * this behavior has changed -- poking the base
500 * changes only the base and, if FSGSBASE is not
501 * available, this may have no effect.
502 */
503 if (gs == 0)
504 printf("\tNote: this is expected behavior on older kernels.\n");
505 } else if (have_fsgsbase && (base != 0xFF)) {
506 nerrs++;
507 printf("[FAIL]\tGSBASE changed to %lx\n", base);
508 } else {
509 printf("[OK]\tGS remained 0x%hx%s", *shared_scratch, have_fsgsbase ? " and GSBASE changed to 0xFF" : "");
510 printf("\n");
511 }
512 }
513
514END:
515 ptrace(PTRACE_CONT, child, NULL, NULL);
516}
517
370int main() 518int main()
371{ 519{
372 pthread_t thread; 520 pthread_t thread;
373 521
522 shared_scratch = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
523 MAP_ANONYMOUS | MAP_SHARED, -1, 0);
524
525 /* Probe FSGSBASE */
526 sethandler(SIGILL, sigill, 0);
527 if (sigsetjmp(jmpbuf, 1) == 0) {
528 rdfsbase();
529 have_fsgsbase = true;
530 printf("\tFSGSBASE instructions are enabled\n");
531 } else {
532 printf("\tFSGSBASE instructions are disabled\n");
533 }
534 clearhandler(SIGILL);
535
374 sethandler(SIGSEGV, sigsegv, 0); 536 sethandler(SIGSEGV, sigsegv, 0);
375 537
376 check_gs_value(0); 538 check_gs_value(0);
@@ -417,11 +579,28 @@ int main()
417 579
418 test_unexpected_base(); 580 test_unexpected_base();
419 581
582 if (have_fsgsbase) {
583 unsigned short ss;
584
585 asm volatile ("mov %%ss, %0" : "=rm" (ss));
586
587 test_wrbase(0, 0);
588 test_wrbase(0, 1);
589 test_wrbase(0, 0x200000000);
590 test_wrbase(0, 0xffffffffffffffff);
591 test_wrbase(ss, 0);
592 test_wrbase(ss, 1);
593 test_wrbase(ss, 0x200000000);
594 test_wrbase(ss, 0xffffffffffffffff);
595 }
596
420 ftx = 3; /* Kill the thread. */ 597 ftx = 3; /* Kill the thread. */
421 syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0); 598 syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
422 599
423 if (pthread_join(thread, NULL) != 0) 600 if (pthread_join(thread, NULL) != 0)
424 err(1, "pthread_join"); 601 err(1, "pthread_join");
425 602
603 test_ptrace_write_gsbase();
604
426 return nerrs == 0 ? 0 : 1; 605 return nerrs == 0 ? 0 : 1;
427} 606}
diff --git a/tools/testing/selftests/x86/syscall_arg_fault.c b/tools/testing/selftests/x86/syscall_arg_fault.c
index 4e25d38c8bbd..bc0ecc2e862e 100644
--- a/tools/testing/selftests/x86/syscall_arg_fault.c
+++ b/tools/testing/selftests/x86/syscall_arg_fault.c
@@ -15,9 +15,30 @@
15#include <setjmp.h> 15#include <setjmp.h>
16#include <errno.h> 16#include <errno.h>
17 17
18#ifdef __x86_64__
19# define WIDTH "q"
20#else
21# define WIDTH "l"
22#endif
23
18/* Our sigaltstack scratch space. */ 24/* Our sigaltstack scratch space. */
19static unsigned char altstack_data[SIGSTKSZ]; 25static unsigned char altstack_data[SIGSTKSZ];
20 26
27static unsigned long get_eflags(void)
28{
29 unsigned long eflags;
30 asm volatile ("pushf" WIDTH "\n\tpop" WIDTH " %0" : "=rm" (eflags));
31 return eflags;
32}
33
34static void set_eflags(unsigned long eflags)
35{
36 asm volatile ("push" WIDTH " %0\n\tpopf" WIDTH
37 : : "rm" (eflags) : "flags");
38}
39
40#define X86_EFLAGS_TF (1UL << 8)
41
21static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), 42static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
22 int flags) 43 int flags)
23{ 44{
@@ -35,13 +56,22 @@ static sigjmp_buf jmpbuf;
35 56
36static volatile sig_atomic_t n_errs; 57static volatile sig_atomic_t n_errs;
37 58
59#ifdef __x86_64__
60#define REG_AX REG_RAX
61#define REG_IP REG_RIP
62#else
63#define REG_AX REG_EAX
64#define REG_IP REG_EIP
65#endif
66
38static void sigsegv_or_sigbus(int sig, siginfo_t *info, void *ctx_void) 67static void sigsegv_or_sigbus(int sig, siginfo_t *info, void *ctx_void)
39{ 68{
40 ucontext_t *ctx = (ucontext_t*)ctx_void; 69 ucontext_t *ctx = (ucontext_t*)ctx_void;
70 long ax = (long)ctx->uc_mcontext.gregs[REG_AX];
41 71
42 if (ctx->uc_mcontext.gregs[REG_EAX] != -EFAULT) { 72 if (ax != -EFAULT && ax != -ENOSYS) {
43 printf("[FAIL]\tAX had the wrong value: 0x%x\n", 73 printf("[FAIL]\tAX had the wrong value: 0x%lx\n",
44 ctx->uc_mcontext.gregs[REG_EAX]); 74 (unsigned long)ax);
45 n_errs++; 75 n_errs++;
46 } else { 76 } else {
47 printf("[OK]\tSeems okay\n"); 77 printf("[OK]\tSeems okay\n");
@@ -50,9 +80,42 @@ static void sigsegv_or_sigbus(int sig, siginfo_t *info, void *ctx_void)
50 siglongjmp(jmpbuf, 1); 80 siglongjmp(jmpbuf, 1);
51} 81}
52 82
83static volatile sig_atomic_t sigtrap_consecutive_syscalls;
84
85static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
86{
87 /*
88 * KVM has some bugs that can cause us to stop making progress.
89 * detect them and complain, but don't infinite loop or fail the
90 * test.
91 */
92
93 ucontext_t *ctx = (ucontext_t*)ctx_void;
94 unsigned short *ip = (unsigned short *)ctx->uc_mcontext.gregs[REG_IP];
95
96 if (*ip == 0x340f || *ip == 0x050f) {
97 /* The trap was on SYSCALL or SYSENTER */
98 sigtrap_consecutive_syscalls++;
99 if (sigtrap_consecutive_syscalls > 3) {
100 printf("[WARN]\tGot stuck single-stepping -- you probably have a KVM bug\n");
101 siglongjmp(jmpbuf, 1);
102 }
103 } else {
104 sigtrap_consecutive_syscalls = 0;
105 }
106}
107
53static void sigill(int sig, siginfo_t *info, void *ctx_void) 108static void sigill(int sig, siginfo_t *info, void *ctx_void)
54{ 109{
55 printf("[SKIP]\tIllegal instruction\n"); 110 ucontext_t *ctx = (ucontext_t*)ctx_void;
111 unsigned short *ip = (unsigned short *)ctx->uc_mcontext.gregs[REG_IP];
112
113 if (*ip == 0x0b0f) {
114 /* one of the ud2 instructions faulted */
115 printf("[OK]\tSYSCALL returned normally\n");
116 } else {
117 printf("[SKIP]\tIllegal instruction\n");
118 }
56 siglongjmp(jmpbuf, 1); 119 siglongjmp(jmpbuf, 1);
57} 120}
58 121
@@ -120,9 +183,48 @@ int main()
120 "movl $-1, %%ebp\n\t" 183 "movl $-1, %%ebp\n\t"
121 "movl $-1, %%esp\n\t" 184 "movl $-1, %%esp\n\t"
122 "syscall\n\t" 185 "syscall\n\t"
123 "pushl $0" /* make sure we segfault cleanly */ 186 "ud2" /* make sure we recover cleanly */
187 : : : "memory", "flags");
188 }
189
190 printf("[RUN]\tSYSENTER with TF and invalid state\n");
191 sethandler(SIGTRAP, sigtrap, SA_ONSTACK);
192
193 if (sigsetjmp(jmpbuf, 1) == 0) {
194 sigtrap_consecutive_syscalls = 0;
195 set_eflags(get_eflags() | X86_EFLAGS_TF);
196 asm volatile (
197 "movl $-1, %%eax\n\t"
198 "movl $-1, %%ebx\n\t"
199 "movl $-1, %%ecx\n\t"
200 "movl $-1, %%edx\n\t"
201 "movl $-1, %%esi\n\t"
202 "movl $-1, %%edi\n\t"
203 "movl $-1, %%ebp\n\t"
204 "movl $-1, %%esp\n\t"
205 "sysenter"
206 : : : "memory", "flags");
207 }
208 set_eflags(get_eflags() & ~X86_EFLAGS_TF);
209
210 printf("[RUN]\tSYSCALL with TF and invalid state\n");
211 if (sigsetjmp(jmpbuf, 1) == 0) {
212 sigtrap_consecutive_syscalls = 0;
213 set_eflags(get_eflags() | X86_EFLAGS_TF);
214 asm volatile (
215 "movl $-1, %%eax\n\t"
216 "movl $-1, %%ebx\n\t"
217 "movl $-1, %%ecx\n\t"
218 "movl $-1, %%edx\n\t"
219 "movl $-1, %%esi\n\t"
220 "movl $-1, %%edi\n\t"
221 "movl $-1, %%ebp\n\t"
222 "movl $-1, %%esp\n\t"
223 "syscall\n\t"
224 "ud2" /* make sure we recover cleanly */
124 : : : "memory", "flags"); 225 : : : "memory", "flags");
125 } 226 }
227 set_eflags(get_eflags() & ~X86_EFLAGS_TF);
126 228
127 return 0; 229 return 0;
128} 230}
diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c
index 0b4f1cc2291c..4602326b8f5b 100644
--- a/tools/testing/selftests/x86/test_vsyscall.c
+++ b/tools/testing/selftests/x86/test_vsyscall.c
@@ -18,6 +18,7 @@
18#include <sched.h> 18#include <sched.h>
19#include <stdbool.h> 19#include <stdbool.h>
20#include <setjmp.h> 20#include <setjmp.h>
21#include <sys/uio.h>
21 22
22#ifdef __x86_64__ 23#ifdef __x86_64__
23# define VSYS(x) (x) 24# define VSYS(x) (x)
@@ -49,21 +50,21 @@ static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
49} 50}
50 51
51/* vsyscalls and vDSO */ 52/* vsyscalls and vDSO */
52bool should_read_vsyscall = false; 53bool vsyscall_map_r = false, vsyscall_map_x = false;
53 54
54typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz); 55typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz);
55gtod_t vgtod = (gtod_t)VSYS(0xffffffffff600000); 56const gtod_t vgtod = (gtod_t)VSYS(0xffffffffff600000);
56gtod_t vdso_gtod; 57gtod_t vdso_gtod;
57 58
58typedef int (*vgettime_t)(clockid_t, struct timespec *); 59typedef int (*vgettime_t)(clockid_t, struct timespec *);
59vgettime_t vdso_gettime; 60vgettime_t vdso_gettime;
60 61
61typedef long (*time_func_t)(time_t *t); 62typedef long (*time_func_t)(time_t *t);
62time_func_t vtime = (time_func_t)VSYS(0xffffffffff600400); 63const time_func_t vtime = (time_func_t)VSYS(0xffffffffff600400);
63time_func_t vdso_time; 64time_func_t vdso_time;
64 65
65typedef long (*getcpu_t)(unsigned *, unsigned *, void *); 66typedef long (*getcpu_t)(unsigned *, unsigned *, void *);
66getcpu_t vgetcpu = (getcpu_t)VSYS(0xffffffffff600800); 67const getcpu_t vgetcpu = (getcpu_t)VSYS(0xffffffffff600800);
67getcpu_t vdso_getcpu; 68getcpu_t vdso_getcpu;
68 69
69static void init_vdso(void) 70static void init_vdso(void)
@@ -107,7 +108,7 @@ static int init_vsys(void)
107 maps = fopen("/proc/self/maps", "r"); 108 maps = fopen("/proc/self/maps", "r");
108 if (!maps) { 109 if (!maps) {
109 printf("[WARN]\tCould not open /proc/self/maps -- assuming vsyscall is r-x\n"); 110 printf("[WARN]\tCould not open /proc/self/maps -- assuming vsyscall is r-x\n");
110 should_read_vsyscall = true; 111 vsyscall_map_r = true;
111 return 0; 112 return 0;
112 } 113 }
113 114
@@ -133,12 +134,8 @@ static int init_vsys(void)
133 } 134 }
134 135
135 printf("\tvsyscall permissions are %c-%c\n", r, x); 136 printf("\tvsyscall permissions are %c-%c\n", r, x);
136 should_read_vsyscall = (r == 'r'); 137 vsyscall_map_r = (r == 'r');
137 if (x != 'x') { 138 vsyscall_map_x = (x == 'x');
138 vgtod = NULL;
139 vtime = NULL;
140 vgetcpu = NULL;
141 }
142 139
143 found = true; 140 found = true;
144 break; 141 break;
@@ -148,10 +145,8 @@ static int init_vsys(void)
148 145
149 if (!found) { 146 if (!found) {
150 printf("\tno vsyscall map in /proc/self/maps\n"); 147 printf("\tno vsyscall map in /proc/self/maps\n");
151 should_read_vsyscall = false; 148 vsyscall_map_r = false;
152 vgtod = NULL; 149 vsyscall_map_x = false;
153 vtime = NULL;
154 vgetcpu = NULL;
155 } 150 }
156 151
157 return nerrs; 152 return nerrs;
@@ -183,9 +178,13 @@ static inline long sys_getcpu(unsigned * cpu, unsigned * node,
183} 178}
184 179
185static jmp_buf jmpbuf; 180static jmp_buf jmpbuf;
181static volatile unsigned long segv_err;
186 182
187static void sigsegv(int sig, siginfo_t *info, void *ctx_void) 183static void sigsegv(int sig, siginfo_t *info, void *ctx_void)
188{ 184{
185 ucontext_t *ctx = (ucontext_t *)ctx_void;
186
187 segv_err = ctx->uc_mcontext.gregs[REG_ERR];
189 siglongjmp(jmpbuf, 1); 188 siglongjmp(jmpbuf, 1);
190} 189}
191 190
@@ -238,7 +237,7 @@ static int test_gtod(void)
238 err(1, "syscall gettimeofday"); 237 err(1, "syscall gettimeofday");
239 if (vdso_gtod) 238 if (vdso_gtod)
240 ret_vdso = vdso_gtod(&tv_vdso, &tz_vdso); 239 ret_vdso = vdso_gtod(&tv_vdso, &tz_vdso);
241 if (vgtod) 240 if (vsyscall_map_x)
242 ret_vsys = vgtod(&tv_vsys, &tz_vsys); 241 ret_vsys = vgtod(&tv_vsys, &tz_vsys);
243 if (sys_gtod(&tv_sys2, &tz_sys) != 0) 242 if (sys_gtod(&tv_sys2, &tz_sys) != 0)
244 err(1, "syscall gettimeofday"); 243 err(1, "syscall gettimeofday");
@@ -252,7 +251,7 @@ static int test_gtod(void)
252 } 251 }
253 } 252 }
254 253
255 if (vgtod) { 254 if (vsyscall_map_x) {
256 if (ret_vsys == 0) { 255 if (ret_vsys == 0) {
257 nerrs += check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vsyscall", &tv_vsys, &tz_vsys); 256 nerrs += check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vsyscall", &tv_vsys, &tz_vsys);
258 } else { 257 } else {
@@ -273,7 +272,7 @@ static int test_time(void) {
273 t_sys1 = sys_time(&t2_sys1); 272 t_sys1 = sys_time(&t2_sys1);
274 if (vdso_time) 273 if (vdso_time)
275 t_vdso = vdso_time(&t2_vdso); 274 t_vdso = vdso_time(&t2_vdso);
276 if (vtime) 275 if (vsyscall_map_x)
277 t_vsys = vtime(&t2_vsys); 276 t_vsys = vtime(&t2_vsys);
278 t_sys2 = sys_time(&t2_sys2); 277 t_sys2 = sys_time(&t2_sys2);
279 if (t_sys1 < 0 || t_sys1 != t2_sys1 || t_sys2 < 0 || t_sys2 != t2_sys2) { 278 if (t_sys1 < 0 || t_sys1 != t2_sys1 || t_sys2 < 0 || t_sys2 != t2_sys2) {
@@ -294,7 +293,7 @@ static int test_time(void) {
294 } 293 }
295 } 294 }
296 295
297 if (vtime) { 296 if (vsyscall_map_x) {
298 if (t_vsys < 0 || t_vsys != t2_vsys) { 297 if (t_vsys < 0 || t_vsys != t2_vsys) {
299 printf("[FAIL]\tvsyscall failed (ret:%ld output:%ld)\n", t_vsys, t2_vsys); 298 printf("[FAIL]\tvsyscall failed (ret:%ld output:%ld)\n", t_vsys, t2_vsys);
300 nerrs++; 299 nerrs++;
@@ -330,7 +329,7 @@ static int test_getcpu(int cpu)
330 ret_sys = sys_getcpu(&cpu_sys, &node_sys, 0); 329 ret_sys = sys_getcpu(&cpu_sys, &node_sys, 0);
331 if (vdso_getcpu) 330 if (vdso_getcpu)
332 ret_vdso = vdso_getcpu(&cpu_vdso, &node_vdso, 0); 331 ret_vdso = vdso_getcpu(&cpu_vdso, &node_vdso, 0);
333 if (vgetcpu) 332 if (vsyscall_map_x)
334 ret_vsys = vgetcpu(&cpu_vsys, &node_vsys, 0); 333 ret_vsys = vgetcpu(&cpu_vsys, &node_vsys, 0);
335 334
336 if (ret_sys == 0) { 335 if (ret_sys == 0) {
@@ -369,7 +368,7 @@ static int test_getcpu(int cpu)
369 } 368 }
370 } 369 }
371 370
372 if (vgetcpu) { 371 if (vsyscall_map_x) {
373 if (ret_vsys) { 372 if (ret_vsys) {
374 printf("[FAIL]\tvsyscall getcpu() failed\n"); 373 printf("[FAIL]\tvsyscall getcpu() failed\n");
375 nerrs++; 374 nerrs++;
@@ -410,20 +409,88 @@ static int test_vsys_r(void)
410 can_read = false; 409 can_read = false;
411 } 410 }
412 411
413 if (can_read && !should_read_vsyscall) { 412 if (can_read && !vsyscall_map_r) {
414 printf("[FAIL]\tWe have read access, but we shouldn't\n"); 413 printf("[FAIL]\tWe have read access, but we shouldn't\n");
415 return 1; 414 return 1;
416 } else if (!can_read && should_read_vsyscall) { 415 } else if (!can_read && vsyscall_map_r) {
417 printf("[FAIL]\tWe don't have read access, but we should\n"); 416 printf("[FAIL]\tWe don't have read access, but we should\n");
418 return 1; 417 return 1;
418 } else if (can_read) {
419 printf("[OK]\tWe have read access\n");
419 } else { 420 } else {
420 printf("[OK]\tgot expected result\n"); 421 printf("[OK]\tWe do not have read access: #PF(0x%lx)\n",
422 segv_err);
421 } 423 }
422#endif 424#endif
423 425
424 return 0; 426 return 0;
425} 427}
426 428
429static int test_vsys_x(void)
430{
431#ifdef __x86_64__
432 if (vsyscall_map_x) {
433 /* We already tested this adequately. */
434 return 0;
435 }
436
437 printf("[RUN]\tMake sure that vsyscalls really page fault\n");
438
439 bool can_exec;
440 if (sigsetjmp(jmpbuf, 1) == 0) {
441 vgtod(NULL, NULL);
442 can_exec = true;
443 } else {
444 can_exec = false;
445 }
446
447 if (can_exec) {
448 printf("[FAIL]\tExecuting the vsyscall did not page fault\n");
449 return 1;
450 } else if (segv_err & (1 << 4)) { /* INSTR */
451 printf("[OK]\tExecuting the vsyscall page failed: #PF(0x%lx)\n",
452 segv_err);
453 } else {
454 printf("[FAILT]\tExecution failed with the wrong error: #PF(0x%lx)\n",
455 segv_err);
456 return 1;
457 }
458#endif
459
460 return 0;
461}
462
463static int test_process_vm_readv(void)
464{
465#ifdef __x86_64__
466 char buf[4096];
467 struct iovec local, remote;
468 int ret;
469
470 printf("[RUN]\tprocess_vm_readv() from vsyscall page\n");
471
472 local.iov_base = buf;
473 local.iov_len = 4096;
474 remote.iov_base = (void *)0xffffffffff600000;
475 remote.iov_len = 4096;
476 ret = process_vm_readv(getpid(), &local, 1, &remote, 1, 0);
477 if (ret != 4096) {
478 printf("[OK]\tprocess_vm_readv() failed (ret = %d, errno = %d)\n", ret, errno);
479 return 0;
480 }
481
482 if (vsyscall_map_r) {
483 if (!memcmp(buf, (const void *)0xffffffffff600000, 4096)) {
484 printf("[OK]\tIt worked and read correct data\n");
485 } else {
486 printf("[FAIL]\tIt worked but returned incorrect data\n");
487 return 1;
488 }
489 }
490#endif
491
492 return 0;
493}
427 494
428#ifdef __x86_64__ 495#ifdef __x86_64__
429#define X86_EFLAGS_TF (1UL << 8) 496#define X86_EFLAGS_TF (1UL << 8)
@@ -455,7 +522,7 @@ static int test_emulation(void)
455 time_t tmp; 522 time_t tmp;
456 bool is_native; 523 bool is_native;
457 524
458 if (!vtime) 525 if (!vsyscall_map_x)
459 return 0; 526 return 0;
460 527
461 printf("[RUN]\tchecking that vsyscalls are emulated\n"); 528 printf("[RUN]\tchecking that vsyscalls are emulated\n");
@@ -497,6 +564,9 @@ int main(int argc, char **argv)
497 564
498 sethandler(SIGSEGV, sigsegv, 0); 565 sethandler(SIGSEGV, sigsegv, 0);
499 nerrs += test_vsys_r(); 566 nerrs += test_vsys_r();
567 nerrs += test_vsys_x();
568
569 nerrs += test_process_vm_readv();
500 570
501#ifdef __x86_64__ 571#ifdef __x86_64__
502 nerrs += test_emulation(); 572 nerrs += test_emulation();